Add Bonds, Slaves, and Flows

This commit is contained in:
Joseph Henry 2020-05-12 01:35:48 -07:00
parent de9cfbe9b0
commit a50e8e9878
31 changed files with 4898 additions and 1966 deletions


@ -415,55 +415,128 @@ enum ZT_ResultCode
*/
#define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000))
/**
* The multipath algorithm in use by this node.
* Multipath bonding policy
*/
enum ZT_MultipathMode
enum ZT_MultipathBondingPolicy
{
/**
* No fault tolerance or balancing.
* Normal operation. No fault tolerance, no load balancing
*/
ZT_MULTIPATH_NONE = 0,
ZT_BONDING_POLICY_NONE = 0,
/**
* Sends traffic out on all paths.
* Sends traffic out on only one path at a time. Configurable immediate
* fail-over.
*/
ZT_MULTIPATH_BROADCAST = 1,
ZT_BONDING_POLICY_ACTIVE_BACKUP = 1,
/**
* Sends traffic out on only one path at a time. Immediate fail-over.
* Sends traffic out on all paths
*/
ZT_MULTIPATH_ACTIVE_BACKUP= 2,
ZT_BONDING_POLICY_BROADCAST = 2,
/**
* Sends traffic out on all interfaces according to a uniform random distribution.
* Stripes packets across all paths
*/
ZT_MULTIPATH_BALANCE_RANDOM = 3,
ZT_BONDING_POLICY_BALANCE_RR = 3,
/**
* Stripes packets across all paths.
* Packets destined for specific peers will always be sent over the same
* path.
*/
ZT_MULTIPATH_BALANCE_RR_OPAQUE = 4,
ZT_BONDING_POLICY_BALANCE_XOR = 4,
/**
* Balances flows across all paths.
* Balances flows among all paths according to path performance
*/
ZT_MULTIPATH_BALANCE_RR_FLOW = 5,
ZT_BONDING_POLICY_BALANCE_AWARE = 5
};
/**
* Multipath active re-selection policy (slaveSelectMethod)
*/
enum ZT_MultipathSlaveSelectMethod
{
/**
* Primary slave regains status as active slave whenever it comes back up
* (default when slaves are explicitly specified)
*/
ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0,
/**
* Hashes flows across all paths.
* Primary slave regains status as active slave when it comes back up, but
* only if it is better than the currently-active slave.
*/
ZT_MULTIPATH_BALANCE_XOR_FLOW = 6,
ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1,
/**
* Balances traffic across all paths according to observed performance.
* Primary slave regains status as active slave only if the currently-active
* slave fails.
*/
ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE = 7,
ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2,
/**
* Balances flows across all paths.
* The primary slave can change if a superior path is detected.
* (default if user provides no fail-over guidance)
*/
ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW = 8,
ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3
};
/**
* Mode of multipath slave interface
*/
enum ZT_MultipathSlaveMode
{
ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0,
ZT_MULTIPATH_SLAVE_MODE_SPARE = 1
};
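
For reference, the Slave constructor used later in this commit (see BondController::getSlaveBySocket) takes one of these modes directly. A minimal sketch, assuming an interface named eth1 and default values for the remaining parameters:

// Sketch: register eth1 as a spare slave; the constructor signature mirrors the one
// used in BondController::getSlaveBySocket(), with all other parameters left at defaults.
SharedPtr<Slave> spare = new Slave(std::string("eth1"), 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0);
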
/**
* Strategy for path monitoring
*/
enum ZT_MultipathMonitorStrategy
{
/**
* Use bonding policy's default strategy
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT = 0,
/**
* Does not actively send probes to judge aliveness; instead relies
* on conventional traffic and summary statistics.
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE = 1,
/**
* Sends probes at a constant rate to judge aliveness.
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE = 2,
/**
* Sends probes at varying rates which correlate to native
* traffic loads to judge aliveness.
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3
};
/**
* Indices for the path quality weight vector
*/
enum ZT_MultipathQualityWeightIndex
{
ZT_QOS_LAT_IDX,
ZT_QOS_LTM_IDX,
ZT_QOS_PDV_IDX,
ZT_QOS_PLR_IDX,
ZT_QOS_PER_IDX,
ZT_QOS_THR_IDX,
ZT_QOS_THM_IDX,
ZT_QOS_THV_IDX,
ZT_QOS_AGE_IDX,
ZT_QOS_SCP_IDX,
ZT_QOS_WEIGHT_SIZE
};
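
These indices describe the layout of the user-supplied weight vector consumed by Bond::setUserQualityWeights() (declared in node/Bond.hpp below). A hedged sketch, where the bond pointer and the specific weights are illustrative:

// Sketch: weight latency, delay variance, and loss only; remaining factors get zero weight.
float weights[ZT_QOS_WEIGHT_SIZE] = { 0.0f };
weights[ZT_QOS_LAT_IDX] = 0.4f;
weights[ZT_QOS_PDV_IDX] = 0.3f;
weights[ZT_QOS_PLR_IDX] = 0.3f;
bond->setUserQualityWeights(weights, ZT_QOS_WEIGHT_SIZE);
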
/**
@ -1272,44 +1345,49 @@ typedef struct
uint64_t trustedPathId;
/**
* One-way latency
* Mean latency
*/
float latency;
float latencyMean;
/**
* How much latency varies over time
* Maximum observed latency
*/
float packetDelayVariance;
float latencyMax;
/**
* How much observed throughput varies over time
* Variance of latency
*/
float throughputDisturbCoeff;
float latencyVariance;
/**
* Packet Error Ratio (PER)
*/
float packetErrorRatio;
/**
* Packet Loss Ratio (PLR)
* Packet loss ratio
*/
float packetLossRatio;
/**
* Stability of the path
* Packet error ratio
*/
float stability;
float packetErrorRatio;
/**
* Current throughput (moving average)
* Mean throughput
*/
uint64_t throughput;
uint64_t throughputMean;
/**
* Maximum observed throughput for this path
* Maximum observed throughput
*/
uint64_t maxThroughput;
float throughputMax;
/**
* Throughput variance
*/
float throughputVariance;
/**
* Address scope
*/
uint8_t scope;
/**
* Percentage of traffic allocated to this path
@ -1319,7 +1397,9 @@ typedef struct
/**
* Name of physical interface (for monitoring)
*/
char *ifname;
char ifname[32];
uint64_t localSocket;
/**
* Is path expired?
@ -1373,9 +1453,11 @@ typedef struct
unsigned int pathCount;
/**
* Whether this peer was ever reachable via an aggregate link
* Whether multiple paths to this peer are bonded
*/
bool hadAggregateLink;
bool isBonded;
int bondingPolicy;
/**
* Known network paths to peer

node/Bond.cpp Normal file (1730 lines)

File diff suppressed because it is too large.

node/Bond.hpp Normal file (689 lines)

@ -0,0 +1,689 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_BOND_HPP
#define ZT_BOND_HPP
#include <map>
#include "Path.hpp"
#include "Peer.hpp"
#include "../osdep/Slave.hpp"
#include "Flow.hpp"
namespace ZeroTier {
class RuntimeEnvironment;
class Slave;
class Bond
{
friend class SharedPtr<Bond>;
friend class Peer;
friend class BondController;
struct PathQualityComparator
{
bool operator ()(const SharedPtr<Path> & a, const SharedPtr<Path> & b)
{
if(a->_failoverScore == b->_failoverScore) {
return a < b;
}
return a->_failoverScore > b->_failoverScore;
}
};
public:
// TODO: Remove
bool _header;
int64_t _lastLogTS;
int64_t _lastPrintTS;
void dumpInfo(const int64_t now);
bool relevant();
SharedPtr<Slave> getSlave(const SharedPtr<Path>& path);
/**
* Constructor. For use only in first initialization in Node
*
* @param renv Runtime environment
*/
Bond(const RuntimeEnvironment *renv);
/**
* Constructor. Creates a bond based off of ZT defaults
*
* @param renv Runtime environment
* @param policy Bonding policy
* @param peer
*/
Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr<Peer>& peer);
/**
* Constructor. For use when user intends to manually specify parameters
*
* @param basePolicy
* @param policyAlias
* @param peer
*/
Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr<Peer>& peer);
/**
* Constructor. Creates a bond based off of a user-defined bond template
*
* @param renv Runtime environment
* @param original
* @param peer
*/
Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr<Peer>& peer);
/**
*
* @return
*/
std::string policyAlias() { return _policyAlias; }
/**
* Inform the bond about the path that its peer just learned about
*
* @param path Newly-learned Path which should now be handled by the Bond
* @param now Current time
*/
void nominatePath(const SharedPtr<Path>& path, int64_t now);
/**
* Propagate and memoize often-used bonding preferences for each path
*/
void applyUserPrefs();
/**
* Check path states and perform bond rebuilds if needed.
*
* @param now Current time
* @param rebuild Whether or not the bond should be reconstructed.
*/
void curateBond(const int64_t now, bool rebuild);
/**
* Periodically perform statistical summaries of quality metrics for all paths.
*
* @param now Current time
*/
void estimatePathQuality(int64_t now);
/**
* Record an invalid incoming packet. This packet failed
* MAC/compression/cipher checks and will now contribute to a
* Packet Error Ratio (PER).
*
* @param path Path over which packet was received
*/
void recordIncomingInvalidPacket(const SharedPtr<Path>& path);
/**
* Record statistics for an outbound packet.
*
* @param path Path over which packet is being sent
* @param packetId Packet ID
* @param payloadLength Packet data length
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void recordOutgoingPacket(const SharedPtr<Path> &path, uint64_t packetId,
uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now);
/**
* Process the contents of an inbound VERB_QOS_MEASUREMENT to gather path quality observations.
*
* @param now Current time
* @param count Number of records
* @param rx_id table of packet IDs
* @param rx_ts table of holding times
*/
void receivedQoS(const SharedPtr<Path>& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts);
/**
* Process the contents of an inbound VERB_ACK to gather path quality observations.
*
* @param path Path over which packet was received
* @param now Current time
* @param ackedBytes Number of bytes ACKed by this VERB_ACK
*/
void receivedAck(const SharedPtr<Path>& path, int64_t now, int32_t ackedBytes);
/**
* Generate the contents of a VERB_QOS_MEASUREMENT packet.
*
* @param now Current time
* @param qosBuffer destination buffer
* @return Size of payload
*/
int32_t generateQoSPacket(const SharedPtr<Path>& path, int64_t now, char *qosBuffer);
/**
* Record statistics for an inbound packet.
*
* @param path Path over which packet was received
* @param packetId Packet ID
* @param payloadLength Packet data length
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void recordIncomingPacket(const SharedPtr<Path>& path, uint64_t packetId, uint16_t payloadLength,
Packet::Verb verb, int32_t flowId, int64_t now);
/**
* Determines the most appropriate path for packet and flow egress. This decision is made by
* the underlying bonding policy as well as QoS-related statistical observations of path quality.
*
* @param now Current time
* @param flowId Flow ID
* @return Pointer to suggested Path
*/
SharedPtr<Path> getAppropriatePath(int64_t now, int32_t flowId);
/**
* Creates a new flow record
*
* @param path Path over which flow shall be handled
* @param flowId Flow ID
* @param entropy A byte of entropy to be used by the bonding algorithm
* @param now Current time
* @return Pointer to newly-created Flow
*/
SharedPtr<Flow> createFlow(const SharedPtr<Path> &path, int32_t flowId, unsigned char entropy, int64_t now);
/**
* Removes flow records that are past a certain age limit.
*
* @param age Age threshold to be forgotten
* @param oldest Whether only the oldest shall be forgotten
* @param now Current time
*/
void forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now);
/**
* Assigns a new flow to a bonded path
*
* @param flow Flow to be assigned
* @param now Current time
*/
bool assignFlowToBondedPath(SharedPtr<Flow> &flow, int64_t now);
/**
* Determine whether a path change should occur given the remote peer's reported utility and our
* local peer's known utility. This has the effect of assigning inbound and outbound traffic to
* the same path.
*
* @param now Current time
* @param path Path over which the negotiation request was received
* @param remoteUtility How much utility the remote peer claims to gain by using the declared path
*/
void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr<Path> &path, int16_t remoteUtility);
/**
* Determine state of path synchronization and whether a negotiation request
* shall be sent to the peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void pathNegotiationCheck(void *tPtr, const int64_t now);
/**
* Sends a VERB_ACK to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
* @param localSocket Local source socket
* @param atAddress
* @param now Current time
*/
void sendACK(void *tPtr,const SharedPtr<Path> &path,int64_t localSocket,
const InetAddress &atAddress,int64_t now);
/**
* Sends a VERB_QOS_MEASUREMENT to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
* @param localSocket Local source socket
* @param atAddress
* @param now Current time
*/
void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr<Path> &path,int64_t localSocket,
const InetAddress &atAddress,int64_t now);
/**
* Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
*/
void sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr<Path> &path);
/**
*
* @param now Current time
*/
void processBalanceTasks(int64_t now);
/**
* Perform periodic tasks unique to active-backup
*
* @param now Current time
*/
void processActiveBackupTasks(int64_t now);
/**
* Switches the active slave in an active-backup scenario to the next best during
* a failover event.
*
* @param now Current time
*/
void dequeueNextActiveBackupPath(uint64_t now);
/**
* Set bond parameters to reasonable defaults; these may later be overwritten by
* user-specified parameters.
*
* @param policy Bonding policy
*/
void setReasonableDefaults(int policy);
/**
* Check and assign user-specified quality weights to this bond.
*
* @param weights Set of user-specified weights
* @param len Length of weight vector
*/
void setUserQualityWeights(float weights[], int len);
/**
* @param latencyInMilliseconds Maximum acceptable latency.
*/
void setMaxAcceptableLatency(int16_t latencyInMilliseconds) {
_maxAcceptableLatency = latencyInMilliseconds;
}
/**
* @param latencyInMilliseconds Maximum acceptable (mean) latency.
*/
void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) {
_maxAcceptableMeanLatency = latencyInMilliseconds;
}
/**
* @param latencyVarianceInMilliseconds Maximum acceptable packet delay variance (jitter).
*/
void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) {
_maxAcceptablePacketDelayVariance = latencyVarianceInMilliseconds;
}
/**
* @param lossRatio Maximum acceptable packet loss ratio (PLR).
*/
void setMaxAcceptablePacketLossRatio(float lossRatio) {
_maxAcceptablePacketLossRatio = lossRatio;
}
/**
* @param errorRatio Maximum acceptable packet error ratio (PER).
*/
void setMaxAcceptablePacketErrorRatio(float errorRatio) {
_maxAcceptablePacketErrorRatio = errorRatio;
}
/**
* @param minAlloc Minimum acceptable path allocation (fraction between 0.0 and 1.0).
*/
void setMinAcceptableAllocation(float minAlloc) {
_minAcceptableAllocation = minAlloc * 255;
}
/**
* @return Whether the user has defined slaves for use on this bond
*/
inline bool userHasSpecifiedSlaves() { return _userHasSpecifiedSlaves; }
/**
* @return Whether the user has defined a set of failover slave(s) for this bond
*/
inline bool userHasSpecifiedFailoverInstructions() { return _userHasSpecifiedFailoverInstructions; };
/**
* @return Whether the user has specified a primary slave
*/
inline bool userHasSpecifiedPrimarySlave() { return _userHasSpecifiedPrimarySlave; }
/**
* @return Whether the user has specified slave speeds
*/
inline bool userHasSpecifiedSlaveSpeeds() { return _userHasSpecifiedSlaveSpeeds; }
/**
* Periodically perform maintenance tasks for each active bond.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void processBackgroundTasks(void *tPtr, int64_t now);
/**
* Rate limit gate for VERB_ACK
*
* @param now Current time
* @return Whether the incoming packet should be rate-gated
*/
inline bool rateGateACK(const int64_t now)
{
_ackCutoffCount++;
int numToDrain = _lastAckRateCheck ? (now - _lastAckRateCheck) / ZT_ACK_DRAINAGE_DIVISOR : _ackCutoffCount;
_lastAckRateCheck = now;
if (_ackCutoffCount > numToDrain) {
_ackCutoffCount-=numToDrain;
} else {
_ackCutoffCount = 0;
}
return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_QOS_MEASUREMENT
*
* @param now Current time
* @return Whether the incoming packet should be rate-gated
*/
inline bool rateGateQoS(const int64_t now)
{
_qosCutoffCount++;
int numToDrain = (now - _lastQoSRateCheck) / ZT_QOS_DRAINAGE_DIVISOR;
_lastQoSRateCheck = now;
if (_qosCutoffCount > numToDrain) {
_qosCutoffCount-=numToDrain;
} else {
_qosCutoffCount = 0;
}
return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_PATH_NEGOTIATION_REQUEST
*
* @param now Current time
* @return Whether the incoming packet should be rate-gated
*/
inline bool rateGatePathNegotiation(const int64_t now)
{
if ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME)
++_pathNegotiationCutoffCount;
else _pathNegotiationCutoffCount = 0;
_lastPathNegotiationReceived = now;
return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT);
}
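
These gates are intended to be checked before any work is done on an inbound ACK, QoS, or negotiation packet; the pattern in IncomingPacket.cpp later in this commit looks roughly like the following (the surrounding variables come from the packet handler):

// Sketch: bail out early if the peer has no bond or if the bond's limiter says
// we have already processed enough VERB_QOS_MEASUREMENT packets recently.
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGateQoS(RR->node->now())) {
    return true; // ignore
}
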
/**
* @param interval Maximum amount of time user expects a failover to take on this bond.
*/
inline void setFailoverInterval(uint32_t interval) { _failoverInterval = interval; }
/**
* @param strategy The strategy that the bond uses to probe for path aliveness and quality
*/
inline void setSlaveMonitorStrategy(uint8_t strategy) { _slaveMonitorStrategy = strategy; }
/**
* @return the current up delay parameter
*/
inline uint16_t getUpDelay() { return _upDelay; }
/**
* @param upDelay Length of time before a newly-discovered path is admitted to the bond
*/
inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } }
/**
* @return Length of time before a newly-failed path is removed from the bond
*/
inline uint16_t getDownDelay() { return _downDelay; }
/**
* @param downDelay Length of time before a newly-failed path is removed from the bond
*/
inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } }
/**
* @return the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.)
*/
inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; }
/**
* Set the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.)
*
* @param interval How often gratuitous VERB_HELLO(s) are sent to the remote peer.
*/
inline void setBondMonitorInterval(uint16_t interval) { _bondMonitorInterval = interval; }
/**
* @param policy Bonding policy for this bond
*/
inline void setPolicy(uint8_t policy) { _bondingPolicy = policy; }
/**
* @return the current bonding policy
*/
inline uint8_t getPolicy() { return _bondingPolicy; }
/**
*
* @param allowFlowHashing
*/
inline void setFlowHashing(bool allowFlowHashing) { _allowFlowHashing = allowFlowHashing; }
/**
* @return Whether flow-hashing is currently enabled for this bond.
*/
bool flowHashingEnabled() { return _allowFlowHashing; }
/**
*
* @param packetsPerSlave
*/
inline void setPacketsPerSlave(int packetsPerSlave) { _packetsPerSlave = packetsPerSlave; }
/**
*
* @param slaveSelectMethod
*/
inline void setSlaveSelectMethod(uint8_t method) { _abSlaveSelectMethod = method; }
/**
*
* @return
*/
inline uint8_t getSlaveSelectMethod() { return _abSlaveSelectMethod; }
/**
*
* @param allowPathNegotiation
*/
inline void setAllowPathNegotiation(bool allowPathNegotiation) { _allowPathNegotiation = allowPathNegotiation; }
/**
*
* @return
*/
inline bool allowPathNegotiation() { return _allowPathNegotiation; }
private:
const RuntimeEnvironment *RR;
AtomicCounter __refCount;
/**
* Custom name given by the user to this bond type.
*/
std::string _policyAlias;
/**
* Paths that this bond has been made aware of but that are not necessarily
* part of the bond proper.
*/
SharedPtr<Path> _paths[ZT_MAX_PEER_NETWORK_PATHS];
/**
* Set of indices corresponding to paths currently included in the bond proper. This
* may only be updated during a call to curateBond(). The reason for this is so that
* we can simplify the high frequency packet egress logic.
*/
int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS];
/**
* Number of paths currently included in the _bondedIdx set.
*/
int _numBondedPaths;
/**
* Flows hashed according to port and protocol
*/
std::map<int32_t,SharedPtr<Flow> > _flows;
float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path.
uint8_t _bondingPolicy;
uint32_t _upDelay;
uint32_t _downDelay;
// active-backup
SharedPtr<Path> _abPath; // current active path
std::list<SharedPtr<Path> > _abFailoverQueue;
uint8_t _abSlaveSelectMethod; // slave re-selection policy for the primary slave in active-backup
uint64_t _lastActiveBackupPathChange;
// balance-rr
uint8_t _rrIdx; // index to path currently in use during Round Robin operation
uint16_t _rrPacketsSentOnCurrSlave; // number of packets sent on this slave since the most recent path switch.
/**
* How many packets will be sent on a path before moving to the next path
* in the round-robin sequence. A value of zero will cause a random path
* selection for each outgoing packet.
*/
int _packetsPerSlave;
// balance-aware
uint64_t _totalBondUnderload;
// dynamic slave monitoring
uint8_t _slaveMonitorStrategy;
uint64_t _lastFrame;
uint32_t _dynamicPathMonitorInterval;
// path negotiation
int16_t _localUtility;
SharedPtr<Path> negotiatedPath;
uint8_t _numSentPathNegotiationRequests;
unsigned int _pathNegotiationCutoffCount;
bool _allowPathNegotiation;
uint64_t _lastPathNegotiationReceived;
uint64_t _lastSentPathNegotiationRequest;
// timers
uint32_t _failoverInterval;
uint32_t _qosSendInterval;
uint32_t _ackSendInterval;
uint16_t _ackCutoffCount;
uint64_t _lastAckRateCheck;
uint16_t _qosCutoffCount;
uint64_t _lastQoSRateCheck;
uint32_t throughputMeasurementInterval;
uint32_t _qualityEstimationInterval;
// timestamps
uint64_t _lastCheckUserPreferences;
uint64_t _lastQualityEstimation;
uint64_t _lastFlowStatReset;
uint64_t _lastFlowExpirationCheck;
uint64_t _lastFlowRebalance;
uint64_t _lastPathNegotiationCheck;
uint64_t _lastBackgroundTaskCheck;
float _maxAcceptablePacketLossRatio;
float _maxAcceptablePacketErrorRatio;
uint16_t _maxAcceptableLatency;
uint16_t _maxAcceptableMeanLatency;
uint16_t _maxAcceptablePacketDelayVariance;
uint8_t _minAcceptableAllocation;
/**
* Default initial punishment inflicted on misbehaving paths. Punishment slowly
* drains linearly. For each eligibility change the remaining punishment is doubled.
*/
uint32_t _defaultPathRefractoryPeriod;
/**
* Whether the current bonding policy requires computation of path statistics
*/
bool _shouldCollectPathStatistics;
/**
* Free byte of entropy that is updated on every packet egress event.
*/
unsigned char _freeRandomByte;
/**
* Remote peer that this bond services
*/
SharedPtr<Peer> _peer;
Mutex _paths_m;
Mutex _flows_m;
/**
* Whether the user has specified slaves for this bond.
*/
bool _userHasSpecifiedSlaves;
/**
* Whether the user has specified a primary slave for this bond.
*/
bool _userHasSpecifiedPrimarySlave;
/**
* Whether the user has specified failover instructions for this bond.
*/
bool _userHasSpecifiedFailoverInstructions;
/**
* Whether the user has specified slave speeds for this bond.
*/
bool _userHasSpecifiedSlaveSpeeds;
/**
* How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a
* path is still active. A value of zero (0) will disable active path
* monitoring; as a result, all monitoring will be a function of traffic.
*/
uint16_t _bondMonitorInterval;
/**
* Whether or not flow hashing is allowed.
*/
bool _allowFlowHashing;
};
} // namespace ZeroTier
#endif
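
Taken together, the setters above are the knobs a caller uses to shape a bond; a hedged configuration sketch (the bond pointer and the chosen values are illustrative, not defaults from this commit):

// Sketch: an active-backup bond that re-selects the best slave and aims for fast fail-over.
bond->setPolicy(ZT_BONDING_POLICY_ACTIVE_BACKUP);
bond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE);
bond->setFailoverInterval(250);    // ms
bond->setBondMonitorInterval(100); // ms between gratuitous VERB_HELLOs
bond->setUpDelay(0);
bond->setDownDelay(0);
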

node/BondController.cpp Normal file (203 lines)

@ -0,0 +1,203 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#include "BondController.hpp"
#include "Peer.hpp"
namespace ZeroTier {
int BondController::_minReqPathMonitorInterval;
uint8_t BondController::_defaultBondingPolicy;
BondController::BondController(const RuntimeEnvironment *renv) :
RR(renv)
{
bondStartTime = RR->node->now();
}
bool BondController::slaveAllowed(std::string &policyAlias, SharedPtr<Slave> slave)
{
bool foundInDefinitions = false;
if (_slaveDefinitions.count(policyAlias)) {
auto it = _slaveDefinitions[policyAlias].begin();
while (it != _slaveDefinitions[policyAlias].end()) {
if (slave->ifname() == (*it)->ifname()) {
foundInDefinitions = true;
break;
}
++it;
}
}
return _slaveDefinitions[policyAlias].empty() || foundInDefinitions;
}
void BondController::addCustomSlave(std::string& policyAlias, SharedPtr<Slave> slave)
{
Mutex::Lock _l(_slaves_m);
_slaveDefinitions[policyAlias].push_back(slave);
auto search = _interfaceToSlaveMap[policyAlias].find(slave->ifname());
if (search == _interfaceToSlaveMap[policyAlias].end()) {
slave->setAsUserSpecified(true);
_interfaceToSlaveMap[policyAlias].insert(std::pair<std::string, SharedPtr<Slave>>(slave->ifname(), slave));
} else {
fprintf(stderr, "slave already exists=%s\n", slave->ifname().c_str());
// Slave is already defined, overlay user settings
}
}
bool BondController::addCustomPolicy(const SharedPtr<Bond>& newBond)
{
Mutex::Lock _l(_bonds_m);
if (!_bondPolicyTemplates.count(newBond->policyAlias())) {
_bondPolicyTemplates[newBond->policyAlias()] = newBond;
return true;
}
return false;
}
bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias)
{
Mutex::Lock _l(_bonds_m);
if (!_policyTemplateAssignments.count(identity)) {
_policyTemplateAssignments[identity] = policyAlias;
return true;
}
return false;
}
SharedPtr<Bond> BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr<Peer>& peer)
{
fprintf(stderr, "createTransportTriggeredBond\n");
Mutex::Lock _l(_bonds_m);
int64_t identity = peer->identity().address().toInt();
Bond *bond = nullptr;
if (!_bonds.count(identity)) {
std::string policyAlias;
int _defaultBondingPolicy = defaultBondingPolicy();
fprintf(stderr, "new bond, registering for %llx\n", identity);
if (!_policyTemplateAssignments.count(identity)) {
if (defaultBondingPolicy()) {
fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy);
bond = new Bond(renv, _defaultBondingPolicy, peer);
}
if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) {
fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str());
bond = new Bond(renv, *(_bondPolicyTemplates[_defaultBondingPolicyStr].ptr()), peer);
}
}
else {
fprintf(stderr, " assignment found for %llx, using it as a template (%s)\n", identity,_policyTemplateAssignments[identity].c_str());
if (!_bondPolicyTemplates[_policyTemplateAssignments[identity]]) {
fprintf(stderr, "unable to locate template (%s), ignoring assignment for (%llx), using defaults\n", _policyTemplateAssignments[identity].c_str(), identity);
bond = new Bond(renv, _defaultBondingPolicy, peer);
}
else {
bond = new Bond(renv, *(_bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr()), peer);
}
}
}
else {
fprintf(stderr, "bond already exists for %llx, cannot re-register. exiting\n", identity); exit(0); // TODO: Remove
}
if (bond) {
_bonds[identity] = bond;
/**
* Determine if user has specified anything that could affect the bonding policy's decisions
*/
if (_interfaceToSlaveMap.count(bond->policyAlias())) {
std::map<std::string, SharedPtr<Slave> >::iterator it = _interfaceToSlaveMap[bond->policyAlias()].begin();
while (it != _interfaceToSlaveMap[bond->policyAlias()].end()) {
if (it->second->isUserSpecified()) {
bond->_userHasSpecifiedSlaves = true;
}
if (it->second->isUserSpecified() && it->second->primary()) {
bond->_userHasSpecifiedPrimarySlave = true;
}
if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) {
bond->_userHasSpecifiedFailoverInstructions = true;
}
if (it->second->isUserSpecified() && (it->second->speed() > 0)) {
bond->_userHasSpecifiedSlaveSpeeds = true;
}
++it;
}
}
return bond;
}
return SharedPtr<Bond>();
}
SharedPtr<Slave> BondController::getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket)
{
Mutex::Lock _l(_slaves_m);
char ifname[16];
_phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16);
std::string ifnameStr(ifname);
auto search = _interfaceToSlaveMap[policyAlias].find(ifnameStr);
if (search == _interfaceToSlaveMap[policyAlias].end()) {
SharedPtr<Slave> s = new Slave(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0);
_interfaceToSlaveMap[policyAlias].insert(std::pair<std::string,SharedPtr<Slave> >(ifnameStr, s));
return s;
}
else {
return search->second;
}
}
SharedPtr<Slave> BondController::getSlaveByName(const std::string& policyAlias, const std::string& ifname)
{
Mutex::Lock _l(_slaves_m);
auto search = _interfaceToSlaveMap[policyAlias].find(ifname);
if (search != _interfaceToSlaveMap[policyAlias].end()) {
return search->second;
}
return SharedPtr<Slave>();
}
bool BondController::allowedToBind(const std::string& ifname)
{
return true;
/*
if (!_defaultBondingPolicy) {
return true; // no restrictions
}
Mutex::Lock _l(_slaves_m);
if (_interfaceToSlaveMap.empty()) {
return true; // no restrictions
}
std::map<std::string, std::map<std::string, SharedPtr<Slave> > >::iterator policyItr = _interfaceToSlaveMap.begin();
while (policyItr != _interfaceToSlaveMap.end()) {
std::map<std::string, SharedPtr<Slave> >::iterator slaveItr = policyItr->second.begin();
while (slaveItr != policyItr->second.end()) {
if (slaveItr->first == ifname) {
return true;
}
++slaveItr;
}
++policyItr;
}
return false;
*/
}
void BondController::processBackgroundTasks(void *tPtr, const int64_t now)
{
Mutex::Lock _l(_bonds_m);
std::map<int64_t,SharedPtr<Bond> >::iterator bondItr = _bonds.begin();
while (bondItr != _bonds.end()) {
bondItr->second->processBackgroundTasks(tPtr, now);
++bondItr;
}
}
} // namespace ZeroTier
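
A bond is only created when bonding is actually configured; a hypothetical call site is sketched below (the real trigger lives in the peer/path-learning code elsewhere in this commit, and the RR->bc member name is an assumption):

// Sketch: when a peer becomes reachable, ask the controller for a bond if bonding is in use.
if (RR->bc->inUse()) {
    SharedPtr<Bond> bond = RR->bc->createTransportTriggeredBond(RR, peer);
    // if non-NULL, the bond is now registered and serviced by processBackgroundTasks()
}
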

node/BondController.hpp Normal file (231 lines)

@ -0,0 +1,231 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_BONDCONTROLLER_HPP
#define ZT_BONDCONTROLLER_HPP
#include <map>
#include <vector>
#include "SharedPtr.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Slave.hpp"
namespace ZeroTier {
class RuntimeEnvironment;
class Bond;
class Peer;
class BondController
{
friend class Bond;
public:
BondController(const RuntimeEnvironment *renv);
/**
* @return Whether the given slave may be used with the given bonding policy (true if no slaves are defined for that policy, or if this slave is among those defined)
*/
bool slaveAllowed(std::string &policyAlias, SharedPtr<Slave> slave);
/**
* @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements.
*/
int minReqPathMonitorInterval() { return _minReqPathMonitorInterval; }
/**
* Set the minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements.
*/
static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) { _minReqPathMonitorInterval = minReqPathMonitorInterval; }
/**
* @return Whether the bonding layer is currently set up to be used.
*/
bool inUse() { return !_bondPolicyTemplates.empty() || _defaultBondingPolicy; }
/**
* @param basePolicyName Bonding policy name (See ZeroTierOne.h)
* @return The bonding policy code for a given human-readable bonding policy name
*/
static int getPolicyCodeByStr(const std::string& basePolicyName)
{
if (basePolicyName == "active-backup") { return 1; }
if (basePolicyName == "broadcast") { return 2; }
if (basePolicyName == "balance-rr") { return 3; }
if (basePolicyName == "balance-xor") { return 4; }
if (basePolicyName == "balance-aware") { return 5; }
return 0; // "none"
}
/**
* @param policy Bonding policy code (See ZeroTierOne.h)
* @return The human-readable name for the given bonding policy code
*/
static std::string getPolicyStrByCode(int policy)
{
if (policy == 1) { return "active-backup"; }
if (policy == 2) { return "broadcast"; }
if (policy == 3) { return "balance-rr"; }
if (policy == 4) { return "balance-xor"; }
if (policy == 5) { return "balance-aware"; }
return "none";
}
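
These two helpers presumably bridge the human-readable policy names used in configuration and the ZT_BONDING_POLICY_* codes from ZeroTierOne.h; a small round-trip sketch:

// "balance-aware" -> 5 (ZT_BONDING_POLICY_BALANCE_AWARE) -> "balance-aware"
int code = BondController::getPolicyCodeByStr("balance-aware");
std::string name = BondController::getPolicyStrByCode(code);
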
/**
* Sets the default bonding policy for new or undefined bonds.
*
* @param bp Bonding policy
*/
void setBondingLayerDefaultPolicy(uint8_t bp) { _defaultBondingPolicy = bp; }
/**
* Sets the default (custom) bonding policy for new or undefined bonds.
*
* @param alias Human-readable string alias for bonding policy
*/
void setBondingLayerDefaultPolicyStr(std::string alias) { _defaultBondingPolicyStr = alias; }
/**
* @return The default bonding policy
*/
static int defaultBondingPolicy() { return _defaultBondingPolicy; }
/**
* Add a user-defined slave to a given bonding policy.
*
* @param policyAlias User-defined custom name for variant of bonding policy
* @param slave Pointer to new slave definition
*/
void addCustomSlave(std::string& policyAlias, SharedPtr<Slave> slave);
/**
* Add a user-defined bonding policy that is based on one of the standard types.
*
* @param newBond Pointer to custom Bond object
* @return Whether a uniquely-named custom policy was successfully added
*/
bool addCustomPolicy(const SharedPtr<Bond>& newBond);
/**
* Assigns a specific bonding policy to a peer.
*
* @param identity Peer identity (address) that the policy should be applied to
* @param policyAlias Alias of the policy to assign
* @return Whether the assignment was made (false if an assignment already existed)
*/
bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias);
/**
* Add a new bond to the bond controller.
*
* @param renv Runtime environment
* @param peer Remote peer that this bond services
* @return A pointer to the newly created Bond
*/
SharedPtr<Bond> createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr<Peer>& peer);
/**
* Periodically perform maintenance tasks for the bonding layer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void processBackgroundTasks(void *tPtr, int64_t now);
/**
* Gets a reference to a physical slave definition given a policy alias and a local socket.
*
* @param policyAlias Policy in use
* @param localSocket Local source socket
* @return Physical slave definition
*/
SharedPtr<Slave> getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket);
/**
* Gets a reference to a physical slave definition given its human-readable system name.
*
* @param policyAlias Policy in use
* @param ifname Alphanumeric human-readable name
* @return Physical slave definition
*/
SharedPtr<Slave> getSlaveByName(const std::string& policyAlias, const std::string& ifname);
/**
* @param ifname Name of interface that we want to know if we can bind to
*/
bool allowedToBind(const std::string& ifname);
uint64_t getBondStartTime() { return bondStartTime; }
private:
Phy<BondController *> *_phy;
const RuntimeEnvironment *RR;
Mutex _bonds_m;
Mutex _slaves_m;
/**
* The last time that the bond controller updated the set of bonds.
*/
uint64_t _lastBackgroundBondControlTaskCheck;
/**
* The minimum monitoring interval among all paths in this bond.
*/
static int _minReqPathMonitorInterval;
/**
* The default bonding policy used for new bonds unless otherwise specified.
*/
static uint8_t _defaultBondingPolicy;
/**
* The default bonding policy used for new bonds unless otherwise specified.
*/
std::string _defaultBondingPolicyStr;
/**
* All currently active bonds.
*/
std::map<int64_t,SharedPtr<Bond> > _bonds;
/**
* Map of peers to custom bonding policies
*/
std::map<int64_t,std::string> _policyTemplateAssignments;
/**
* User-defined bonding policies (can be assigned to a peer)
*/
std::map<std::string,SharedPtr<Bond> > _bondPolicyTemplates;
/**
* Set of slaves defined for a given bonding policy
*/
std::map<std::string,std::vector<SharedPtr<Slave> > > _slaveDefinitions;
/**
* Set of slave objects mapped to their physical interfaces
*/
std::map<std::string, std::map<std::string, SharedPtr<Slave> > > _interfaceToSlaveMap;
// TODO: Remove
uint64_t bondStartTime;
};
} // namespace ZeroTier
#endif
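
A hedged sketch of wiring a user-defined policy through this interface; the local.conf parsing that would drive these calls is outside this diff, and the alias, interface name, and bc pointer are illustrative:

// Sketch: pin a physical interface to a custom policy alias, then assign that alias to a
// specific peer (identified by its address as an int64_t, as in createTransportTriggeredBond()).
std::string alias("wan-bond");
SharedPtr<Slave> primary = new Slave(std::string("eth0"), 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_PRIMARY, "", 0.0);
bc->addCustomSlave(alias, primary);
bc->assignBondingPolicyToPeer(peer->identity().address().toInt(), alias);
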


@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -192,7 +192,7 @@
/**
* Minimum delay between timer task checks to prevent thrashing
*/
#define ZT_CORE_TIMER_TASK_GRANULARITY 500
#define ZT_CORE_TIMER_TASK_GRANULARITY 60
/**
* How often Topology::clean() and Network::clean() and similar are called, in ms
@ -253,203 +253,6 @@
*/
#define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000
/**
* How long before we consider a flow to be dead and remove it from the balancing
* policy's list.
*/
#define ZT_MULTIPATH_FLOW_EXPIRATION 60000
/**
* How frequently to check for changes to the system's network interfaces. When
* the service decides to use this constant it's because we want to react more
* quickly to new interfaces that pop up or go down.
*/
#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000
/**
* Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by
* this integer. This is to provide a mechanism for both peers to agree on which packets need
* special treatment without having to exchange information. Changing this value would be
* a breaking change and would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the form:
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (16) so that given a normally-distributed RNG output we will sample
* 1/16th (or ~6.25%) of packets.
*/
#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
#define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000
/**
* Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be
* processed within cutoff time. Separate totals are kept for each type but
* the limit is the same for both.
*
* This limits how often this peer will compute statistical estimates
* of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to
* CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent
* this from being useful for DOS amplification attacks.
*/
#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 128
/**
* Path choice history window size. This is used to keep track of which paths were
* previously selected so that we can maintain a target allocation over time.
*/
#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128
/**
* How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL
* since we will record a 0 bit/s measurement if no valid latency measurement was made within this
* window of time.
*/
#define ZT_PATH_LATENCY_SAMPLE_INTERVAL (ZT_MULTIPATH_PEER_PING_PERIOD * 2)
/**
* Interval used for rate-limiting the computation of path quality estimates.
*/
#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000
/**
* Number of samples to consider when computing real-time path statistics
*/
#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128
/**
* Number of samples to consider when computing performing long-term path quality analysis.
* By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can
* be set to any value greater than that to observe longer-term path quality behavior.
*/
#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ
/**
* Maximum acceptable Packet Delay Variance (PDV) over a path
*/
#define ZT_PATH_MAX_PDV 1000
/**
* Maximum acceptable time interval between expectation and receipt of at least one ACK over a path
*/
#define ZT_PATH_MAX_AGE 30000
/**
* Maximum acceptable mean latency over a path
*/
#define ZT_PATH_MAX_MEAN_LATENCY 1000
/**
* How much each factor contributes to the "stability" score of a path
*/
#if 0
#define ZT_PATH_CONTRIB_PDV (1.5 / 3.0)
#define ZT_PATH_CONTRIB_LATENCY (0.0 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.5 / 3.0)
#else
#define ZT_PATH_CONTRIB_PDV (1.0 / 3.0)
#define ZT_PATH_CONTRIB_LATENCY (1.0 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0)
#endif
/**
* How much each factor contributes to the "quality" score of a path
*/
#if 0
#define ZT_PATH_CONTRIB_STABILITY (2.00 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT (0.50 / 3.0)
#define ZT_PATH_CONTRIB_SCOPE (0.50 / 3.0)
#else
#define ZT_PATH_CONTRIB_STABILITY (0.75 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0)
#define ZT_PATH_CONTRIB_SCOPE (0.75 / 3.0)
#endif
/**
* How often a QoS packet is sent
*/
#define ZT_PATH_QOS_INTERVAL 3000
/**
* Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1
#define ZT_PATH_MAX_QOS_PACKET_SZ 1400
/**
* How many ID:sojourn time pairs in a single QoS packet
*/
#define ZT_PATH_QOS_TABLE_SIZE ((ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 16))
/**
* Maximum number of outgoing packets we monitor for QoS information
*/
#define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128
/**
* Timeout for QoS records
*/
#define ZT_PATH_QOS_TIMEOUT (ZT_PATH_QOS_INTERVAL * 2)
/**
* How often the service tests the path throughput
*/
#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL (ZT_PATH_ACK_INTERVAL * 8)
/**
* Minimum amount of time between each ACK packet
*/
#define ZT_PATH_ACK_INTERVAL 1000
/**
* How often an aggregate link statistics report is emitted into this tracing system
*/
#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 30000
/**
* How much an aggregate link's component paths can vary from their target allocation
* before the link is considered to be in a state of imbalance.
*/
#define ZT_PATH_IMBALANCE_THRESHOLD 0.20
/**
* Max allowable time spent in any queue
*/
#define ZT_QOS_TARGET 5 // ms
/**
* Time period where the time spent in the queue by a packet should fall below
* target at least once
*/
#define ZT_QOS_INTERVAL 100 // ms
/**
* The number of bytes that each queue is allowed to send during each DRR cycle.
* This approximates a single-byte-based fairness queuing scheme
*/
#define ZT_QOS_QUANTUM ZT_DEFAULT_MTU
/**
* The maximum total number of packets that can be queued among all
* active/inactive, old/new queues
*/
#define ZT_QOS_MAX_ENQUEUED_PACKETS 1024
/**
* Number of QoS queues (buckets)
*/
#define ZT_QOS_NUM_BUCKETS 9
/**
* All unspecified traffic is put in this bucket. Anything in a bucket with a smaller
* value is de-prioritized. Anything in a bucket with a higher value is prioritized over
* other traffic.
*/
#define ZT_QOS_DEFAULT_BUCKET 0
/**
* How frequently to send heartbeats over in-use paths
*/
@ -465,21 +268,6 @@
*/
#define ZT_PEER_PING_PERIOD 60000
/**
* Delay between full-fledge pings of directly connected peers.
*
* With multipath bonding enabled ping peers more often to measure
* packet loss and latency. This uses more bandwidth so is disabled
* by default to avoid increasing idle bandwidth use for regular
* links.
*/
#define ZT_MULTIPATH_PEER_PING_PERIOD (ZT_PEER_PING_PERIOD / 10)
/**
* How long before we consider a path to be dead in rapid fail-over scenarios
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 250
/**
* Paths are considered expired if they have not sent us a real packet in this long
*/
@ -490,6 +278,210 @@
*/
#define ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD (ZT_PEER_PING_PERIOD * 10)
/**
* Outgoing packets are only used for QoS/ACK statistical sampling if their
* packet ID is divisible by this integer. This is to provide a mechanism for
* both peers to agree on which packets need special treatment without having
* to exchange information. Changing this value would be a breaking change and
* would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the
* form:
*
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (16) so that given a normally-distributed RNG output
* we will sample 1/16th (or ~6.25%) of packets.
*/
#define ZT_QOS_ACK_DIVISOR 0x2
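
In other words, each side can decide locally whether a packet participates in QoS/ACK accounting by testing its ID against the divisor; a sketch of that predicate follows (the helper name is illustrative):

// True if this packet ID should be sampled for QoS/ACK bookkeeping.
// The divisor must be a power of two for the mask to be equivalent to modulo.
static inline bool shouldSampleForQoS(uint64_t packetId)
{
    return (packetId & (uint64_t)(ZT_QOS_ACK_DIVISOR - 1)) == 0;
}
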
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
#define ZT_QOS_ACK_CUTOFF_TIME 30000
/**
* Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be
* processed within cutoff time. Separate totals are kept for each type but
* the limit is the same for both.
*
* This limits how often this peer will compute statistical estimates
* of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to
* CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent
* this from being useful for DOS amplification attacks.
*/
#define ZT_QOS_ACK_CUTOFF_LIMIT 128
/**
* Minimum acceptable size for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_QOS_MIN_PACKET_SIZE (8 + 1)
/**
* Maximum acceptable size for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_QOS_MAX_PACKET_SIZE 1400
/**
* How many ID:sojourn time pairs are in a single QoS packet
*/
#define ZT_QOS_TABLE_SIZE ((ZT_QOS_MAX_PACKET_SIZE * 8) / (64 + 16))
/**
* Maximum number of outgoing packets we monitor for QoS information
*/
#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16)
/**
* Interval used for rate-limiting the computation of path quality estimates.
*/
#define ZT_QOS_COMPUTE_INTERVAL 1000
/**
* Number of samples to consider when processing real-time path statistics
*/
#define ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE 32
/**
* Number of samples to consider when processing long-term trends
*/
#define ZT_QOS_LONGTERM_SAMPLE_WIN_SIZE (ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE * 4)
/**
* Max allowable time spent in any queue (in ms)
*/
#define ZT_AQM_TARGET 5
/**
* Time period within which the time spent in the queue by a packet should fall
* below the target at least once (in ms).
*/
#define ZT_AQM_INTERVAL 100
/**
* The number of bytes that each queue is allowed to send during each DRR cycle.
* This approximates a single-byte-based fairness queuing scheme.
*/
#define ZT_AQM_QUANTUM ZT_DEFAULT_MTU
/**
* The maximum total number of packets that can be queued among all
* active/inactive, old/new queues.
*/
#define ZT_AQM_MAX_ENQUEUED_PACKETS 1024
/**
* Number of QoS queues (buckets)
*/
#define ZT_AQM_NUM_BUCKETS 9
/**
* All unspecified traffic is put in this bucket. Anything in a bucket with a
* smaller value is deprioritized. Anything in a bucket with a higher value is
* prioritized over other traffic.
*/
#define ZT_AQM_DEFAULT_BUCKET 0
/**
* How long before we consider a path to be dead in the general sense. This is
* used while searching for default or alternative paths to try in the absence
* of direct guidance from the user or a selection policy.
*/
#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000
/**
* How often flows are evaluated
*/
#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000
/**
* How long before we consider a flow to be dead and remove it from the
* policy's list.
*/
#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL 30000
/**
* How often a flow's statistical counters are reset
*/
#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL
/**
* Maximum number of flows allowed before we start forcibly forgetting old ones
*/
#define ZT_FLOW_MAX_COUNT (1024*64)
/**
* Minimum amount of time between flow rebalances across slave interfaces (if at all)
*/
#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000
/**
* How often flows are rebalanced across slave interfaces (if at all)
*/
#define ZT_FLOW_REBALANCE_INTERVAL 5000
/**
* A defensive timer to prevent path quality metrics from being
* processed too often.
*/
#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY
/**
* How often a bonding policy's background tasks are processed;
* some need more frequent attention than others.
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY
/**
* Minimum amount of time (since a previous transition) before the active-backup bonding
* policy is allowed to transition to a different slave. Only valid for active-backup.
*/
#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000
/**
* How often a peer checks that incoming (and outgoing) traffic on a bonded link is
* appropriately paired.
*/
#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000
/**
* Time horizon for path negotiation cutoff
*/
#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000
/**
* Maximum number of path negotiations within cutoff time
*
* This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses
* per CUTOFF_TIME milliseconds per peer to prevent this from being
* useful for DOS amplification attacks.
*/
#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8
/**
* How many times a peer will attempt to petition another peer to synchronize its
* traffic to the same path before giving up and surrendering to the other peer's preference.
*/
#define ZT_PATH_NEGOTIATION_TRY_COUNT 3
/**
* How much greater the quality of a path should be before an
* optimization procedure triggers a switch.
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10
/**
* Artificially inflates the failover score for paths which meet
* certain non-performance-related policy ranking criteria.
*/
#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500
#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000
#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000
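
A heavily hedged sketch of how these handicaps plausibly enter the failover score computed in Bond.cpp (suppressed in this diff): they are added on top of a performance-derived component so that policy-preferred paths outrank marginally better-performing ones. The variable names below are illustrative:

// Illustrative only: fold policy handicaps into a path's failover score.
int failoverScore = qualityComponent
    + (isPrimarySlave   ? ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY    : 0)
    + (isUserPreferred  ? ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED  : 0)
    + (isNegotiatedPath ? ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED : 0);
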
/**
* An indicator that no flow is to be associated with the given packet
*/
#define ZT_QOS_NO_FLOW -1
/**
* Timeout for overall peer activity (measured from last receive)
*/
@ -557,20 +549,32 @@
*/
#define ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH 120000
/**
* Interval between direct path pushes in milliseconds if we are currently in multipath
* mode. In this mode the distinction between ZT_DIRECT_PATH_PUSH_INTERVAL and
* ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH does not exist since we want to inform other
* peers of this peer's new link/address as soon as possible so that both peers can
* begin forming an aggregated link.
*/
#define ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH (ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH / 16)
/**
* Time horizon for push direct paths cutoff
*/
#define ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME 30000
/**
* Drainage constants for VERB_ECHO rate-limiters
*/
#define ZT_ECHO_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS)
#define ZT_ECHO_DRAINAGE_DIVISOR (1000 / ZT_ECHO_CUTOFF_LIMIT)
/**
* Drainage constants for VERB_QOS rate-limiters
*/
#define ZT_QOS_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS)
#define ZT_QOS_DRAINAGE_DIVISOR (1000 / ZT_QOS_CUTOFF_LIMIT)
/**
* Drainage constants for VERB_ACK rate-limiters
*/
#define ZT_ACK_CUTOFF_LIMIT 128
#define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT)
#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000
#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000
/**
* Maximum number of direct path pushes within cutoff time
*

node/Flow.hpp Normal file (123 lines)

@ -0,0 +1,123 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_FLOW_HPP
#define ZT_FLOW_HPP
#include "Path.hpp"
#include "SharedPtr.hpp"
namespace ZeroTier {
/**
* A protocol flow that is identified by the origin and destination port.
*/
struct Flow
{
/**
* @param flowId Given flow ID
* @param now Current time
*/
Flow(int32_t flowId, int64_t now) :
_flowId(flowId),
_bytesInPerUnitTime(0),
_bytesOutPerUnitTime(0),
_lastActivity(now),
_lastPathReassignment(0),
_assignedPath(SharedPtr<Path>())
{}
/**
* Reset flow statistics
*/
void resetByteCounts()
{
_bytesInPerUnitTime = 0;
_bytesOutPerUnitTime = 0;
}
/**
* @return The Flow's ID
*/
int32_t id() { return _flowId; }
/**
* @return Number of incoming bytes processed on this flow per unit time
*/
int64_t bytesInPerUnitTime() { return _bytesInPerUnitTime; }
/**
* Record number of incoming bytes on this flow
*
* @param bytes Number of incoming bytes
*/
void recordIncomingBytes(uint64_t bytes) { _bytesInPerUnitTime += bytes; }
/**
* @return Number of outgoing bytes processed on this flow per unit time
*/
int64_t bytesOutPerUnitTime() { return _bytesOutPerUnitTime; }
/**
* Record number of outgoing bytes on this flow
*
* @param bytes
*/
void recordOutgoingBytes(uint64_t bytes) { _bytesOutPerUnitTime += bytes; }
/**
* @return The total number of bytes processed on this flow
*/
uint64_t totalBytes() { return _bytesInPerUnitTime + _bytesOutPerUnitTime; }
/**
* How long since a packet was sent or received in this flow
*
* @param now Current time
* @return The age of the flow in terms of last recorded activity
*/
int64_t age(int64_t now) { return now - _lastActivity; }
/**
* Record that traffic was processed on this flow at the given time.
*
* @param now Current time
*/
void updateActivity(int64_t now) { _lastActivity = now; }
/**
* @return Path assigned to this flow
*/
SharedPtr<Path> assignedPath() { return _assignedPath; }
/**
* @param path Assigned path over which this flow should be handled
*/
void assignPath(const SharedPtr<Path> &path, int64_t now) {
_assignedPath = path;
_lastPathReassignment = now;
}
AtomicCounter __refCount;
int32_t _flowId;
uint64_t _bytesInPerUnitTime;
uint64_t _bytesOutPerUnitTime;
int64_t _lastActivity;
int64_t _lastPathReassignment;
SharedPtr<Path> _assignedPath;
};
} // namespace ZeroTier
#endif
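
A short usage sketch of the Flow record defined above; the path, flow ID, and byte counts are illustrative, and in practice Bond::createFlow() and the packet path perform these steps:

// Sketch: create a flow, pin it to a path, and account for traffic on it.
const int64_t now = RR->node->now();
SharedPtr<Flow> flow = new Flow(12345 /* flowId */, now);
flow->assignPath(path, now);
flow->recordOutgoingBytes(1400);
flow->recordIncomingBytes(1400);
flow->updateActivity(now);
if (flow->age(now) > ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL) {
    // stale; Bond::forgetFlowsWhenNecessary() would drop it
}
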


@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -35,10 +35,12 @@
#include "Tag.hpp"
#include "Revocation.hpp"
#include "Trace.hpp"
#include "Path.hpp"
#include "Bond.hpp"
namespace ZeroTier {
bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId)
{
const Address sourceAddress(source());
@ -67,7 +69,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
if (!trusted) {
if (!dearmor(peer->key())) {
RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC");
_path->recordInvalidPacket();
peer->recordIncomingInvalidPacket(_path);
return true;
}
}
@ -78,11 +80,12 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
}
const Packet::Verb v = verb();
bool r = true;
switch(v) {
//case Packet::VERB_NOP:
default: // ignore unknown verbs, but if they pass auth check they are "received"
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
break;
case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break;
case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break;
@ -91,8 +94,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break;
case Packet::VERB_WHOIS: r = _doWHOIS(RR,tPtr,peer); break;
case Packet::VERB_RENDEZVOUS: r = _doRENDEZVOUS(RR,tPtr,peer); break;
case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer); break;
case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer); break;
case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer,flowId); break;
case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer,flowId); break;
case Packet::VERB_ECHO: r = _doECHO(RR,tPtr,peer); break;
case Packet::VERB_MULTICAST_LIKE: r = _doMULTICAST_LIKE(RR,tPtr,peer); break;
case Packet::VERB_NETWORK_CREDENTIALS: r = _doNETWORK_CREDENTIALS(RR,tPtr,peer); break;
@ -103,6 +106,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
case Packet::VERB_PUSH_DIRECT_PATHS: r = _doPUSH_DIRECT_PATHS(RR,tPtr,peer); break;
case Packet::VERB_USER_MESSAGE: r = _doUSER_MESSAGE(RR,tPtr,peer); break;
case Packet::VERB_REMOTE_TRACE: r = _doREMOTE_TRACE(RR,tPtr,peer); break;
case Packet::VERB_PATH_NEGOTIATION_REQUEST: r = _doPATH_NEGOTIATION_REQUEST(RR,tPtr,peer); break;
}
if (r) {
RR->node->statsLogVerb((unsigned int)v,(unsigned int)size());
@ -113,9 +117,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
RR->sw->requestWhois(tPtr,RR->node->now(),sourceAddress);
return false;
}
} catch (int ztExcCode) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()");
return true;
} catch ( ... ) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()");
return true;
@ -193,59 +194,59 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar
default: break;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateACK(RR->node->now()))
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGateACK(RR->node->now())) {
return true;
}
/* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known
* maximums and detect packet loss. */
if (peer->localMultipathSupport()) {
int32_t ackedBytes;
if (payloadLength() != sizeof(ackedBytes)) {
return true; // ignore
}
memcpy(&ackedBytes, payload(), sizeof(ackedBytes));
_path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes));
peer->inferRemoteMultipathEnabled();
int32_t ackedBytes;
if (payloadLength() != sizeof(ackedBytes)) {
return true; // ignore
}
memcpy(&ackedBytes, payload(), sizeof(ackedBytes));
if (bond) {
bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes));
}
return true;
}
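// Illustrative sketch only (not part of this commit): how a count of ACKed
// bytes over a measurement window can be turned into a rough throughput
// figure, as the bond layer does when it processes VERB_ACK. The helper name
// and millisecond interval handling are assumptions for illustration.
static inline uint64_t estimateThroughputBps(int64_t windowStartMs, int64_t nowMs, uint64_t bytesAckedInWindow)
{
	const int64_t elapsedMs = nowMs - windowStartMs;
	if (elapsedMs <= 0)
		return 0; // no elapsed time yet, nothing to estimate
	// bits per second = (bytes * 8) / (elapsed seconds)
	return (bytesAckedInWindow * 8ULL * 1000ULL) / (uint64_t)elapsedMs;
}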
bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateQoS(RR->node->now()))
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGateQoS(RR->node->now())) {
return true;
}
/* Dissect incoming QoS packet. From this we can compute latency values and their variance.
* The latency variance is used as a measure of "jitter". */
if (peer->localMultipathSupport()) {
if (payloadLength() > ZT_PATH_MAX_QOS_PACKET_SZ || payloadLength() < ZT_PATH_MIN_QOS_PACKET_SZ) {
return true; // ignore
}
const int64_t now = RR->node->now();
uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE];
uint16_t rx_ts[ZT_PATH_QOS_TABLE_SIZE];
char *begin = (char *)payload();
char *ptr = begin;
int count = 0;
int len = payloadLength();
// Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet
while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) {
memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t));
ptr+=sizeof(uint64_t);
memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t));
ptr+=sizeof(uint16_t);
count++;
}
_path->receivedQoS(now, count, rx_id, rx_ts);
peer->inferRemoteMultipathEnabled();
if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) {
return true; // ignore
}
const int64_t now = RR->node->now();
uint64_t rx_id[ZT_QOS_TABLE_SIZE];
uint16_t rx_ts[ZT_QOS_TABLE_SIZE];
char *begin = (char *)payload();
char *ptr = begin;
int count = 0;
unsigned int len = payloadLength();
// Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet
while (ptr < (begin + len) && (count < ZT_QOS_TABLE_SIZE)) {
memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t));
ptr+=sizeof(uint64_t);
memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t));
ptr+=sizeof(uint16_t);
count++;
}
if (bond) {
bond->receivedQoS(_path, now, count, rx_id, rx_ts);
}
return true;
}
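// Illustrative sketch only (hypothetical helper, not part of this commit):
// how one latency sample is derived from a single QoS record. The remote peer
// reports how long it held each packet before echoing its ID, so that holding
// time is subtracted from the measured round trip before halving.
static inline uint16_t latencyFromQoSRecord(int64_t nowMs, int64_t egressTimeMs, uint16_t remoteHoldingTimeMs)
{
	const uint16_t rtt = (uint16_t)(nowMs - egressTimeMs);     // round trip as seen locally
	const uint16_t rttCompensated = rtt - remoteHoldingTimeMs; // remove remote queueing delay
	return rttCompensated / 2;                                 // one-way estimate
}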
@ -441,11 +442,12 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool
}
outp.setAt<uint16_t>(worldUpdateSizeAt,(uint16_t)(outp.size() - (worldUpdateSizeAt + 2)));
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),now);
peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -493,7 +495,10 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
}
if (!hops()) {
_path->updateLatency((unsigned int)latency,RR->node->now());
SharedPtr<Bond> bond = peer->bond();
if (!bond) {
_path->updateLatency((unsigned int)latency,RR->node->now());
}
}
peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision);
@ -522,8 +527,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
if (network) {
const MulticastGroup mg(MAC(field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_MAC,6),6),at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_ADI));
const unsigned int count = at<uint16_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 4);
if (((ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6) + (count * 5)) <= size())
RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS));
RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS));
}
} break;
@ -556,7 +560,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
default: break;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW);
return true;
}
@ -591,7 +595,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -615,13 +619,108 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
// Returns true if packet appears valid; pos and proto will be set
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
if (frameLen < 40)
return false;
pos = 40;
proto = frameData[6];
while (pos <= frameLen) {
switch(proto) {
case 0: // hop-by-hop options
case 43: // routing
case 60: // destination options
case 135: // mobility options
if ((pos + 8) > frameLen)
return false; // invalid!
proto = frameData[pos];
pos += ((unsigned int)frameData[pos + 1] * 8) + 8;
break;
//case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway
//case 50:
//case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff
default:
return true;
}
}
return false; // overflow == invalid
}
bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId)
{
int32_t _flowId = ZT_QOS_NO_FLOW;
SharedPtr<Bond> bond = peer->bond();
if (bond && bond->flowHashingEnabled()) {
if (size() > ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD) {
const unsigned int etherType = at<uint16_t>(ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE);
const unsigned int frameLen = size() - ZT_PROTO_VERB_FRAME_IDX_PAYLOAD;
const uint8_t *const frameData = reinterpret_cast<const uint8_t *>(data()) + ZT_PROTO_VERB_FRAME_IDX_PAYLOAD;
if (etherType == ZT_ETHERTYPE_IPV4 && (frameLen >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
uint8_t proto = (reinterpret_cast<const uint8_t *>(frameData)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(frameData)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
//flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (frameLen > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
_flowId = dstPort ^ srcPort ^ proto;
}
break;
}
}
if (etherType == ZT_ETHERTYPE_IPV6 && (frameLen >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)frameData, frameLen, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
//flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (frameLen > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
_flowId = dstPort ^ srcPort ^ proto;
}
break;
default:
break;
}
}
}
}
const uint64_t nwid = at<uint64_t>(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID);
const SharedPtr<Network> network(RR->node->network(nwid));
bool trustEstablished = false;
@ -641,13 +740,12 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar
return false;
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid,_flowId);
return true;
}
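// Worked example (illustrative only): the XOR flow hash above is symmetric,
// so both directions of a connection hash to the same flow and are therefore
// kept on the same bonded path. For TCP (0x06) between ports 443 and 51000:
//   0x01BB ^ 0xC738 ^ 0x06 == 0xC738 ^ 0x01BB ^ 0x06 == 0xC685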
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId)
{
const uint64_t nwid = at<uint64_t>(ZT_PROTO_VERB_EXT_FRAME_IDX_NETWORK_ID);
const SharedPtr<Network> network(RR->node->network(nwid));
@ -676,7 +774,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen);
if ((!from)||(from == network->mac())) {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
@ -687,19 +785,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
network->learnBridgeRoute(from,peer->address());
} else {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
} else if (to != network->mac()) {
if (to.isMulticast()) {
if (network->config().multicastLimit == 0) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
} else if (!network->config().permitsBridging(RR->identity.address())) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
}
@ -715,13 +813,15 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
outp.append((uint8_t)Packet::VERB_EXT_FRAME);
outp.append((uint64_t)packetId());
outp.append((uint64_t)nwid);
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId);
} else {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid,flowId);
}
return true;
@ -729,8 +829,10 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateEchoRequest(RR->node->now()))
uint64_t now = RR->node->now();
if (!peer->rateGateEchoRequest(now)) {
return true;
}
const uint64_t pid = packetId();
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK);
@ -738,10 +840,11 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share
outp.append((uint64_t)pid);
if (size() > ZT_PACKET_IDX_PAYLOAD)
outp.append(reinterpret_cast<const unsigned char *>(data()) + ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD);
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -767,7 +870,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c
RR->mc->add(tPtr,now,nwid,MulticastGroup(MAC(field(ptr + 8,6),6),at<uint32_t>(ptr + 14)),peer->address());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -889,7 +992,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0,ZT_QOS_NO_FLOW);
return true;
}
@ -915,7 +1018,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW);
return true;
}
@ -931,12 +1034,14 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c
outp.append((uint64_t)packetId());
outp.append((uint64_t)network->id());
outp.append((uint64_t)configUpdateId);
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0,ZT_QOS_NO_FLOW);
return true;
}
@ -979,12 +1084,13 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr
outp.append((uint32_t)mg.adi());
const unsigned int gatheredLocally = RR->mc->gather(peer->address(),nwid,mg,outp,gatherLimit);
if (gatheredLocally > 0) {
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),now);
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid,ZT_QOS_NO_FLOW);
return true;
}
@ -1032,19 +1138,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
if (network->config().multicastLimit == 0) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW);
return true;
}
if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) {
if (!to.mac().isMulticast()) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
if ((!from)||(from.isMulticast())||(from == network->mac())) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
@ -1058,7 +1164,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
network->learnBridgeRoute(from,peer->address());
} else {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
}
@ -1076,12 +1182,14 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
outp.append((uint32_t)to.adi());
outp.append((unsigned char)0x02); // flag 0x02 = contains gather results
if (RR->mc->gather(peer->address(),nwid,to,outp,gatherLimit)) {
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW);
} else {
_sendErrorNeedCredentials(RR,tPtr,peer,nwid);
return false;
@ -1094,9 +1202,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
{
const int64_t now = RR->node->now();
// First, subject this to a rate limit
if (!peer->rateGatePushDirectPaths(now)) {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1108,8 +1215,6 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2;
while (count--) { // if ptr overflows Buffer will throw
// TODO: some flags are not yet implemented
unsigned int flags = (*this)[ptr++];
unsigned int extLen = at<uint16_t>(ptr); ptr += 2;
ptr += extLen; // unused right now
@ -1132,6 +1237,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
}
} break;
case 6: {
const InetAddress a(field(ptr,16),16,at<uint16_t>(ptr + 16));
if (
((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget
@ -1149,7 +1255,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
ptr += addrLen;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1165,7 +1271,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con
RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast<const void *>(&um));
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1189,11 +1295,29 @@ bool IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
uint64_t now = RR->node->now();
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGatePathNegotiation(now)) {
return true;
}
if (payloadLength() != sizeof(int16_t)) {
return true;
}
int16_t remoteUtility = 0;
memcpy(&remoteUtility, payload(), sizeof(int16_t));
if (peer->bond()) {
peer->bond()->processIncomingPathNegotiationRequest(now, _path, Utils::ntoh(remoteUtility));
}
return true;
}
void IncomingPacket::_sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,const uint64_t nwid)
{
Packet outp(source(),RR->identity.address(),Packet::VERB_ERROR);

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -100,7 +100,7 @@ public:
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @return True if decoding and processing is complete, false if caller should try again
*/
bool tryDecode(const RuntimeEnvironment *RR,void *tPtr);
bool tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId);
/**
* @return Time of packet receipt / start of decode
@ -117,8 +117,8 @@ private:
bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId);
bool _doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
@ -129,6 +129,7 @@ private:
bool _doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
void _sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,const uint64_t nwid);

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -48,6 +48,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
_networks(8),
_now(now),
_lastPingCheck(0),
_lastGratuitousPingCheck(0),
_lastHousekeepingRun(0),
_lastMemoizedTraceSettings(0)
{
@ -102,8 +103,9 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0);
const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0);
const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0);
const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0);
m = reinterpret_cast<char *>(::malloc(16 + ts + sws + mcs + topologys + sas));
m = reinterpret_cast<char *>(::malloc(16 + ts + sws + mcs + topologys + sas + bc));
if (!m)
throw std::bad_alloc();
RR->rtmem = m;
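// Illustrative note: each term above rounds the object size up to the next
// multiple of 16 so every placement-new'd subsystem in the single rtmem
// allocation stays 16-byte aligned. E.g. a hypothetical 1000-byte object
// contributes 1008 bytes (1000 & 0xf == 8, so 16 - 8 is added).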
@ -118,12 +120,15 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
RR->topology = new (m) Topology(RR,tptr);
m += topologys;
RR->sa = new (m) SelfAwareness(RR);
m += sas;
RR->bc = new (m) BondController(RR);
} catch ( ... ) {
if (RR->sa) RR->sa->~SelfAwareness();
if (RR->topology) RR->topology->~Topology();
if (RR->mc) RR->mc->~Multicaster();
if (RR->sw) RR->sw->~Switch();
if (RR->t) RR->t->~Trace();
if (RR->bc) RR->bc->~BondController();
::free(m);
throw;
}
@ -142,6 +147,7 @@ Node::~Node()
if (RR->mc) RR->mc->~Multicaster();
if (RR->sw) RR->sw->~Switch();
if (RR->t) RR->t->~Trace();
if (RR->bc) RR->bc->~BondController();
::free(RR->rtmem);
}
@ -246,9 +252,23 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64
_now = now;
Mutex::Lock bl(_backgroundTasksLock);
unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY;
if (RR->bc->inUse()) {
// Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled)
bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL);
if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) {
Hashtable< Address,std::vector<InetAddress> > alwaysContact;
_PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now);
RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc);
_lastGratuitousPingCheck = now;
}
RR->bc->processBackgroundTasks(tptr, now);
}
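// Illustrative numbers (assumed, not taken from this commit): with a core
// timer granularity of 500 ms, a ping check interval of 60 s, and a minimum
// required path monitor interval of 100 ms, the clamp above yields
//   min(max(100, 500), 60000) == 500 ms
// so bonded peers are probed no faster than the core timer granularity and
// no slower than the normal ping check.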
unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL;
const int64_t timeSinceLastPingCheck = now - _lastPingCheck;
if (timeSinceLastPingCheck >= ZT_PING_CHECK_INVERVAL) {
if (timeSinceLastPingCheck >= timeUntilNextPingCheck) {
try {
_lastPingCheck = now;
@ -354,7 +374,7 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64
}
try {
*nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY);
*nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(bondCheckInterval,std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now))),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY);
} catch ( ... ) {
return ZT_RESULT_FATAL_ERROR_INTERNAL;
}
@ -461,7 +481,7 @@ ZT_PeerList *Node::peers() const
for(std::vector< std::pair< Address,SharedPtr<Peer> > >::iterator pi(peers.begin());pi!=peers.end();++pi) {
ZT_Peer *p = &(pl->peers[pl->peerCount++]);
p->address = pi->second->address().toInt();
p->hadAggregateLink = 0;
p->isBonded = 0;
if (pi->second->remoteVersionKnown()) {
p->versionMajor = pi->second->remoteVersionMajor();
p->versionMinor = pi->second->remoteVersionMinor();
@ -478,28 +498,24 @@ ZT_PeerList *Node::peers() const
std::vector< SharedPtr<Path> > paths(pi->second->paths(_now));
SharedPtr<Path> bestp(pi->second->getAppropriatePath(_now,false));
p->hadAggregateLink |= pi->second->hasAggregateLink();
p->pathCount = 0;
for(std::vector< SharedPtr<Path> >::iterator path(paths.begin());path!=paths.end();++path) {
memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage));
//memcpy(&(p->paths[p->pathCount].ifname,&((*path)->slave()),32);)
p->paths[p->pathCount].localSocket = (*path)->localSocket();
p->paths[p->pathCount].lastSend = (*path)->lastOut();
p->paths[p->pathCount].lastReceive = (*path)->lastIn();
p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address());
p->paths[p->pathCount].expired = 0;
p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0;
p->paths[p->pathCount].latency = (float)(*path)->latency();
p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance();
p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient();
p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio();
p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio();
p->paths[p->pathCount].stability = (*path)->lastComputedStability();
p->paths[p->pathCount].throughput = (*path)->meanThroughput();
p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput();
p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255;
p->paths[p->pathCount].ifname = (*path)->getName();
//p->paths[p->pathCount].age = (*path)->age(_now);
p->paths[p->pathCount].scope = (*path)->ipScope();
++p->pathCount;
}
if (pi->second->bond()) {
p->isBonded = pi->second->bond();
p->bondingPolicy = pi->second->bond()->getPolicy();
}
}
return pl;

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -34,6 +34,7 @@
#include "Salsa20.hpp"
#include "NetworkController.hpp"
#include "Hashtable.hpp"
#include "BondController.hpp"
// Bit mask for "expecting reply" hash
#define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255
@ -186,6 +187,8 @@ public:
inline const Identity &identity() const { return _RR.identity; }
inline BondController *bondController() const { return _RR.bc; }
/**
* Register that we are expecting a reply to a packet ID
*
@ -247,9 +250,6 @@ public:
inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; }
inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; }
inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; }
inline uint8_t getMultipathMode() { return _multipathMode; }
inline bool localControllerHasAuthorized(const int64_t now,const uint64_t nwid,const Address &addr) const
{
_localControllerAuthorizations_m.lock();
@ -306,10 +306,9 @@ private:
Address _remoteTraceTarget;
enum Trace::Level _remoteTraceLevel;
uint8_t _multipathMode;
volatile int64_t _now;
int64_t _lastPingCheck;
int64_t _lastGratuitousPingCheck;
int64_t _lastHousekeepingRun;
int64_t _lastMemoizedTraceSettings;
volatile int64_t _prngState[2];

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -931,13 +931,13 @@ public:
*
* Upon receipt of this packet, the local peer will verify that the correct
* number of bytes were received by the remote peer. If these values do
* not agree that could be an indicator of packet loss.
* not agree that could be an indication of packet loss.
*
* Additionally, the local peer knows the interval of time that has
* elapsed since the last received ACK. With this information it can compute
* a rough estimate of the current throughput.
*
* This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL
* This is sent at a maximum rate of once per every ZT_QOS_ACK_INTERVAL
*/
VERB_ACK = 0x12,
@ -963,7 +963,8 @@ public:
* measure of the amount of time between when a packet was received and the
* egress time of its tracking QoS packet.
*
* This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL
* This is sent at a maximum rate of once per every
* ZT_QOS_MEASUREMENT_INTERVAL
*/
VERB_QOS_MEASUREMENT = 0x13,
@ -996,7 +997,34 @@ public:
* node on startup. This is helpful in identifying traces from different
* members of a cluster.
*/
VERB_REMOTE_TRACE = 0x15
VERB_REMOTE_TRACE = 0x15,
/**
* A request to a peer to use a specific path in a multi-path scenario:
* <[2] 16-bit unsigned integer that encodes a path choice utility>
*
* This is sent when a node operating in multipath mode observes that
* its inbound and outbound traffic aren't going over the same path. The
* node will compute its perceived utility for using its chosen outbound
* path and send this to a peer in an attempt to petition it to send
* its traffic over this same path.
*
* Scenarios:
*
* (1) Remote peer utility is GREATER than ours:
* - Remote peer will refuse the petition and continue using current path
* (2) Remote peer utility is LESS than ours:
* - Remote peer will accept the petition and switch to our chosen path
* (3) Remote peer utility is EQUAL to our own:
* - To prevent confusion and flapping, both sides will agree to use the
* numerical values of their identities to determine which path to use.
* The peer with the greatest identity will win.
*
* If a node petitions a peer repeatedly with no effect it will regard
* that as a refusal by the remote peer. In that case, if the utility is
* negligible, it will voluntarily switch to the remote peer's chosen path.
*/
VERB_PATH_NEGOTIATION_REQUEST = 0x16
};
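/**
 * Illustrative sketch of the negotiation rule documented above. This is a
 * hypothetical helper, not the Bond implementation; it only shows the
 * accept/refuse/tie-break decision.
 */
static inline bool shouldAdoptRemotePath(int16_t localUtility, int16_t remoteUtility, uint64_t localIdentity, uint64_t remoteIdentity)
{
	if (remoteUtility > localUtility)
		return true;   // remote peer has more to lose; accept its petition
	if (remoteUtility < localUtility)
		return false;  // we have more to lose; keep our chosen path
	// Equal utility: break the tie deterministically so both sides agree.
	// The peer with the numerically greater identity wins.
	return remoteIdentity > localIdentity;
}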
/**

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -26,12 +26,11 @@
#include "SharedPtr.hpp"
#include "AtomicCounter.hpp"
#include "Utils.hpp"
#include "RingBuffer.hpp"
#include "Packet.hpp"
#include "RingBuffer.hpp"
//#include "Bond.hpp"
#include "../osdep/Phy.hpp"
#include "../include/ZeroTierDebug.h"
#include "../osdep/Slave.hpp"
/**
* Maximum return value of preferenceRank()
@ -48,7 +47,8 @@ class RuntimeEnvironment;
class Path
{
friend class SharedPtr<Path>;
Phy<Path *> *_phy;
friend class Bond;
//friend class SharedPtr<Bond>;
public:
/**
@ -87,77 +87,113 @@ public:
_lastOut(0),
_lastIn(0),
_lastTrustEstablishedPacketReceived(0),
_lastPathQualityComputeTime(0),
_localSocket(-1),
_latency(0xffff),
_addr(),
_ipScope(InetAddress::IP_SCOPE_NONE),
_lastAck(0),
_lastThroughputEstimation(0),
_lastAckReceived(0),
_lastAckSent(0),
_lastQoSMeasurement(0),
_lastQoSRecordPurge(0),
_lastThroughputEstimation(0),
_lastRefractoryUpdate(0),
_lastAliveToggle(0),
_lastEligibilityState(false),
_lastTrialBegin(0),
_refractoryPeriod(0),
_monitorInterval(0),
_upDelay(0),
_downDelay(0),
_ipvPref(0),
_mode(0),
_onlyPathOnSlave(false),
_enabled(false),
_bonded(false),
_negotiated(false),
_deprecated(false),
_shouldReallocateFlows(false),
_assignedFlowCount(0),
_latencyMean(0),
_latencyVariance(0),
_packetLossRatio(0),
_packetErrorRatio(0),
_throughputMean(0),
_throughputMax(0),
_throughputVariance(0),
_allocation(0),
_byteLoad(0),
_relativeByteLoad(0),
_affinity(0),
_failoverScore(0),
_unackedBytes(0),
_expectingAckAsOf(0),
_packetsReceivedSinceLastAck(0),
_packetsReceivedSinceLastQoS(0),
_maxLifetimeThroughput(0),
_lastComputedMeanThroughput(0),
_bytesAckedSinceLastThroughputEstimation(0),
_lastComputedMeanLatency(0.0),
_lastComputedPacketDelayVariance(0.0),
_lastComputedPacketErrorRatio(0.0),
_lastComputedPacketLossRatio(0),
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0)
{
memset(_ifname, 0, 16);
memset(_addrString, 0, sizeof(_addrString));
}
_packetsIn(0),
_packetsOut(0),
_prevEligibility(false)
{}
Path(const int64_t localSocket,const InetAddress &addr) :
_lastOut(0),
_lastIn(0),
_lastTrustEstablishedPacketReceived(0),
_lastPathQualityComputeTime(0),
_localSocket(localSocket),
_latency(0xffff),
_addr(addr),
_ipScope(addr.ipScope()),
_lastAck(0),
_lastThroughputEstimation(0),
_lastAckReceived(0),
_lastAckSent(0),
_lastQoSMeasurement(0),
_lastQoSRecordPurge(0),
_lastThroughputEstimation(0),
_lastRefractoryUpdate(0),
_lastAliveToggle(0),
_lastEligibilityState(false),
_lastTrialBegin(0),
_refractoryPeriod(0),
_monitorInterval(0),
_upDelay(0),
_downDelay(0),
_ipvPref(0),
_mode(0),
_onlyPathOnSlave(false),
_enabled(false),
_bonded(false),
_negotiated(false),
_deprecated(false),
_shouldReallocateFlows(false),
_assignedFlowCount(0),
_latencyMean(0),
_latencyVariance(0),
_packetLossRatio(0),
_packetErrorRatio(0),
_throughputMean(0),
_throughputMax(0),
_throughputVariance(0),
_allocation(0),
_byteLoad(0),
_relativeByteLoad(0),
_affinity(0),
_failoverScore(0),
_unackedBytes(0),
_expectingAckAsOf(0),
_packetsReceivedSinceLastAck(0),
_packetsReceivedSinceLastQoS(0),
_maxLifetimeThroughput(0),
_lastComputedMeanThroughput(0),
_bytesAckedSinceLastThroughputEstimation(0),
_lastComputedMeanLatency(0.0),
_lastComputedPacketDelayVariance(0.0),
_lastComputedPacketErrorRatio(0.0),
_lastComputedPacketLossRatio(0),
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0)
{
memset(_ifname, 0, 16);
memset(_addrString, 0, sizeof(_addrString));
if (_localSocket != -1) {
_phy->getIfName((PhySocket *) ((uintptr_t) _localSocket), _ifname, 16);
}
}
_packetsIn(0),
_packetsOut(0),
_prevEligibility(false)
{}
/**
* Called when a packet is received from this remote path, regardless of content
*
* @param t Time of receive
*/
inline void received(const uint64_t t) { _lastIn = t; }
inline void received(const uint64_t t) {
_lastIn = t;
if (!_prevEligibility) {
_lastAliveToggle = _lastIn;
}
}
/**
* Set time last trusted packet was received (done in Peer::received())
@ -197,7 +233,6 @@ public:
else {
_latency = l;
}
_latencySamples.push(l);
}
/**
@ -286,341 +321,32 @@ public:
}
/**
* Record statistics on outgoing packets. Used later to estimate QoS metrics.
*
* @param now Current time
* @param packetId ID of packet
* @param payloadLength Length of payload
* @param verb Packet verb
* @param bonded Whether this path is part of a bond.
*/
inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb)
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_unackedBytes += payloadLength;
// Take note that we're expecting a VERB_ACK on this path as of a specific time
_expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now;
if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) {
_outQoSRecords[packetId] = now;
}
}
}
}
inline void setBonded(bool bonded) { _bonded = bonded; }
/**
* Record statistics on incoming packets. Used later to estimate QoS metrics.
*
* @param now Current time
* @param packetId ID of packet
* @param payloadLength Length of payload
* @param verb Packet verb
* @return True if this path is currently part of a bond.
*/
inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb)
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_inACKRecords[packetId] = payloadLength;
_packetsReceivedSinceLastAck++;
_inQoSRecords[packetId] = now;
_packetsReceivedSinceLastQoS++;
}
_packetValiditySamples.push(true);
}
}
/**
* Record that we've received a VERB_ACK on this path, also compute throughput if required.
*
* @param now Current time
* @param ackedBytes Number of bytes acknowledged by other peer
*/
inline void receivedAck(int64_t now, int32_t ackedBytes)
{
_expectingAckAsOf = 0;
_unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes;
int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation);
if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) {
uint64_t throughput = (uint64_t)((float)(_bytesAckedSinceLastThroughputEstimation * 8) / ((float)timeSinceThroughputEstimate / (float)1000));
_throughputSamples.push(throughput);
_maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput;
_lastThroughputEstimation = now;
_bytesAckedSinceLastThroughputEstimation = 0;
} else {
_bytesAckedSinceLastThroughputEstimation += ackedBytes;
}
}
/**
* @return Number of bytes this peer is responsible for ACKing since last ACK
*/
inline int32_t bytesToAck()
{
Mutex::Lock _l(_statistics_m);
int32_t bytesToAck = 0;
std::map<uint64_t,uint16_t>::iterator it = _inACKRecords.begin();
while (it != _inACKRecords.end()) {
bytesToAck += it->second;
it++;
}
return bytesToAck;
}
/**
* @return Number of bytes thus far sent that have not been acknowledged by the remote peer
*/
inline int64_t unackedSentBytes()
{
return _unackedBytes;
}
/**
* Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers
*
* @param Current time
*/
inline void sentAck(int64_t now)
{
Mutex::Lock _l(_statistics_m);
_inACKRecords.clear();
_packetsReceivedSinceLastAck = 0;
_lastAck = now;
}
/**
* Receive QoS data, match with recorded egress times from this peer, compute latency
* estimates.
*
* @param now Current time
* @param count Number of records
* @param rx_id table of packet IDs
* @param rx_ts table of holding times
*/
inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts)
{
Mutex::Lock _l(_statistics_m);
// Look up egress times and compute latency values for each record
std::map<uint64_t,uint64_t>::iterator it;
for (int j=0; j<count; j++) {
it = _outQoSRecords.find(rx_id[j]);
if (it != _outQoSRecords.end()) {
uint16_t rtt = (uint16_t)(now - it->second);
uint16_t rtt_compensated = rtt - rx_ts[j];
uint16_t latency = rtt_compensated / 2;
updateLatency(latency, now);
_outQoSRecords.erase(it);
}
}
}
/**
* Generate the contents of a VERB_QOS_MEASUREMENT packet.
*
* @param now Current time
* @param qosBuffer destination buffer
* @return Size of payload
*/
inline int32_t generateQoSPacket(int64_t now, char *qosBuffer)
{
Mutex::Lock _l(_statistics_m);
int32_t len = 0;
std::map<uint64_t,uint64_t>::iterator it = _inQoSRecords.begin();
int i=0;
while (i<_packetsReceivedSinceLastQoS && it != _inQoSRecords.end()) {
uint64_t id = it->first;
memcpy(qosBuffer, &id, sizeof(uint64_t));
qosBuffer+=sizeof(uint64_t);
uint16_t holdingTime = (uint16_t)(now - it->second);
memcpy(qosBuffer, &holdingTime, sizeof(uint16_t));
qosBuffer+=sizeof(uint16_t);
len+=sizeof(uint64_t)+sizeof(uint16_t);
_inQoSRecords.erase(it++);
i++;
}
return len;
}
/**
* Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers.
*
* @param Current time
*/
inline void sentQoS(int64_t now) {
_packetsReceivedSinceLastQoS = 0;
_lastQoSMeasurement = now;
}
/**
* @param now Current time
* @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time
*/
inline bool needsToSendAck(int64_t now) {
return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL ||
(_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck;
}
/**
* @param now Current time
* @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time
*/
inline bool needsToSendQoS(int64_t now) {
return ((_packetsReceivedSinceLastQoS >= ZT_PATH_QOS_TABLE_SIZE) ||
((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastQoS;
}
/**
* How much time has elapsed since we've been expecting a VERB_ACK on this path. This value
* is used to determine a more relevant path "age". This lets us penalize paths which are no
* longer ACKing, but not those that simply aren't being used to carry traffic at the
* current time.
*/
inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? now - _expectingAckAsOf : 0; }
/**
* The maximum observed throughput (in bits/s) for this path
*/
inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; }
/**
* @return The mean throughput (in bits/s) of this link
*/
inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; }
/**
* Assign a new relative quality value for this path in the aggregate link
*
* @param rq Quality of this path in comparison to other paths available to this peer
*/
inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; }
/**
* @return Quality of this path compared to others in the aggregate link
*/
inline float relativeQuality() { return _lastComputedRelativeQuality; }
/**
* Assign a new allocation value for this path in the aggregate link
*
* @param allocation Percentage of traffic to be sent over this path to a peer
*/
inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; }
/**
* @return Percentage of traffic allocated to this path in the aggregate link
*/
inline unsigned char allocation() { return _lastAllocation; }
/**
* @return Stability estimates can become expensive to compute, we cache the most recent result.
*/
inline float lastComputedStability() { return _lastComputedStability; }
/**
* @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to
*/
inline char *getName() { return _ifname; }
/**
* @return Packet delay variance
*/
inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; }
/**
* @return Previously-computed mean latency
*/
inline float meanLatency() { return _lastComputedMeanLatency; }
/**
* @return Packet loss rate (PLR)
*/
inline float packetLossRatio() { return _lastComputedPacketLossRatio; }
/**
* @return Packet error ratio (PER)
*/
inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; }
/**
* Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now
* contribute to a Packet Error Ratio (PER).
*/
inline void recordInvalidPacket() { _packetValiditySamples.push(false); }
/**
* @return A pointer to a cached copy of the address string for this Path (For debugging only)
*/
inline char *getAddressString() { return _addrString; }
/**
* @return The current throughput disturbance coefficient
*/
inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; }
/**
* Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved"
* this path is. This figure is substantially different from (but required for) the estimation of the path's overall "quality".
*
* @param now Current time
*/
inline void processBackgroundPathMeasurements(const int64_t now)
{
if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
Mutex::Lock _l(_statistics_m);
_lastPathQualityComputeTime = now;
address().toString(_addrString);
_lastComputedMeanLatency = _latencySamples.mean();
_lastComputedPacketDelayVariance = _latencySamples.stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689)
_lastComputedMeanThroughput = (uint64_t)_throughputSamples.mean();
// If no packet validity samples, assume PER==0
_lastComputedPacketErrorRatio = 1 - (_packetValiditySamples.count() ? _packetValiditySamples.mean() : 1);
// Compute path stability
// Normalize measurements with wildly different ranges into a reasonable range
float normalized_pdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10);
float normalized_la = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10);
float throughput_cv = _throughputSamples.mean() > 0 ? _throughputSamples.stddev() / _throughputSamples.mean() : 1;
// Form an exponential cutoff and apply contribution weights
float pdv_contrib = expf((-1.0f)*normalized_pdv) * (float)ZT_PATH_CONTRIB_PDV;
float latency_contrib = expf((-1.0f)*normalized_la) * (float)ZT_PATH_CONTRIB_LATENCY;
// Throughput Disturbance Coefficient
float throughput_disturbance_contrib = expf((-1.0f)*throughput_cv) * (float)ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE;
_throughputDisturbanceSamples.push(throughput_cv);
_lastComputedThroughputDistCoeff = _throughputDisturbanceSamples.mean();
// Obey user-defined ignored contributions
pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1;
latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1;
throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1;
// Stability
_lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib;
_lastComputedStability *= 1 - _lastComputedPacketErrorRatio;
// Prevent QoS records from sticking around for too long
std::map<uint64_t,uint64_t>::iterator it = _outQoSRecords.begin();
while (it != _outQoSRecords.end()) {
// Time since egress of tracked packet
if ((now - it->second) >= ZT_PATH_QOS_TIMEOUT) {
_outQoSRecords.erase(it++);
} else { it++; }
}
}
}
inline bool bonded() { return _bonded; }
/**
* @return True if this path is alive (receiving heartbeats)
*/
inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); }
inline bool alive(const int64_t now, bool bondingEnabled = false) const {
return (bondingEnabled && _monitorInterval) ? ((now - _lastIn) < (_monitorInterval * 3)) : ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000));
}
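// Illustrative: with an assumed monitor interval of 100 ms a bonded path is
// declared dead after 300 ms of silence (monitorInterval * 3); without
// bonding the legacy ZT_PATH_HEARTBEAT_PERIOD (+5 s slack) rule still applies.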
/**
* @return True if this path needs a heartbeat
*/
inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); }
/**
* @return True if this path needs a heartbeat in accordance with the user-specified path monitor frequency
*/
inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); }
/**
* @return Last time we sent something
*/
@ -631,62 +357,339 @@ public:
*/
inline int64_t lastIn() const { return _lastIn; }
/**
* @return the age of the path in terms of receiving packets
*/
inline int64_t age(int64_t now) { return (now - _lastIn); }
/**
* @return Time last trust-established packet was received
*/
inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; }
/**
* @return Time since last VERB_ACK was received
*/
inline int64_t ackAge(int64_t now) { return _lastAckReceived ? now - _lastAckReceived : 0; }
/**
* Set or update a refractory period for the path.
*
* @param punishment How much a path should be punished
* @param pathFailure Whether this call is the result of a recent path failure
*/
inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) {
if (pathFailure) {
unsigned int suggestedRefractoryPeriod = _refractoryPeriod ? punishment + (_refractoryPeriod * 2) : punishment;
_refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD);
_lastRefractoryUpdate = 0;
} else {
uint32_t drainRefractory = 0;
if (_lastRefractoryUpdate) {
drainRefractory = (now - _lastRefractoryUpdate);
} else {
drainRefractory = (now - _lastAliveToggle);
}
_lastRefractoryUpdate = now;
if (_refractoryPeriod > drainRefractory) {
_refractoryPeriod -= drainRefractory;
} else {
_refractoryPeriod = 0;
_lastRefractoryUpdate = 0;
}
}
}
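// Worked example (illustrative): with punishment == 1000 and an existing
// refractory period of 4000, a path failure sets the period to
//   min(1000 + (4000 * 2), ZT_MULTIPATH_MAX_REFRACTORY_PERIOD) == 9000
// (assuming the cap is larger), so repeated failures back off roughly
// exponentially while healthy uptime drains the period back toward zero.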
/**
* Determine the current state of eligibility of the path.
*
* @param includeRefractoryPeriod Whether current punishment should be taken into consideration
* @return True if this path can be used in a bond at the current time
*/
inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) {
if (includeRefractoryPeriod && _refractoryPeriod) {
return false;
}
bool acceptableAge = age(now) < ((_monitorInterval * 4) + _downDelay); // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs)
bool acceptableAckAge = ackAge(now) < (ackSendInterval); // Whether the remote peer is actually responding to our outgoing traffic or simply sending stuff to us
bool notTooEarly = (now - _lastAliveToggle) >= _upDelay; // Whether we've waited long enough since the link last came online
bool inTrial = (now - _lastTrialBegin) < _upDelay; // Whether this path is still in its trial period
bool currEligibility = allowed() && (((acceptableAge || acceptableAckAge) && notTooEarly) || inTrial);
return currEligibility;
}
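// Worked example (illustrative, assumed values): monitorInterval = 100 ms,
// downDelay = 0, upDelay = 500 ms, ackSendInterval = 1000 ms. A path whose
// last inbound packet is 350 ms old (< 400 ms) or whose last ACK is 800 ms
// old (< 1000 ms) stays eligible, provided it has been up for at least
// 500 ms or is still inside its trial period.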
/**
* Record when this path first entered the bond. Each path is given a trial period where it is admitted
* to the bond without requiring observations to prove its performance or reliability.
*/
inline void startTrial(uint64_t now) { _lastTrialBegin = now; }
/**
* @return True if a path is permitted to be used in a bond (according to user pref.)
*/
inline bool allowed() {
return _enabled
&& (!_ipvPref
|| ((_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64))
|| ((_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64)))));
}
/**
* @return True if a path is preferred over another on the same physical slave (according to user pref.)
*/
inline bool preferred() {
return _onlyPathOnSlave
|| (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46))
|| (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64));
}
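allowed() and preferred() interpret a two-digit IP version preference inherited from the physical slave. Judging from the checks above, the encoding appears to be: 0 = no preference, 4 or 6 = only that family, 46 = IPv4 preferred over IPv6, 64 = IPv6 preferred over IPv4; this reading is an assumption drawn from the code, not a documented constant. A small sketch of how the two predicates interact (hypothetical helper names, with the _onlyPathOnSlave shortcut of preferred() omitted):

#include <cstdio>

static bool allowedSketch(bool isV4, int ipvPref)
{
	if (!ipvPref) return true; // no preference: every family is allowed
	return isV4 ? (ipvPref == 4 || ipvPref == 46 || ipvPref == 64)
	            : (ipvPref == 6 || ipvPref == 46 || ipvPref == 64);
}

static bool preferredSketch(bool isV4, int ipvPref)
{
	return isV4 ? (ipvPref == 4 || ipvPref == 46)
	            : (ipvPref == 6 || ipvPref == 64);
}

int main()
{
	// With a preference of 64, both families are allowed but only IPv6 paths are preferred.
	std::printf("v4 allowed=%d preferred=%d\n", allowedSketch(true, 64), preferredSketch(true, 64));   // 1, 0
	std::printf("v6 allowed=%d preferred=%d\n", allowedSketch(false, 64), preferredSketch(false, 64)); // 1, 1
	return 0;
}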
/**
* @param now Current time
* @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time
*/
inline bool needsToSendAck(int64_t now, int ackSendInterval) {
return ((now - _lastAckSent) >= ackSendInterval ||
(_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck;
}
/**
* @param now Current time
* @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time
*/
inline bool needsToSendQoS(int64_t now, int qosSendInterval) {
return ((_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) ||
((now - _lastQoSMeasurement) > qosSendInterval)) && _packetsReceivedSinceLastQoS;
}
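Both emission gates above follow the same pattern: emit when the send interval has elapsed or the record table has filled, but only if at least one packet has actually been received since the last emission. A minimal sketch with hypothetical interval and table size:

#include <cstdint>
#include <cstdio>

static bool needsToSendSketch(int64_t now, int64_t lastSent, int sendInterval,
                              int packetsSinceLast, int tableSize)
{
	return (((now - lastSent) >= sendInterval) || (packetsSinceLast >= tableSize))
		&& (packetsSinceLast != 0);
}

int main()
{
	// Interval elapsed and traffic was received since the last emission: send.
	std::printf("%d\n", needsToSendSketch(5000, 3000, 1000, 12, 400)); // 1
	// Interval elapsed but nothing has been received: stay quiet.
	std::printf("%d\n", needsToSendSketch(5000, 3000, 1000, 0, 400));  // 0
	return 0;
}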
/**
* Reset packet counters
*/
inline void resetPacketCounts()
{
_packetsIn = 0;
_packetsOut = 0;
}
private:
Mutex _statistics_m;
volatile int64_t _lastOut;
volatile int64_t _lastIn;
volatile int64_t _lastTrustEstablishedPacketReceived;
volatile int64_t _lastPathQualityComputeTime;
int64_t _localSocket;
volatile unsigned int _latency;
InetAddress _addr;
InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often
AtomicCounter __refCount;
std::map<uint64_t,uint64_t> _outQoSRecords; // id:egress_time
std::map<uint64_t,uint64_t> _inQoSRecords; // id:now
std::map<uint64_t,uint16_t> _inACKRecords; // id:len
std::map<uint64_t,uint64_t> qosStatsOut; // id:egress_time
std::map<uint64_t,uint64_t> qosStatsIn; // id:now
std::map<uint64_t,uint16_t> ackStatsIn; // id:len
int64_t _lastAck;
int64_t _lastThroughputEstimation;
int64_t _lastQoSMeasurement;
int64_t _lastQoSRecordPurge;
RingBuffer<int,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> qosRecordSize;
RingBuffer<float,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> qosRecordLossSamples;
RingBuffer<uint64_t,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> throughputSamples;
RingBuffer<bool,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> packetValiditySamples;
RingBuffer<float,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> _throughputVarianceSamples;
RingBuffer<uint16_t,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> latencySamples;
/**
* Last time that a VERB_ACK was received on this path.
*/
uint64_t _lastAckReceived;
/**
* Last time that a VERB_ACK was sent out on this path.
*/
uint64_t _lastAckSent;
/**
* Last time that a VERB_QOS_MEASUREMENT was sent out on this path.
*/
uint64_t _lastQoSMeasurement;
/**
* Last time that the path's throughput was estimated.
*/
uint64_t _lastThroughputEstimation;
/**
* The last time that the refractory period was updated.
*/
uint64_t _lastRefractoryUpdate;
/**
* The last time that the path was marked as "alive".
*/
uint64_t _lastAliveToggle;
/**
* State of eligibility at last check. Used for determining state changes.
*/
bool _lastEligibilityState;
/**
* Timestamp indicating when this path's trial period began.
*/
uint64_t _lastTrialBegin;
/**
* Amount of time that this path is prevented from becoming a member of a bond.
*/
uint32_t _refractoryPeriod;
/**
* Monitor interval specific to this path, or inherited from the bond controller.
*/
int32_t _monitorInterval;
/**
* Up delay interval specific to this path, or inherited from the bond controller.
*/
uint32_t _upDelay;
/**
* Down delay interval specific to this path, or inherited from the bond controller.
*/
uint32_t _downDelay;
/**
* IP version preference inherited from the physical slave.
*/
uint8_t _ipvPref;
/**
* Mode inherited from the physical slave.
*/
uint8_t _mode;
/**
* Whether this is the only path present on its physical slave.
*/
bool _onlyPathOnSlave;
/**
* Enabled state inherited from the physical slave.
*/
bool _enabled;
/**
* Whether this path is currently part of a bond.
*/
bool _bonded;
/**
* Whether this path was intentionally negotiated by either peer.
*/
bool _negotiated;
/**
* Whether this path has been deprecated due to performance issues. Current traffic flows
* will be re-allocated to other paths in the most non-disruptive manner (if possible),
* and new traffic will not be allocated to this path.
*/
bool _deprecated;
/**
* Whether flows should be moved from this path. Current traffic flows will be re-allocated
* immediately.
*/
bool _shouldReallocateFlows;
/**
* The number of flows currently assigned to this path.
*/
uint16_t _assignedFlowCount;
/**
* The mean latency (computed from a sliding window).
*/
float _latencyMean;
/**
* Packet delay variance (computed from a sliding window).
*/
float _latencyVariance;
/**
* The ratio of lost packets to received packets.
*/
float _packetLossRatio;
/**
* The ratio of packets that failed their MAC/CRC checks to those that did not.
*/
float _packetErrorRatio;
/**
* The estimated mean throughput of this path.
*/
uint64_t _throughputMean;
/**
* The maximum observed throughput of this path.
*/
uint64_t _throughputMax;
/**
* The variance in the estimated throughput of this path.
*/
float _throughputVariance;
/**
* The quality of this path relative to all others in the bond, [0-255].
*/
uint8_t _allocation;
/**
* How much load this path is under.
*/
uint64_t _byteLoad;
/**
* How much load this path is under (relative to other paths in the bond).
*/
uint8_t _relativeByteLoad;
/**
* Relative value expressing how "deserving" this path is of new traffic.
*/
uint8_t _affinity;
/**
* Score indicating the degree to which this path is preferred over others available
* to the bonding policy (used specifically by active-backup).
*/
uint32_t _failoverScore;
/**
* Number of bytes thus far sent that have not been acknowledged by the remote peer.
*/
int64_t _unackedBytes;
int64_t _expectingAckAsOf;
int16_t _packetsReceivedSinceLastAck;
int16_t _packetsReceivedSinceLastQoS;
uint64_t _maxLifetimeThroughput;
uint64_t _lastComputedMeanThroughput;
/**
* Number of packets received since the last VERB_ACK was sent to the remote peer.
*/
int32_t _packetsReceivedSinceLastAck;
/**
* Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer.
*/
int32_t _packetsReceivedSinceLastQoS;
/**
* Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput.
*/
uint64_t _bytesAckedSinceLastThroughputEstimation;
float _lastComputedMeanLatency;
float _lastComputedPacketDelayVariance;
/**
* Counters used for tracking path load.
*/
int _packetsIn;
int _packetsOut;
float _lastComputedPacketErrorRatio;
float _lastComputedPacketLossRatio;
// TODO: Remove
// cached estimates
float _lastComputedStability;
float _lastComputedRelativeQuality;
float _lastComputedThroughputDistCoeff;
unsigned char _lastAllocation;
// cached human-readable strings for tracing purposes
char _ifname[16];
char _addrString[256];
RingBuffer<uint64_t,ZT_PATH_QUALITY_METRIC_WIN_SZ> _throughputSamples;
RingBuffer<uint32_t,ZT_PATH_QUALITY_METRIC_WIN_SZ> _latencySamples;
RingBuffer<bool,ZT_PATH_QUALITY_METRIC_WIN_SZ> _packetValiditySamples;
RingBuffer<float,ZT_PATH_QUALITY_METRIC_WIN_SZ> _throughputDisturbanceSamples;
bool _prevEligibility;
};
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -14,7 +14,6 @@
#include "../version.h"
#include "Constants.hpp"
#include "Peer.hpp"
#include "Node.hpp"
#include "Switch.hpp"
#include "Network.hpp"
#include "SelfAwareness.hpp"
@ -24,8 +23,6 @@
#include "RingBuffer.hpp"
#include "Utils.hpp"
#include "../include/ZeroTierDebug.h"
namespace ZeroTier {
static unsigned char s_freeRandomByteCounter = 0;
@ -37,20 +34,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_lastTriedMemorizedPath(0),
_lastDirectPathPushSent(0),
_lastDirectPathPushReceive(0),
_lastEchoRequestReceived(0),
_lastCredentialRequestSent(0),
_lastWhoisRequestReceived(0),
_lastEchoRequestReceived(0),
_lastCredentialsReceived(0),
_lastTrustEstablishedPacketReceived(0),
_lastSentFullHello(0),
_lastACKWindowReset(0),
_lastQoSWindowReset(0),
_lastMultipathCompatibilityCheck(0),
_lastEchoCheck(0),
_freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_vProto(0),
_vMajor(0),
_vMinor(0),
@ -58,17 +49,17 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_id(peerIdentity),
_directPathPushCutoffCount(0),
_credentialsCutoffCount(0),
_linkIsBalanced(false),
_linkIsRedundant(false),
_remotePeerMultipathEnabled(false),
_lastAggregateStatsReport(0),
_lastAggregateAllocation(0),
_virtualPathCount(0),
_roundRobinPathAssignmentIdx(0),
_pathAssignmentIdx(0)
_echoRequestCutoffCount(0),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_shouldCollectPathStatistics(0),
_lastComputedAggregateMeanLatency(0)
{
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH))
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) {
throw ZT_EXCEPTION_INVALID_ARGUMENT;
}
}
void Peer::received(
@ -81,7 +72,8 @@ void Peer::received(
const uint64_t inRePacketId,
const Packet::Verb inReVerb,
const bool trustEstablished,
const uint64_t networkId)
const uint64_t networkId,
const int32_t flowId)
{
const int64_t now = RR->node->now();
@ -98,28 +90,13 @@ void Peer::received(
break;
}
recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, flowId, now);
if (trustEstablished) {
_lastTrustEstablishedPacketReceived = now;
path->trustedPacketReceived(now);
}
{
Mutex::Lock _l(_paths_m);
recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now);
if (_canUseMultipath) {
if (path->needsToSendQoS(now)) {
sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now);
}
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_paths[i].p->processBackgroundPathMeasurements(now);
}
}
}
}
if (hops == 0) {
// If this is a direct packet (no hops), update existing paths or learn new ones
bool havePath = false;
@ -137,60 +114,45 @@ void Peer::received(
}
bool attemptToContact = false;
int replaceIdx = ZT_MAX_PEER_NETWORK_PATHS;
if ((!havePath)&&(RR->node->shouldUsePathForZeroTierTraffic(tPtr,_id.address(),path->localSocket(),path->address()))) {
Mutex::Lock _l(_paths_m);
// Paths are redundant if they duplicate an alive path to the same IP or
// with the same local socket and address family.
bool redundant = false;
unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) {
redundant = true;
break;
}
// If the path is the same address and port, simply assume this is a replacement
if ( (_paths[i].p->address().ipsEqual2(path->address()))) {
replacePath = i;
break;
}
} else break;
}
// If the path isn't a duplicate of the same localSocket AND we haven't already determined a replacePath,
// then find the worst path and replace it.
if (!redundant && replacePath == ZT_MAX_PEER_NETWORK_PATHS) {
int replacePathQuality = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
const int q = _paths[i].p->quality(now);
if (q > replacePathQuality) {
replacePathQuality = q;
replacePath = i;
// Match on local socket, address family, and IP address
if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) && (_paths[i].p->address().ipsEqual2(path->address())) ) ) {
// If the port also matches, treat this path as a replacement for the existing one
if (_paths[i].p->address().port() == path->address().port()) {
replaceIdx = i;
break;
}
} else {
replacePath = i;
}
}
}
if (replaceIdx == ZT_MAX_PEER_NETWORK_PATHS) {
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (!_paths[i].p) {
replaceIdx = i;
break;
}
}
}
if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) {
if (replaceIdx != ZT_MAX_PEER_NETWORK_PATHS) {
if (verb == Packet::VERB_OK) {
RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId);
_paths[replacePath].lr = now;
_paths[replacePath].p = path;
_paths[replacePath].priority = 1;
performMultipathStateCheck(now);
if (_bondToPeer) {
_bondToPeer->nominatePath(path, now);
}
_paths[replaceIdx].lr = now;
_paths[replaceIdx].p = path;
_paths[replaceIdx].priority = 1;
} else {
attemptToContact = true;
}
// Every time we learn of new path, rebuild set of virtual paths
constructSetOfVirtualPaths();
}
}
if (attemptToContact) {
attemptToContactAt(tPtr,path->localSocket(),path->address(),now,true);
path->sent(now);
@ -203,8 +165,7 @@ void Peer::received(
// is done less frequently.
if (this->trustEstablished(now)) {
const int64_t sinceLastPush = now - _lastDirectPathPushSent;
if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL)
|| (_localMultipathSupported && (sinceLastPush >= (ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH)))) {
if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL)) {
_lastDirectPathPushSent = now;
std::vector<InetAddress> pathsToPush(RR->node->directPaths());
if (pathsToPush.size() > 0) {
@ -249,189 +210,15 @@ void Peer::received(
}
}
void Peer::constructSetOfVirtualPaths()
SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId)
{
if (!_remoteMultipathSupported) {
return;
}
Mutex::Lock _l(_virtual_paths_m);
int64_t now = RR->node->now();
_virtualPathCount = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
for(unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[j].p && _paths[j].p->alive(now)) {
int64_t localSocket = _paths[j].p->localSocket();
bool foundVirtualPath = false;
for (int k=0; k<_virtualPaths.size(); k++) {
if (_virtualPaths[k]->localSocket == localSocket && _virtualPaths[k]->p == _paths[i].p) {
foundVirtualPath = true;
}
}
if (!foundVirtualPath)
{
VirtualPath *np = new VirtualPath;
np->p = _paths[i].p;
np->localSocket = localSocket;
_virtualPaths.push_back(np);
}
}
}
}
}
}
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
{
_freeRandomByte += (unsigned char)(packetId >> 8); // grab entropy to use in path selection logic for multipath
if (_canUseMultipath) {
path->recordOutgoingPacket(now, packetId, payloadLength, verb);
}
}
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
{
if (_canUseMultipath) {
if (path->needsToSendAck(now)) {
sendACK(tPtr, path, path->localSocket(), path->address(), now);
}
path->recordIncomingPacket(now, packetId, payloadLength, verb);
}
}
void Peer::computeAggregateAllocation(int64_t now)
{
float maxStability = 0;
float totalRelativeQuality = 0;
float maxThroughput = 1;
float maxScope = 0;
float relStability[ZT_MAX_PEER_NETWORK_PATHS];
float relThroughput[ZT_MAX_PEER_NETWORK_PATHS];
memset(&relStability, 0, sizeof(relStability));
memset(&relThroughput, 0, sizeof(relThroughput));
// Survey all paths
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
relStability[i] = _paths[i].p->lastComputedStability();
relThroughput[i] = (float)_paths[i].p->maxLifetimeThroughput();
maxStability = relStability[i] > maxStability ? relStability[i] : maxStability;
maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput;
maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope;
}
}
// Convert to relative values
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
relStability[i] /= maxStability ? maxStability : 1;
relThroughput[i] /= maxThroughput ? maxThroughput : 1;
float normalized_ma = Utils::normalize((float)_paths[i].p->ackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10);
float age_contrib = exp((-1)*normalized_ma);
float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1));
float relQuality =
(relStability[i] * (float)ZT_PATH_CONTRIB_STABILITY)
+ (fmaxf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT)
+ relScope * (float)ZT_PATH_CONTRIB_SCOPE;
relQuality *= age_contrib;
// Clamp values
relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f;
relQuality = relQuality < (99.0f / 100.0f) ? relQuality : 1.0f;
totalRelativeQuality += relQuality;
_paths[i].p->updateRelativeQuality(relQuality);
}
}
// Convert set of relative performances into an allocation set
for(uint16_t i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) {
_paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255);
}
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) {
_paths[i].p->updateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255));
}
}
}
}
int Peer::computeAggregateLinkPacketDelayVariance()
{
float pdv = 0.0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
pdv += _paths[i].p->relativeQuality() * _paths[i].p->packetDelayVariance();
}
}
return (int)pdv;
}
int Peer::computeAggregateLinkMeanLatency()
{
int ml = 0;
int pathCount = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
pathCount++;
ml += (int)(_paths[i].p->relativeQuality() * _paths[i].p->meanLatency());
}
}
return ml / pathCount;
}
int Peer::aggregateLinkPhysicalPathCount()
{
std::map<std::string, bool> ifnamemap;
int pathCount = 0;
int64_t now = RR->node->now();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
if (!ifnamemap[_paths[i].p->getName()]) {
ifnamemap[_paths[i].p->getName()] = true;
pathCount++;
}
}
}
return pathCount;
}
int Peer::aggregateLinkLogicalPathCount()
{
int pathCount = 0;
int64_t now = RR->node->now();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
pathCount++;
}
}
return pathCount;
}
std::vector<SharedPtr<Path> > Peer::getAllPaths(int64_t now)
{
Mutex::Lock _l(_virtual_paths_m); // FIXME: TX can now lock RX
std::vector<SharedPtr<Path> > paths;
for (int i=0; i<_virtualPaths.size(); i++) {
if (_virtualPaths[i]->p) {
paths.push_back(_virtualPaths[i]->p);
}
}
return paths;
}
SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId)
{
Mutex::Lock _l(_paths_m);
SharedPtr<Path> selectedPath;
char curPathStr[128];
char newPathStr[128];
unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
/**
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
if (!_canUseMultipath) {
if (!_bondToPeer) {
Mutex::Lock _l(_paths_m);
unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
/**
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
long bestPathQuality = 2147483647;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@ -449,293 +236,7 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64
}
return SharedPtr<Path>();
}
// Update path measurements
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_paths[i].p->processBackgroundPathMeasurements(now);
}
}
if (RR->sw->isFlowAware()) {
// Detect new flows and update existing records
if (_flows.count(flowId)) {
_flows[flowId]->lastSend = now;
}
else {
fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n",
flowId, this->_id.address().toInt(), (_flows.size()+1));
struct Flow *newFlow = new Flow(flowId, now);
_flows[flowId] = newFlow;
newFlow->assignedPath = nullptr;
}
}
// Construct set of virtual paths if needed
if (!_virtualPaths.size()) {
constructSetOfVirtualPaths();
}
if (!_virtualPaths.size()) {
fprintf(stderr, "no paths to send packet out on\n");
return SharedPtr<Path>();
}
/**
* All traffic is sent on all paths.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) {
// Not handled here. Handled in Switch::_trySend()
}
/**
* Only one link is active. Fail-over is immediate.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_ACTIVE_BACKUP) {
bool bFoundHotPath = false;
if (!_activeBackupPath) {
/* Select the first path that appears to still be active.
* This will eventually be user-configurable */
for (int i=0; i<ZT_MAX_PEER_NETWORK_PATHS; i++) {
if (_paths[i].p) {
if (_activeBackupPath.ptr() == _paths[i].p.ptr()) {
continue;
}
_activeBackupPath = _paths[i].p;
if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
bFoundHotPath = true;
_activeBackupPath = _paths[i].p;
_pathAssignmentIdx = i;
_activeBackupPath->address().toString(curPathStr);
fprintf(stderr, "selected %s as the primary active-backup path to %llx (idx=%d)\n",
curPathStr, this->_id.address().toInt(), _pathAssignmentIdx);
break;
}
}
}
}
else {
char what[128];
if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
_activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace
int16_t previousIdx = _pathAssignmentIdx;
SharedPtr<Path> nextAlternativePath;
// Search for a hot path, at the same time find the next path in
// a RR sequence that seems viable to use as an alternative
int searchCount = 0;
while (searchCount < ZT_MAX_PEER_NETWORK_PATHS) {
_pathAssignmentIdx++;
if (_pathAssignmentIdx == ZT_MAX_PEER_NETWORK_PATHS) {
_pathAssignmentIdx = 0;
}
searchCount++;
if (_paths[_pathAssignmentIdx].p) {
_paths[_pathAssignmentIdx].p->address().toString(what);
if (_activeBackupPath.ptr() == _paths[_pathAssignmentIdx].p.ptr()) {
continue;
}
if (!nextAlternativePath) { // Record the first viable alternative in the RR sequence
nextAlternativePath = _paths[_pathAssignmentIdx].p;
}
if ((now - _paths[_pathAssignmentIdx].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
bFoundHotPath = true;
_activeBackupPath = _paths[_pathAssignmentIdx].p;
_activeBackupPath->address().toString(newPathStr);
fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to %s\n",
curPathStr, this->_id.address().toInt(), newPathStr);
break;
}
}
}
if (!bFoundHotPath) {
if (nextAlternativePath) {
_activeBackupPath = nextAlternativePath;
_activeBackupPath->address().toString(curPathStr);
//fprintf(stderr, "no hot paths found to use as active-backup primary to %llx, using next best: %s\n",
// this->_id.address().toInt(), curPathStr);
}
else {
// No change
}
}
}
}
if (!_activeBackupPath) {
return SharedPtr<Path>();
}
return _activeBackupPath;
}
/**
* Traffic is randomly distributed among all active paths.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) {
int sz = _virtualPaths.size();
if (sz) {
int idx = _freeRandomByte % sz;
_pathChoiceHist.push(idx);
_virtualPaths[idx]->p->address().toString(curPathStr);
fprintf(stderr, "sending out: (%llx), idx=%d: path=%s, localSocket=%lld\n",
this->_id.address().toInt(), idx, curPathStr, _virtualPaths[idx]->localSocket);
return _virtualPaths[idx]->p;
}
// This call is algorithmically inert but gives us a value to show in the status output
computeAggregateAllocation(now);
}
/**
* Packets are striped across all available paths.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_OPAQUE) {
int16_t previousIdx = _roundRobinPathAssignmentIdx;
int cycleCount = 0;
int minLastIn = 0;
int bestAlternativeIdx = -1;
while (cycleCount < ZT_MAX_PEER_NETWORK_PATHS) {
if (_roundRobinPathAssignmentIdx < (_virtualPaths.size()-1)) {
_roundRobinPathAssignmentIdx++;
}
else {
_roundRobinPathAssignmentIdx = 0;
}
cycleCount++;
if (_virtualPaths[_roundRobinPathAssignmentIdx]->p) {
uint64_t lastIn = _virtualPaths[_roundRobinPathAssignmentIdx]->p->lastIn();
if (bestAlternativeIdx == -1) {
minLastIn = lastIn; // Initialization
bestAlternativeIdx = 0;
}
if (lastIn < minLastIn) {
minLastIn = lastIn;
bestAlternativeIdx = _roundRobinPathAssignmentIdx;
}
if ((now - lastIn) < 5000) {
selectedPath = _virtualPaths[_roundRobinPathAssignmentIdx]->p;
}
}
}
// If we can't find an appropriate path, try the most recently active one
if (!selectedPath) {
_roundRobinPathAssignmentIdx = bestAlternativeIdx;
selectedPath = _virtualPaths[bestAlternativeIdx]->p;
selectedPath->address().toString(curPathStr);
fprintf(stderr, "could not find good path, settling for next best %s\n",curPathStr);
}
selectedPath->address().toString(curPathStr);
fprintf(stderr, "sending packet out on path %s at index %d\n",
curPathStr, _roundRobinPathAssignmentIdx);
return selectedPath;
}
/**
* Flows are striped across all available paths.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_FLOW) {
// fprintf(stderr, "ZT_MULTIPATH_BALANCE_RR_FLOW\n");
}
/**
* Flows are hashed across all available paths.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_XOR_FLOW) {
// fprintf(stderr, "ZT_MULTIPATH_BALANCE_XOR_FLOW (%llx) \n", flowId);
struct Flow *currFlow = NULL;
if (_flows.count(flowId)) {
currFlow = _flows[flowId];
if (!currFlow->assignedPath) {
int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1)));
currFlow->assignedPath = _virtualPaths[idx];
_virtualPaths[idx]->p->address().toString(curPathStr);
fprintf(stderr, "assigning flow %llx between this node and peer %llx to path %s at index %d\n",
currFlow->flowId, this->_id.address().toInt(), curPathStr, idx);
}
else {
if (!currFlow->assignedPath->p->alive(now)) {
currFlow->assignedPath->p->address().toString(curPathStr);
// Re-assign
int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1)));
currFlow->assignedPath = _virtualPaths[idx];
_virtualPaths[idx]->p->address().toString(newPathStr);
fprintf(stderr, "path %s assigned to flow %llx between this node and %llx appears to be dead, reassigning to path %s\n",
curPathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr);
}
}
return currFlow->assignedPath->p;
}
}
/**
* Proportionally allocate traffic according to dynamic path quality measurements.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) {
if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
_lastAggregateAllocation = now;
computeAggregateAllocation(now);
}
// Randomly choose path according to their allocations
float rf = _freeRandomByte;
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (rf < _paths[i].p->allocation()) {
bestPath = i;
_pathChoiceHist.push(bestPath); // Record which path we chose
break;
}
rf -= _paths[i].p->allocation();
}
}
if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) {
return _paths[bestPath].p;
}
}
/**
* Flows are dynamically allocated across paths in proportion to link strength and load.
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW) {
}
return SharedPtr<Path>();
}
char *Peer::interfaceListStr()
{
std::map<std::string, int> ifnamemap;
char tmp[32];
const int64_t now = RR->node->now();
char *ptr = _interfaceListStr;
bool imbalanced = false;
memset(_interfaceListStr, 0, sizeof(_interfaceListStr));
int alivePathCount = aggregateLinkLogicalPathCount();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
int ipv = _paths[i].p->address().isV4();
// If this is acting as an aggregate link, check allocations
float targetAllocation = 1.0f / (float)alivePathCount;
float currentAllocation = 1.0f;
if (alivePathCount > 1) {
currentAllocation = (float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count();
if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) {
imbalanced = true;
}
}
char *ipvStr = ipv ? (char*)"ipv4" : (char*)"ipv6";
sprintf(tmp, "(%s, %s, %.3f)", _paths[i].p->getName(), ipvStr, currentAllocation);
// Prevent duplicates
if(ifnamemap[_paths[i].p->getName()] != ipv) {
memcpy(ptr, tmp, strlen(tmp));
ptr += strlen(tmp);
*ptr = ' ';
ptr++;
ifnamemap[_paths[i].p->getName()] = ipv;
}
}
}
ptr--; // Overwrite trailing space
if (imbalanced) {
sprintf(tmp, ", is asymmetrical");
memcpy(ptr, tmp, sizeof(tmp));
} else {
*ptr = '\0';
}
return _interfaceListStr;
return _bondToPeer->getAppropriatePath(now, flowId);
}
void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &other) const
@ -859,87 +360,6 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &o
}
}
inline void Peer::processBackgroundPeerTasks(const int64_t now)
{
// Determine current multipath compatibility with other peer
if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
//
// Cache number of available paths so that we can short-circuit multipath logic elsewhere
//
// We also take notice of duplicate paths (same IP only) because we may have
// recently received a direct path push from a peer and our list might contain
// a dead path which hasn't been fully recognized as such. In this case we
// don't want the duplicate to trigger execution of multipath code prematurely.
//
// This is done to support the behavior of auto multipath enable/disable
// without user intervention.
//
int currAlivePathCount = 0;
int duplicatePathsFound = 0;
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
currAlivePathCount++;
for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[i].p && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address()) && i != j) {
duplicatePathsFound+=1;
break;
}
}
}
}
_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
_lastMultipathCompatibilityCheck = now;
_localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9));
_remoteMultipathSupported = _vProto > 9;
// If both peers support multipath and more than one path exist, we can use multipath logic
_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
}
// Remove old flows
if (RR->sw->isFlowAware()) {
std::map<int64_t, struct Flow *>::iterator it = _flows.begin();
while (it != _flows.end()) {
if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) {
fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n",
it->first, this->_id.address().toInt(), _flows.size());
it = _flows.erase(it);
} else {
it++;
}
}
}
}
void Peer::sendACK(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK);
uint32_t bytesToAck = path->bytesToAck();
outp.append<uint32_t>(bytesToAck);
if (atAddress) {
outp.armor(_key,false);
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size());
} else {
RR->sw->send(tPtr,outp,false);
}
path->sentAck(now);
}
void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
const int64_t _now = RR->node->now();
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT);
char qosData[ZT_PATH_MAX_QOS_PACKET_SZ];
int16_t len = path->generateQoSPacket(_now,qosData);
outp.append(qosData,len);
if (atAddress) {
outp.armor(_key,false);
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size());
} else {
RR->sw->send(tPtr,outp,false);
}
path->sentQoS(now);
}
void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO);
@ -1005,29 +425,57 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now)
}
}
void Peer::performMultipathStateCheck(int64_t now)
{
/**
* Check for conditions required for multipath bonding and create a bond
* if allowed.
*/
_localMultipathSupported = ((RR->bc->inUse()) && (ZT_PROTO_VERSION > 9));
if (_localMultipathSupported) {
int currAlivePathCount = 0;
int duplicatePathsFound = 0;
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
currAlivePathCount++;
for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[i].p && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address()) && i != j) {
duplicatePathsFound+=1;
break;
}
}
}
}
_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
_remoteMultipathSupported = _vProto > 9;
_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
}
if (_canUseMultipath && !_bondToPeer) {
if (RR->bc) {
_bondToPeer = RR->bc->createTransportTriggeredBond(RR, this);
/**
* Allow new bond to retroactively learn all paths known to this peer
*/
if (_bondToPeer) {
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_bondToPeer->nominatePath(_paths[i].p, now);
}
}
}
}
}
}
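performMultipathStateCheck() only creates a bond when bonding is configured locally, both peers speak a protocol version newer than 9, and more than one unique alive path exists. Because each duplicate pair is discovered from both ends of the nested loop, half of the duplicate count is subtracted. A small worked sketch of that arithmetic with hypothetical path counts:

#include <cstdio>

int main()
{
	// Hypothetical survey: three alive paths, two of which reach the same remote IP.
	const int currAlivePathCount = 3;
	const int duplicatePathsFound = 2; // the duplicate pair is counted once from each side
	const int uniqueAlivePathCount = currAlivePathCount - (duplicatePathsFound / 2);
	const bool localMultipathSupported  = true; // stands in for RR->bc->inUse() && (ZT_PROTO_VERSION > 9)
	const bool remoteMultipathSupported = true; // stands in for _vProto > 9
	const bool canUseMultipath = localMultipathSupported && remoteMultipathSupported && (uniqueAlivePathCount > 1);
	std::printf("unique=%d canUseMultipath=%d\n", uniqueAlivePathCount, canUseMultipath); // unique=2 canUseMultipath=1
	return 0;
}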
unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
{
unsigned int sent = 0;
Mutex::Lock _l(_paths_m);
processBackgroundPeerTasks(now);
performMultipathStateCheck(now);
// Emit traces regarding aggregate link status
if (_canUseMultipath) {
int alivePathCount = aggregateLinkPhysicalPathCount();
if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) {
_lastAggregateStatsReport = now;
if (alivePathCount) {
RR->t->peerLinkAggregateStatistics(NULL,*this);
}
} if (alivePathCount < 2 && _linkIsRedundant) {
_linkIsRedundant = !_linkIsRedundant;
RR->t->peerLinkNoLongerAggregate(NULL,*this);
} if (alivePathCount > 1 && !_linkIsRedundant) {
_linkIsRedundant = !_linkIsRedundant;
RR->t->peerLinkNoLongerAggregate(NULL,*this);
}
}
const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD);
_lastSentFullHello = now;
// Right now we only keep pinging links that have the maximum priority. The
// priority is used to track cluster redirections, meaning that when a cluster
@ -1040,15 +488,13 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
else break;
}
const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD);
_lastSentFullHello = now;
unsigned int j = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
// Clean expired and reduced priority paths
if ( ((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION) && (_paths[i].priority == maxPriority) ) {
if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))) {
if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))
|| (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) {
attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello);
_paths[i].p->sent(now);
sent |= (_paths[i].p->address().ss_family == AF_INET) ? 0x1 : 0x2;
@ -1059,14 +505,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
}
} else break;
}
if (canUseMultipath()) {
while(j < ZT_MAX_PEER_NETWORK_PATHS) {
_paths[j].lr = 0;
_paths[j].p.zero();
_paths[j].priority = 1;
++j;
}
}
return sent;
}
@ -1133,4 +571,30 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres
}
}
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
if (!_shouldCollectPathStatistics || !_bondToPeer) {
return;
}
_bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now);
}
void Peer::recordIncomingInvalidPacket(const SharedPtr<Path>& path)
{
if (!_shouldCollectPathStatistics || !_bondToPeer) {
return;
}
_bondToPeer->recordIncomingInvalidPacket(path);
}
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
if (!_shouldCollectPathStatistics || !_bondToPeer) {
return;
}
_bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now);
}
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -15,8 +15,6 @@
#define ZT_PEER_HPP
#include <vector>
#include <map>
#include <queue>
#include "../include/ZeroTierOne.h"
@ -33,6 +31,8 @@
#include "AtomicCounter.hpp"
#include "Hashtable.hpp"
#include "Mutex.hpp"
#include "Bond.hpp"
#include "BondController.hpp"
#define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2))
@ -44,6 +44,9 @@ namespace ZeroTier {
class Peer
{
friend class SharedPtr<Peer>;
friend class SharedPtr<Bond>;
friend class Switch;
friend class Bond;
private:
Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized
@ -97,7 +100,8 @@ public:
const uint64_t inRePacketId,
const Packet::Verb inReVerb,
const bool trustEstablished,
const uint64_t networkId);
const uint64_t networkId,
const int32_t flowId);
/**
* Check whether we have an active path to this peer via the given address
@ -136,94 +140,49 @@ public:
return false;
}
void constructSetOfVirtualPaths();
/**
* Record statistics on outgoing packets
*
* @param path Path over which packet was sent
* @param id Packet ID
* @param len Length of packet payload
* @param verb Packet verb
* @param now Current time
*/
void recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now);
/**
* Record statistics on incoming packets
*
* @param path Path over which packet was sent
* @param id Packet ID
* @param len Length of packet payload
* @param verb Packet verb
* @param now Current time
*/
void recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now);
/**
* Send an ACK to peer for the most recent packets received
* Record statistics on incoming packets
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param localSocket Raw socket the ACK packet will be sent over
* @param atAddress Destination for the ACK packet
* @param path Path over which packet was received
* @param packetId Packet ID
* @param payloadLength Length of packet data payload
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void sendACK(void *tPtr, const SharedPtr<Path> &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now);
void recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now);
/**
* Send a QoS packet to peer so that it can evaluate the quality of this link
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param localSocket Raw socket the QoS packet will be sent over
* @param atAddress Destination for the QoS packet
* @param path Path over which packet is being sent
* @param packetId Packet ID
* @param payloadLength Length of packet data payload
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr<Path> &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now);
void recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now);
/**
* Compute relative quality values and allocations for the components of the aggregate link
* Record an invalid incoming packet. This packet failed
* MAC/compression/cipher checks and will now contribute to a
* Packet Error Ratio (PER).
*
* @param now Current time
* @param path Path over which packet was received
*/
void computeAggregateAllocation(int64_t now);
/**
* @return The aggregate link Packet Delay Variance (PDV)
*/
int computeAggregateLinkPacketDelayVariance();
/**
* @return The aggregate link mean latency
*/
int computeAggregateLinkMeanLatency();
/**
* @return The number of currently alive "physical" paths in the aggregate link
*/
int aggregateLinkPhysicalPathCount();
/**
* @return The number of currently alive "logical" paths in the aggregate link
*/
int aggregateLinkLogicalPathCount();
std::vector<SharedPtr<Path>> getAllPaths(int64_t now);
void recordIncomingInvalidPacket(const SharedPtr<Path>& path);
/**
* Get the most appropriate direct path based on current multipath and QoS configuration
*
* @param now Current time
* @param flowId Session-specific protocol flow identifier used for path allocation
* @param includeExpired If true, include even expired paths
* @return Best current path or NULL if none
*/
SharedPtr<Path> getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId = -1);
/**
* Generate a human-readable string of interface names making up the aggregate link, also include
* moving allocation and IP version number for each (for tracing)
*/
char *interfaceListStr();
SharedPtr<Path> getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId = -1);
/**
* Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path
@ -265,6 +224,13 @@ public:
*/
void tryMemorizedPath(void *tPtr,int64_t now);
/**
* A check to be performed periodically which determines whether multipath communication is
* possible with this peer. This check should be performed early in the life-cycle of the peer
* as well as during the process of learning new paths.
*/
void performMultipathStateCheck(int64_t now);
/**
* Send pings or keepalives depending on configured timeouts
*
@ -277,16 +243,6 @@ public:
*/
unsigned int doPingAndKeepalive(void *tPtr,int64_t now);
/**
* Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state.
* This should be called frequently so that we can detect and remove unproductive or invalid paths.
*
* Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL.
*
* @return Number of paths that were pruned this round
*/
unsigned int prunePaths();
/**
* Process a cluster redirect sent by this peer
*
@ -348,7 +304,7 @@ public:
inline unsigned int latency(const int64_t now)
{
if (_canUseMultipath) {
return (int)computeAggregateLinkMeanLatency();
return (int)_lastComputedAggregateMeanLatency;
} else {
SharedPtr<Path> bp(getAppropriatePath(now,false));
if (bp)
@ -407,37 +363,6 @@ public:
inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); }
/**
* Periodically update known multipath activation constraints. This is done so that we know when and when
* not to use multipath logic. Doing this once every few seconds is sufficient.
*
* @param now Current time
*/
inline void processBackgroundPeerTasks(const int64_t now);
/**
* Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK
* or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client
* shall assume that multipath is not enabled and should only use classical Protocol 9 logic.
*/
inline void inferRemoteMultipathEnabled() { _remotePeerMultipathEnabled = true; }
/**
* @return Whether the local client supports and is configured to use multipath
*/
inline bool localMultipathSupport() { return _localMultipathSupported; }
/**
* @return Whether the remote peer supports and is configured to use multipath
*/
inline bool remoteMultipathSupport() { return _remoteMultipathSupported; }
/**
* @return Whether this client can use multipath to communicate with this peer. True if both peers are using
* the correct protocol and if both peers have multipath enabled. False if otherwise.
*/
inline bool canUseMultipath() { return _canUseMultipath; }
/**
* @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms
*/
@ -492,50 +417,35 @@ public:
}
/**
* Rate limit gate for inbound ECHO requests
* Rate limit gate for inbound ECHO requests. This rate limiter works
* by draining a certain number of requests per unit time. Each peer may
* theoretically receive up to ZT_ECHO_CUTOFF_LIMIT requests per second.
*/
inline bool rateGateEchoRequest(const int64_t now)
{
if ((now - _lastEchoRequestReceived) >= ZT_PEER_GENERAL_RATE_LIMIT) {
_lastEchoRequestReceived = now;
return true;
}
return false;
}
/**
* Rate limit gate for VERB_ACK
*/
inline bool rateGateACK(const int64_t now)
{
if ((now - _lastACKWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) {
_lastACKWindowReset = now;
_ACKCutoffCount = 0;
/*
// TODO: Rethink this
if (_canUseMultipath) {
_echoRequestCutoffCount++;
int numToDrain = (now - _lastEchoCheck) / ZT_ECHO_DRAINAGE_DIVISOR;
_lastEchoCheck = now;
fprintf(stderr, "ZT_ECHO_CUTOFF_LIMIT=%d, (now - _lastEchoCheck)=%d, numToDrain=%d, ZT_ECHO_DRAINAGE_DIVISOR=%d\n", ZT_ECHO_CUTOFF_LIMIT, (now - _lastEchoCheck), numToDrain, ZT_ECHO_DRAINAGE_DIVISOR);
if (_echoRequestCutoffCount > numToDrain) {
_echoRequestCutoffCount-=numToDrain;
}
else {
_echoRequestCutoffCount = 0;
}
return (_echoRequestCutoffCount < ZT_ECHO_CUTOFF_LIMIT);
} else {
++_ACKCutoffCount;
if ((now - _lastEchoRequestReceived) >= (ZT_PEER_GENERAL_RATE_LIMIT)) {
_lastEchoRequestReceived = now;
return true;
}
return false;
}
return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_QOS_MEASUREMENT
*/
inline bool rateGateQoS(const int64_t now)
{
if ((now - _lastQoSWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) {
_lastQoSWindowReset = now;
_QoSCutoffCount = 0;
} else {
++_QoSCutoffCount;
}
return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* @return Whether this peer is reachable via an aggregate link
*/
inline bool hasAggregateLink() {
return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled;
*/
return true;
}
/**
@ -610,6 +520,18 @@ public:
}
}
/**
*
* @return The bond to which this peer belongs, if any
*/
SharedPtr<Bond> bond() { return _bondToPeer; }
/**
*
* @return The bonding policy currently applied to this peer
*/
inline int8_t bondingPolicy() { return _bondingPolicy; }
private:
struct _PeerPath
{
@ -628,25 +550,16 @@ private:
int64_t _lastTriedMemorizedPath;
int64_t _lastDirectPathPushSent;
int64_t _lastDirectPathPushReceive;
int64_t _lastEchoRequestReceived;
int64_t _lastCredentialRequestSent;
int64_t _lastWhoisRequestReceived;
int64_t _lastEchoRequestReceived;
int64_t _lastCredentialsReceived;
int64_t _lastTrustEstablishedPacketReceived;
int64_t _lastSentFullHello;
int64_t _lastPathPrune;
int64_t _lastACKWindowReset;
int64_t _lastQoSWindowReset;
int64_t _lastMultipathCompatibilityCheck;
int64_t _lastEchoCheck;
unsigned char _freeRandomByte;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
uint16_t _vProto;
uint16_t _vMajor;
uint16_t _vMinor;
@ -659,62 +572,22 @@ private:
unsigned int _directPathPushCutoffCount;
unsigned int _credentialsCutoffCount;
unsigned int _QoSCutoffCount;
unsigned int _ACKCutoffCount;
unsigned int _echoRequestCutoffCount;
AtomicCounter __refCount;
RingBuffer<int,ZT_MULTIPATH_PROPORTION_WIN_SZ> _pathChoiceHist;
bool _linkIsBalanced;
bool _linkIsRedundant;
bool _remotePeerMultipathEnabled;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
int64_t _lastAggregateStatsReport;
int64_t _lastAggregateAllocation;
volatile bool _shouldCollectPathStatistics;
volatile int8_t _bondingPolicy;
char _interfaceListStr[256]; // 16 characters * 16 paths in a link
int32_t _lastComputedAggregateMeanLatency;
//
struct LinkPerformanceEntry
{
int64_t packetId;
struct VirtualPath *egressVirtualPath;
struct VirtualPath *ingressVirtualPath;
};
// Virtual paths
int _virtualPathCount;
Mutex _virtual_paths_m;
struct VirtualPath
{
SharedPtr<Path> p;
int64_t localSocket;
std::queue<struct LinkPerformanceEntry *> performanceEntries;
};
std::vector<struct VirtualPath*> _virtualPaths;
// Flows
struct Flow
{
Flow(int64_t fid, int64_t ls) :
flowId(fid),
lastSend(ls),
assignedPath(NULL)
{}
int64_t flowId;
int64_t bytesPerSecond;
int64_t lastSend;
struct VirtualPath *assignedPath;
};
std::map<int64_t, struct Flow *> _flows;
int16_t _roundRobinPathAssignmentIdx;
SharedPtr<Path> _activeBackupPath;
int16_t _pathAssignmentIdx;
SharedPtr<Bond> _bondToPeer;
};
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -238,6 +238,21 @@ public:
return curr_cnt ? subtotal / (float)curr_cnt : 0;
}
/**
* @return The sum of the contents of the buffer
*/
inline float sum()
{
size_t iterator = begin;
float total = 0;
size_t curr_cnt = count();
for (size_t i=0; i<curr_cnt; i++) {
iterator = (iterator + S - 1) % curr_cnt;
total += (float)*(buf + iterator);
}
return total;
}
/**
* @return The sample standard deviation of element values
*/
@ -306,10 +321,10 @@ public:
for (size_t i=0; i<S; i++) {
iterator = (iterator + S - 1) % S;
if (typeid(T) == typeid(int)) {
//DEBUG_INFO("buf[%2zu]=%2d", iterator, (int)*(buf + iterator));
fprintf(stderr, "buf[%2zu]=%2d\n", iterator, (int)*(buf + iterator));
}
else {
//DEBUG_INFO("buf[%2zu]=%2f", iterator, (float)*(buf + iterator));
fprintf(stderr, "buf[%2zu]=%2f\n", iterator, (float)*(buf + iterator));
}
}
}

View File

@ -30,6 +30,7 @@ class Multicaster;
class NetworkController;
class SelfAwareness;
class Trace;
class BondController;
/**
* Holds global state for an instance of ZeroTier::Node
@ -75,6 +76,7 @@ public:
Multicaster *mc;
Topology *topology;
SelfAwareness *sa;
BondController *bc;
// This node's identity and string representations thereof
Identity identity;

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -42,8 +42,38 @@ Switch::Switch(const RuntimeEnvironment *renv) :
{
}
// Returns true if packet appears valid; pos and proto will be set
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
if (frameLen < 40)
return false;
pos = 40;
proto = frameData[6];
while (pos <= frameLen) {
switch(proto) {
case 0: // hop-by-hop options
case 43: // routing
case 60: // destination options
case 135: // mobility options
if ((pos + 8) > frameLen)
return false; // invalid!
proto = frameData[pos];
pos += ((unsigned int)frameData[pos + 1] * 8) + 8;
break;
//case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway
//case 50:
//case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff
default:
return true;
}
}
return false; // overflow == invalid
}
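As a usage note, the helper above walks the IPv6 Next Header chain past skippable extension headers until it reaches a transport protocol, leaving pos at the start of the transport header. A self-contained sketch of the same walk for the common no-extension-header case (hypothetical frame contents and helper name, not the library's test code):

#include <cstdint>
#include <cstdio>
#include <cstring>

static bool ipv6PayloadSketch(const uint8_t *f, unsigned int len, unsigned int &pos, unsigned int &proto)
{
	if (len < 40) return false;
	pos = 40;     // fixed IPv6 header length
	proto = f[6]; // Next Header field
	while (pos <= len) {
		switch (proto) {
			case 0: case 43: case 60: case 135: // skippable extension headers
				if ((pos + 8) > len) return false;
				proto = f[pos];
				pos += ((unsigned int)f[pos + 1] * 8) + 8;
				break;
			default:
				return true; // reached the transport payload
		}
	}
	return false;
}

int main()
{
	uint8_t frame[48];
	std::memset(frame, 0, sizeof(frame));
	frame[6] = 17; // Next Header: UDP, no extension headers
	unsigned int pos = 0, proto = 0;
	if (ipv6PayloadSketch(frame, sizeof(frame), pos, proto))
		std::printf("payload at %u, proto %u\n", pos, proto); // payload at 40, proto 17
	return 0;
}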
void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len)
{
int32_t flowId = ZT_QOS_NO_FLOW;
try {
const int64_t now = RR->node->now();
@ -112,6 +142,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
if (rq->packetId != fragmentPacketId) {
// No packet found, so we received a fragment without its head.
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = fragmentPacketId;
rq->frags[fragmentNumber - 1] = fragment;
@ -130,7 +161,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
for(unsigned int f=1;f<totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,tPtr)) {
if (rq->frag0.tryDecode(RR,tPtr,flowId)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
@ -195,6 +226,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
if (rq->packetId != packetId) {
// If we have no other fragments yet, create an entry and save the head
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = packetId;
rq->frag0.init(data,len,path,now);
@ -211,7 +243,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
for(unsigned int f=1;f<rq->totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,tPtr)) {
if (rq->frag0.tryDecode(RR,tPtr,flowId)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
@ -224,9 +256,10 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
} else {
// Packet is unfragmented, so just process it
IncomingPacket packet(data,len,path,now);
if (!packet.tryDecode(RR,tPtr)) {
if (!packet.tryDecode(RR,tPtr,flowId)) {
RXQueueEntry *const rq = _nextRXQueueEntry();
Mutex::Lock rql(rq->lock);
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = packet.packetId();
rq->frag0 = packet;
@ -242,43 +275,6 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
} catch ( ... ) {} // sanity check, should be caught elsewhere
}
// Returns true if packet appears valid; pos and proto will be set
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
if (frameLen < 40)
return false;
pos = 40;
proto = frameData[6];
while (pos <= frameLen) {
switch(proto) {
case 0: // hop-by-hop options
case 43: // routing
case 60: // destination options
case 135: // mobility options
if ((pos + 8) > frameLen)
return false; // invalid!
proto = frameData[pos];
pos += ((unsigned int)frameData[pos + 1] * 8) + 8;
break;
//case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway
//case 50:
//case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff
default:
return true;
}
}
return false; // overflow == invalid
}
bool Switch::isFlowAware()
{
int mode = RR->node->getMultipathMode();
return (( mode == ZT_MULTIPATH_BALANCE_RR_FLOW)
|| (mode == ZT_MULTIPATH_BALANCE_XOR_FLOW)
|| (mode == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW));
}
void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len)
{
if (!network->hasConfig())
@ -293,75 +289,73 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
}
}
uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET;
uint8_t qosBucket = ZT_AQM_DEFAULT_BUCKET;
/* A pseudo-unique identifier used by the balancing and bonding policies to associate properties
* of a specific protocol flow over time and to determine which virtual path this packet
* shall be sent out on. This identifier consists of the source port and destination port
* of the encapsulated frame.
/**
* A pseudo-unique identifier used by balancing and bonding policies to
* categorize individual flows/conversations for assignment to a specific
* physical path. This identifier consists of the source port and
* destination port of the encapsulated frame.
*
* A flowId of -1 will indicate that whatever packet we are about to transmit has no
* preferred virtual path and will be sent out according to what the multipath logic
* deems appropriate. An example of this would be an ICMP packet.
* A flowId of -1 will indicate that there is no preference for how this
* packet shall be sent. An example of this would be an ICMP packet.
*/
int64_t flowId = -1;
int32_t flowId = ZT_QOS_NO_FLOW;
if (isFlowAware()) {
if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
int8_t proto = (reinterpret_cast<const uint8_t *>(data)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(data)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto;
}
break;
}
if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
uint8_t proto = (reinterpret_cast<const uint8_t *>(data)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(data)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
//flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = dstPort ^ srcPort ^ proto;
}
break;
}
}
if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)data, len, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto;
}
break;
default:
break;
}
if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)data, len, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
//flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = dstPort ^ srcPort ^ proto;
}
break;
default:
break;
}
}
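For reference, the flow identifier computed above is a symmetric hash of the transport-layer ports and the protocol number, so both directions of a conversation map to the same flow. A minimal standalone sketch of the IPv4 case (hypothetical helper name; TCP/UDP/SCTP/UDPLite only, anything else gets no flow preference):
#include <cstdint>
// Hypothetical helper mirroring the IPv4 logic above. Returns -1 (no flow
// preference, e.g. ICMP) when the frame carries no port information.
static int32_t exampleFlowIdV4(const uint8_t *frame, unsigned int len)
{
	if (len < 20)
		return -1; // not a complete IPv4 header
	const uint8_t proto = frame[9];
	const unsigned int headerLen = 4 * (frame[0] & 0xf);
	if ((proto == 0x06 || proto == 0x11 || proto == 0x84 || proto == 0x88) && (len > (headerLen + 4))) {
		const uint16_t srcPort = ((uint16_t)frame[headerLen] << 8) | frame[headerLen + 1];
		const uint16_t dstPort = ((uint16_t)frame[headerLen + 2] << 8) | frame[headerLen + 3];
		return (int32_t)(dstPort ^ srcPort ^ proto); // same symmetric hash as above
	}
	return -1;
}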
@ -595,7 +589,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
}
}
void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId)
void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId)
{
if(!network->qosEnabled()) {
send(tPtr, packet, encrypt, flowId);
@ -603,18 +597,16 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
}
NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()];
if (!nqcb) {
// DEBUG_INFO("creating network QoS control block (NQCB) for network %llx", network->id());
nqcb = new NetworkQoSControlBlock();
_netQueueControlBlock[network->id()] = nqcb;
// Initialize ZT_QOS_NUM_BUCKETS queues and place them in the INACTIVE list
// These queues will be shuffled between the new/old/inactive lists by the enqueue/dequeue algorithm
for (int i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (int i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
nqcb->inactiveQueues.push_back(new ManagedQueue(i));
}
}
// Don't apply QoS scheduling to ZT protocol traffic
if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) {
// just send packet normally, no QoS for ZT protocol traffic
send(tPtr, packet, encrypt, flowId);
}
@ -624,8 +616,9 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
const Address dest(packet.destination());
TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId);
ManagedQueue *selectedQueue = nullptr;
for (size_t i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (size_t i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
if (i < nqcb->oldQueues.size()) { // search old queues first (I think this is best since old would imply most recent usage of the queue)
if (nqcb->oldQueues[i]->id == qosBucket) {
selectedQueue = nqcb->oldQueues[i];
@ -638,7 +631,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
if (nqcb->inactiveQueues[i]->id == qosBucket) {
selectedQueue = nqcb->inactiveQueues[i];
// move queue to end of NEW queue list
selectedQueue->byteCredit = ZT_QOS_QUANTUM;
selectedQueue->byteCredit = ZT_AQM_QUANTUM;
// DEBUG_INFO("moving q=%p from INACTIVE to NEW list", selectedQueue);
nqcb->newQueues.push_back(selectedQueue);
nqcb->inactiveQueues.erase(nqcb->inactiveQueues.begin() + i);
@ -657,11 +650,11 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
// Drop a packet if necessary
ManagedQueue *selectedQueueToDropFrom = nullptr;
if (nqcb->_currEnqueuedPackets > ZT_QOS_MAX_ENQUEUED_PACKETS)
if (nqcb->_currEnqueuedPackets > ZT_AQM_MAX_ENQUEUED_PACKETS)
{
// DEBUG_INFO("too many enqueued packets (%d), finding packet to drop", nqcb->_currEnqueuedPackets);
int maxQueueLength = 0;
for (size_t i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (size_t i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
if (i < nqcb->oldQueues.size()) {
if (nqcb->oldQueues[i]->byteLength > maxQueueLength) {
maxQueueLength = nqcb->oldQueues[i]->byteLength;
@ -694,7 +687,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
uint64_t Switch::control_law(uint64_t t, int count)
{
return (uint64_t)(t + ZT_QOS_INTERVAL / sqrt(count));
return (uint64_t)(t + ZT_AQM_INTERVAL / sqrt(count));
}
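This is the standard CoDel control law: while a queue stays above target, successive drops are scheduled interval/sqrt(count) apart, so the drop rate ramps up smoothly. A small worked sketch (assuming, purely for illustration, an interval of 100 ms; the actual ZT_AQM_INTERVAL value is defined elsewhere):
#include <cmath>
#include <cstdint>
#include <cstdio>
int main()
{
	const double intervalMs = 100.0; // illustrative stand-in for ZT_AQM_INTERVAL
	uint64_t t = 0;
	for (int count = 1; count <= 4; ++count) {
		t = (uint64_t)(t + intervalMs / std::sqrt((double)count));
		std::printf("drop %d at t=%llu ms\n", count, (unsigned long long)t);
	}
	return 0; // prints 100, 170, 227, 277: the gap shrinks as count grows
}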
Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now)
@ -708,14 +701,14 @@ Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now)
return r;
}
uint64_t sojourn_time = now - r.p->creationTime;
if (sojourn_time < ZT_QOS_TARGET || q->byteLength <= ZT_DEFAULT_MTU) {
if (sojourn_time < ZT_AQM_TARGET || q->byteLength <= ZT_DEFAULT_MTU) {
// went below - stay below for at least interval
q->first_above_time = 0;
} else {
if (q->first_above_time == 0) {
// just went above from below. if still above at
// first_above_time, will say it's ok to drop.
q->first_above_time = now + ZT_QOS_INTERVAL;
q->first_above_time = now + ZT_AQM_INTERVAL;
} else if (now >= q->first_above_time) {
r.ok_to_drop = true;
}
@ -747,7 +740,7 @@ Switch::TXQueueEntry * Switch::CoDelDequeue(ManagedQueue *q, bool isNew, uint64_
q->q.pop_front(); // drop
r = dodequeue(q, now);
q->dropping = true;
q->count = (q->count > 2 && now - q->drop_next < 8*ZT_QOS_INTERVAL)?
q->count = (q->count > 2 && now - q->drop_next < 8*ZT_AQM_INTERVAL)?
q->count - 2 : 1;
q->drop_next = control_law(now, q->count);
}
@ -775,7 +768,7 @@ void Switch::aqm_dequeue(void *tPtr)
while (currQueues->size()) {
ManagedQueue *queueAtFrontOfList = currQueues->front();
if (queueAtFrontOfList->byteCredit < 0) {
queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM;
queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM;
// Move to list of OLD queues
// DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList);
oldQueues->push_back(queueAtFrontOfList);
@ -810,7 +803,7 @@ void Switch::aqm_dequeue(void *tPtr)
while (currQueues->size()) {
ManagedQueue *queueAtFrontOfList = currQueues->front();
if (queueAtFrontOfList->byteCredit < 0) {
queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM;
queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM;
oldQueues->push_back(queueAtFrontOfList);
currQueues->erase(currQueues->begin());
} else {
@ -850,7 +843,7 @@ void Switch::removeNetworkQoSControlBlock(uint64_t nwid)
}
}
void Switch::send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId)
void Switch::send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId)
{
const Address dest(packet.destination());
if (dest == RR->identity.address())
@ -883,7 +876,7 @@ void Switch::requestWhois(void *tPtr,const int64_t now,const Address &addr)
const SharedPtr<Peer> upstream(RR->topology->getUpstreamPeer());
if (upstream) {
int64_t flowId = -1;
int32_t flowId = ZT_QOS_NO_FLOW;
Packet outp(upstream->address(),RR->identity.address(),Packet::VERB_WHOIS);
addr.appendTo(outp);
RR->node->expectReplyTo(outp.packetId());
@ -903,7 +896,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer)
RXQueueEntry *const rq = &(_rxQueue[ptr]);
Mutex::Lock rql(rq->lock);
if ((rq->timestamp)&&(rq->complete)) {
if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT))
if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT))
rq->timestamp = 0;
}
}
@ -954,7 +947,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now)
RXQueueEntry *const rq = &(_rxQueue[ptr]);
Mutex::Lock rql(rq->lock);
if ((rq->timestamp)&&(rq->complete)) {
if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
rq->timestamp = 0;
} else {
const Address src(rq->frag0.source());
@ -1000,7 +993,7 @@ bool Switch::_shouldUnite(const int64_t now,const Address &source,const Address
return false;
}
bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId)
bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId)
{
SharedPtr<Path> viaPath;
const int64_t now = RR->node->now();
@ -1008,8 +1001,18 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId)
const SharedPtr<Peer> peer(RR->topology->getPeer(tPtr,destination));
if (peer) {
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) {
// Nothing here, we'll grab an entire set of paths to send out on below
if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST)
&& (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) {
const SharedPtr<Peer> relay(RR->topology->getUpstreamPeer());
Mutex::Lock _l(peer->_paths_m);
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (peer->_paths[i].p && peer->_paths[i].p->alive(now)) {
char pathStr[128];
peer->_paths[i].p->address().toString(pathStr);
_sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId);
}
}
return true;
}
else {
viaPath = peer->getAppropriatePath(now,false,flowId);
@ -1021,61 +1024,51 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId)
return false;
}
}
if (viaPath) {
_sendViaSpecificPath(tPtr,peer,viaPath,now,packet,encrypt,flowId);
return true;
}
}
}
return false;
}
void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId)
{
unsigned int mtu = ZT_DEFAULT_PHYSMTU;
uint64_t trustedPathId = 0;
RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId);
unsigned int chunkSize = std::min(packet.size(),mtu);
packet.setFragmented(chunkSize < packet.size());
peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), flowId, now);
if (trustedPathId) {
packet.setTrusted(trustedPathId);
} else {
return false;
packet.armor(peer->key(),encrypt);
}
// If sending on all paths, set viaPath to first path
int nextPathIdx = 0;
std::vector<SharedPtr<Path>> paths = peer->getAllPaths(now);
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) {
if (paths.size()) {
viaPath = paths[nextPathIdx++];
}
}
if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) {
if (chunkSize < packet.size()) {
// Too big for one packet, fragment the rest
unsigned int fragStart = chunkSize;
unsigned int remaining = packet.size() - chunkSize;
unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH));
if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining)
++fragsRemaining;
const unsigned int totalFragments = fragsRemaining + 1;
while (viaPath) {
unsigned int mtu = ZT_DEFAULT_PHYSMTU;
uint64_t trustedPathId = 0;
RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId);
unsigned int chunkSize = std::min(packet.size(),mtu);
packet.setFragmented(chunkSize < packet.size());
peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now);
if (trustedPathId) {
packet.setTrusted(trustedPathId);
} else {
packet.armor(peer->key(),encrypt);
}
if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) {
if (chunkSize < packet.size()) {
// Too big for one packet, fragment the rest
unsigned int fragStart = chunkSize;
unsigned int remaining = packet.size() - chunkSize;
unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH));
if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining)
++fragsRemaining;
const unsigned int totalFragments = fragsRemaining + 1;
for(unsigned int fno=1;fno<totalFragments;++fno) {
chunkSize = std::min(remaining,(unsigned int)(mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH));
Packet::Fragment frag(packet,fragStart,chunkSize,fno,totalFragments);
viaPath->send(RR,tPtr,frag.data(),frag.size(),now);
fragStart += chunkSize;
remaining -= chunkSize;
}
}
}
viaPath.zero();
if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) {
if (paths.size() > nextPathIdx) {
viaPath = paths[nextPathIdx++];
for(unsigned int fno=1;fno<totalFragments;++fno) {
chunkSize = std::min(remaining,(unsigned int)(mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH));
Packet::Fragment frag(packet,fragStart,chunkSize,fno,totalFragments);
viaPath->send(RR,tPtr,frag.data(),frag.size(),now);
fragStart += chunkSize;
remaining -= chunkSize;
}
}
}
return true;
}
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -59,6 +59,8 @@ class Switch
struct ManagedQueue;
struct TXQueueEntry;
friend class SharedPtr<Peer>;
typedef struct {
TXQueueEntry *p;
bool ok_to_drop;
@ -123,7 +125,7 @@ public:
* @param encrypt Encrypt packet payload? (always true except for HELLO)
* @param qosBucket Which bucket the rule-system determined this packet should fall into
*/
void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId = -1);
void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW);
/**
* Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks
@ -169,7 +171,7 @@ public:
* @param packet Packet to send (buffer may be modified)
* @param encrypt Encrypt packet payload? (always true except for HELLO)
*/
void send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1);
void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW);
/**
* Request WHOIS on a given address
@ -204,7 +206,8 @@ public:
private:
bool _shouldUnite(const int64_t now,const Address &source,const Address &destination);
bool _trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); // packet is modified if return is true
bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true
void _sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId);
const RuntimeEnvironment *const RR;
int64_t _lastBeaconResponse;
@ -225,6 +228,7 @@ private:
unsigned int totalFragments; // 0 if only frag0 received, waiting for frags
uint32_t haveFragments; // bit mask, LSB to MSB
volatile bool complete; // if true, packet is complete
volatile int32_t flowId;
Mutex lock;
};
RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];
@ -253,7 +257,7 @@ private:
struct TXQueueEntry
{
TXQueueEntry() {}
TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int64_t fid) :
TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) :
dest(d),
creationTime(ct),
packet(p),
@ -264,7 +268,7 @@ private:
uint64_t creationTime;
Packet packet; // unencrypted/unMAC'd packet -- this is done at send time
bool encrypt;
int64_t flowId;
int32_t flowId;
};
std::list< TXQueueEntry > _txQueue;
Mutex _txQueue_m;
@ -296,7 +300,7 @@ private:
{
ManagedQueue(int id) :
id(id),
byteCredit(ZT_QOS_QUANTUM),
byteCredit(ZT_AQM_QUANTUM),
byteLength(0),
dropping(false)
{}

View File

@ -94,29 +94,26 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,
}
}
void Trace::peerLinkNowAggregate(void *const tPtr,Peer &peer)
void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer)
{
if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM)) {
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt());
}
if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE)) {
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a proportionally-balanced aggregate link",peer.address().toInt());
}
//ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt());
}
void Trace::peerLinkNoLongerAggregate(void *const tPtr,Peer &peer)
void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer)
{
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx has degraded and is no longer an aggregate link",peer.address().toInt());
//ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt());
}
void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer)
{
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has PDV (%.0f ms), mean latency (%.0f ms)",
/*
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)",
peer.address().toInt(),
peer.aggregateLinkPhysicalPathCount(),
peer.interfaceListStr(),
peer.computeAggregateLinkPacketDelayVariance(),
peer.computeAggregateLinkMeanLatency());
*/
}
void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr<Path> &newPath,const uint64_t packetId)

View File

@ -109,8 +109,8 @@ public:
void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr<Path> &path,const uint64_t packetId,const Packet::Verb verb);
void peerLinkNowAggregate(void *const tPtr,Peer &peer);
void peerLinkNoLongerAggregate(void *const tPtr,Peer &peer);
void peerLinkNowRedundant(void *const tPtr,Peer &peer);
void peerLinkNoLongerRedundant(void *const tPtr,Peer &peer);
void peerLinkAggregateStatistics(void *const tPtr,Peer &peer);

View File

@ -214,12 +214,12 @@ public:
return l;
}
static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax)
static inline float normalize(float value, float bigMin, float bigMax, float targetMin, float targetMax)
{
int64_t bigSpan = bigMax - bigMin;
int64_t smallSpan = targetMax - targetMin;
float valueScaled = (value - (float)bigMin) / (float)bigSpan;
return (float)targetMin + valueScaled * (float)smallSpan;
float bigSpan = bigMax - bigMin;
float smallSpan = targetMax - targetMin;
float valueScaled = (value - bigMin) / bigSpan;
return targetMin + valueScaled * smallSpan;
}
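The reworked normalize() is a plain linear re-mapping of a value from one range onto another. A quick illustration with made-up numbers:
#include <cstdio>
#include "node/Utils.hpp"
int main()
{
	// Map a 250 ms latency on a 0..1000 ms scale onto a 0..255 quality scale.
	const float q = ZeroTier::Utils::normalize(250.0f, 0.0f, 1000.0f, 0.0f, 255.0f);
	std::printf("%.2f\n", q); // 63.75
	return 0;
}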
/**
@ -253,6 +253,7 @@ public:
static inline int strToInt(const char *s) { return (int)strtol(s,(char **)0,10); }
static inline unsigned long strToULong(const char *s) { return strtoul(s,(char **)0,10); }
static inline long strToLong(const char *s) { return strtol(s,(char **)0,10); }
static inline double strToDouble(const char *s) { return strtod(s,NULL); }
static inline unsigned long long strToU64(const char *s)
{
#ifdef __WINDOWS__

View File

@ -24,7 +24,9 @@ CORE_OBJS=\
node/Tag.o \
node/Topology.o \
node/Trace.o \
node/Utils.o
node/Utils.o \
node/Bond.o \
node/BondController.o
ONE_OBJS=\
controller/EmbeddedNetworkController.o \

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -347,6 +347,23 @@ public:
}
}
// Generate set of unique interface names (used for formation of logical slave set in multipath code)
for(std::map<InetAddress,std::string>::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) {
slaveIfNames.insert(ii->second);
}
for (std::set<std::string>::iterator si(slaveIfNames.begin());si!=slaveIfNames.end();) {
bool bFoundMatch = false;
for(std::map<InetAddress,std::string>::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) {
if (ii->second == *si) {
bFoundMatch = true;
break;
}
}
if (!bFoundMatch) {
slaveIfNames.erase(si++); // erase via post-increment so the iterator stays valid
} else {
++si;
}
}
// Create new bindings for those not already bound
for(std::map<InetAddress,std::string>::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) {
unsigned int bi = 0;
@ -444,7 +461,15 @@ public:
return false;
}
inline std::set<std::string> getSlaveInterfaceNames()
{
Mutex::Lock _l(_lock);
return slaveIfNames;
}
private:
std::set<std::string> slaveIfNames;
_Binding _bindings[ZT_BINDER_MAX_BINDINGS];
std::atomic<unsigned int> _bindingCount;
Mutex _lock;

View File

@ -55,8 +55,6 @@ LinuxNetLink::LinuxNetLink()
{
// set socket timeout to 1 sec so we're not permablocking recv() calls
_setSocketTimeout(_fd, 1);
int yes=1;
setsockopt(_fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
_la.nl_family = AF_NETLINK;
_la.nl_pid = 0; //getpid()+1;
@ -430,8 +428,6 @@ void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp)
void LinuxNetLink::_requestIPv4Routes()
{
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;
@ -485,8 +481,6 @@ void LinuxNetLink::_requestIPv4Routes()
void LinuxNetLink::_requestIPv6Routes()
{
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;
@ -540,8 +534,6 @@ void LinuxNetLink::_requestIPv6Routes()
void LinuxNetLink::_requestInterfaceList()
{
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;
@ -595,8 +587,6 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c
if (!target) return;
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;
@ -713,8 +703,6 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c
if (!target) return;
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;
@ -828,8 +816,6 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c
void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface)
{
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;
@ -948,8 +934,6 @@ void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface)
void LinuxNetLink::removeAddress(const InetAddress &addr, const char *iface)
{
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
int yes=1;
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
if (fd == -1) {
fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno));
return;

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -459,6 +459,22 @@ uint64_t OSUtils::jsonInt(const nlohmann::json &jv,const uint64_t dfl)
return dfl;
}
double OSUtils::jsonDouble(const nlohmann::json &jv,const double dfl)
{
try {
if (jv.is_number()) {
return (double)jv;
}
else if (jv.is_string()) {
std::string s = jv;
return Utils::strToDouble(s.c_str());
} else if (jv.is_boolean()) {
return (double)jv;
}
} catch ( ... ) {}
return dfl;
}
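jsonDouble() follows the same pattern as the existing jsonInt() helpers: numbers and booleans cast directly, numeric strings go through Utils::strToDouble(), and anything else (including a thrown exception) yields the caller-supplied default. A usage sketch with illustrative values:
#include "osdep/OSUtils.hpp"
void jsonDoubleExample()
{
	nlohmann::json j(ZeroTier::OSUtils::jsonParse("{\"alloc\":\"0.25\",\"speed\":1000}"));
	const double alloc = ZeroTier::OSUtils::jsonDouble(j["alloc"], 0.0);    // 0.25, parsed from the string
	const double speed = ZeroTier::OSUtils::jsonDouble(j["speed"], 0.0);    // 1000.0, cast from the number
	const double other = ZeroTier::OSUtils::jsonDouble(j["missing"], -1.0); // -1.0, default for null/absent
	(void)alloc; (void)speed; (void)other;
}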
uint64_t OSUtils::jsonIntHex(const nlohmann::json &jv,const uint64_t dfl)
{
try {

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -277,6 +277,7 @@ public:
static nlohmann::json jsonParse(const std::string &buf);
static std::string jsonDump(const nlohmann::json &j,int indentation = 1);
static uint64_t jsonInt(const nlohmann::json &jv,const uint64_t dfl);
static double jsonDouble(const nlohmann::json &jv,const double dfl);
static uint64_t jsonIntHex(const nlohmann::json &jv,const uint64_t dfl);
static bool jsonBool(const nlohmann::json &jv,const bool dfl);
static std::string jsonString(const nlohmann::json &jv,const char *dfl);

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -261,46 +261,6 @@ public:
}
}
/**
* Whether or not the socket object is in a closed state
*
* @param s Socket object
* @return true if socket is closed, false if otherwise
*/
inline bool isClosed(PhySocket *s)
{
PhySocketImpl *sws = (reinterpret_cast<PhySocketImpl *>(s));
return sws->type == ZT_PHY_SOCKET_CLOSED;
}
/**
* Get state of socket object
*
* @param s Socket object
* @return State of socket
*/
inline int getState(PhySocket *s)
{
PhySocketImpl *sws = (reinterpret_cast<PhySocketImpl *>(s));
return sws->type;
}
/**
* In the event that this socket is erased, we need a way to convey to the multipath logic
* that this path is no longer valid.
*
* @param s Socket object
* @return Whether the state of this socket is within an acceptable range of values
*/
inline bool isValidState(PhySocket *s)
{
if (s) {
PhySocketImpl *sws = (reinterpret_cast<PhySocketImpl *>(s));
return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN;
}
return false;
}
/**
* Cause poll() to stop waiting immediately
*

osdep/Slave.hpp (new file, 238 lines)
View File

@ -0,0 +1,238 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_SLAVE_HPP
#define ZT_SLAVE_HPP
#include <string>
#include "../node/AtomicCounter.hpp"
namespace ZeroTier {
class Slave
{
friend class SharedPtr<Slave>;
public:
Slave() {}
/**
* Constructs a Slave from user-specified (or default) parameters.
*
* @param ifnameStr Name of the underlying system network interface
* @param ipvPref IP protocol version preference (0, 4, 6, 46, or 64)
* @param speed User-specified speed of this slave (0 if unspecified)
* @param slaveMonitorInterval Interval at which paths on this slave are monitored
* @param upDelay How long before a path is considered usable after coming online
* @param downDelay How long before a path is considered dead
* @param enabled Whether this slave is enabled
* @param mode Whether this slave is a primary or a spare
* @param failoverToSlaveStr Name of the slave interface to fail over to (if any)
* @param userSpecifiedAlloc User-specified traffic allocation for this slave
*/
Slave(std::string& ifnameStr,
uint8_t ipvPref,
uint32_t speed,
uint32_t slaveMonitorInterval,
uint32_t upDelay,
uint32_t downDelay,
bool enabled,
uint8_t mode,
std::string failoverToSlaveStr,
float userSpecifiedAlloc) :
_ifnameStr(ifnameStr),
_ipvPref(ipvPref),
_speed(speed),
_relativeSpeed(0),
_slaveMonitorInterval(slaveMonitorInterval),
_upDelay(upDelay),
_downDelay(downDelay),
_enabled(enabled),
_mode(mode),
_failoverToSlaveStr(failoverToSlaveStr),
_userSpecifiedAlloc(userSpecifiedAlloc),
_isUserSpecified(false)
{}
/**
* @return The string representation of this slave's underlying interface's system name.
*/
inline std::string ifname() { return _ifnameStr; }
/**
* @return Whether this slave is designated as a primary.
*/
inline bool primary() { return _mode == ZT_MULTIPATH_SLAVE_MODE_PRIMARY; }
/**
* @return Whether this slave is designated as a spare.
*/
inline bool spare() { return _mode == ZT_MULTIPATH_SLAVE_MODE_SPARE; }
/**
* @return The name of the slave interface that should be used in the event of a failure.
*/
inline std::string failoverToSlave() { return _failoverToSlaveStr; }
/**
* @return Whether this slave interface was specified by the user or auto-detected.
*/
inline bool isUserSpecified() { return _isUserSpecified; }
/**
* Signify that this slave was specified by the user and not the result of auto-detection.
*
* @param isUserSpecified
*/
inline void setAsUserSpecified(bool isUserSpecified) { _isUserSpecified = isUserSpecified; }
/**
* @return Whether or not the user has specified failover instructions.
*/
inline bool userHasSpecifiedFailoverInstructions() { return _failoverToSlaveStr.length(); }
/**
* @return The speed of the slave relative to others in the bond.
*/
inline uint8_t relativeSpeed() { return _relativeSpeed; }
/**
* Sets the speed of the slave relative to others in the bond.
*
* @param relativeSpeed The speed relative to the rest of the slave interfaces.
*/
inline void setRelativeSpeed(uint8_t relativeSpeed) { _relativeSpeed = relativeSpeed; }
/**
* Sets the interval at which paths on this slave interface will be monitored.
*
* @param interval The path monitoring interval for this slave
*/
inline void setMonitorInterval(uint32_t interval) { _slaveMonitorInterval = interval; }
/**
* @return The interval at which paths on this slave interface are monitored (as specified by the user.)
*/
inline uint32_t monitorInterval() { return _slaveMonitorInterval; }
/**
* @return The absolute speed of the slave interface (as specified by the user.)
*/
inline uint32_t speed() { return _speed; }
/**
* @return The address preference for this slave interface (as specified by the user.)
*/
inline uint8_t ipvPref() { return _ipvPref; }
/**
* @return The mode (e.g. primary/spare) for this slave interface (as specified by the user.)
*/
inline uint8_t mode() { return _mode; }
/**
* @return The upDelay parameter for all paths on this slave interface.
*/
inline uint32_t upDelay() { return _upDelay; }
/**
* @return The downDelay parameter for all paths on this slave interface.
*/
inline uint32_t downDelay() { return _downDelay; }
/**
* @return Whether this slave is enabled or disabled
*/
inline uint8_t enabled() { return _enabled; }
private:
/**
* String representation of underlying interface's system name
*/
std::string _ifnameStr;
/**
* What preference (if any) a user has for IP protocol version used in
* path aggregations. Preference is expressed in the order of the digits:
*
* 0: no preference
* 4: IPv4 only
* 6: IPv6 only
* 46: IPv4 over IPv6
* 64: IPv6 over IPv4
*/
uint8_t _ipvPref;
/**
* User-specified speed of this slave/link
*/
uint32_t _speed;
/**
* Speed relative to other specified slaves/links (computed by Bond)
*/
uint8_t _relativeSpeed;
/**
* User-specified interval for monitoring paths on this specific slave
* instead of using the more generic interval specified for the entire
* bond.
*/
uint32_t _slaveMonitorInterval;
/**
* How long before a path is considered usable after coming online (when using
* policies that support fail-over events).
*/
uint32_t _upDelay;
/**
* How long before a path is considered to be dead (when using policies that
* support fail-over events).
*/
uint32_t _downDelay;
/**
* Whether this slave is enabled or disabled (possibly due to a bad config)
*/
uint8_t _enabled;
/**
* Whether this slave is designated as a primary, a spare, or no preference.
*/
uint8_t _mode;
/**
* The specific name of the interface to be used in the event that this
* slave fails.
*/
std::string _failoverToSlaveStr;
/**
* User-specified allocation
*/
float _userSpecifiedAlloc;
/**
* Whether or not this slave was created as a result of manual user specification. This is
* important to know because certain policy decisions depend on whether the user
* intends to use a specific set of interfaces.
*/
bool _isUserSpecified;
AtomicCounter __refCount;
};
} // namespace ZeroTier
#endif
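A minimal usage sketch of this class (interface name and numbers are illustrative; the mode constant comes from the multipath enums added earlier in this commit, so adjust include paths as needed):
#include "include/ZeroTierOne.h"
#include "osdep/Slave.hpp"
void exampleSlaveSetup()
{
	// A user-specified primary slave on "eth0" preferring IPv4 over IPv6,
	// monitored every 100 ms, with 500 ms up/down delays and no fixed speed.
	std::string ifname("eth0");
	std::string failoverTo(""); // no explicit fail-over target
	ZeroTier::Slave s(ifname,
		46,     // ipvPref: IPv4 over IPv6
		0,      // speed: unspecified
		100,    // slaveMonitorInterval
		500,    // upDelay
		500,    // downDelay
		true,   // enabled
		ZT_MULTIPATH_SLAVE_MODE_PRIMARY,
		failoverTo,
		0.0f);  // userSpecifiedAlloc: none
	s.setAsUserSpecified(true);
}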

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -39,6 +39,8 @@
#include "../node/Salsa20.hpp"
#include "../node/Poly1305.hpp"
#include "../node/SHA512.hpp"
#include "../node/Bond.hpp"
#include "../node/Peer.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Thread.hpp"
@ -48,6 +50,7 @@
#include "../osdep/Binder.hpp"
#include "../osdep/ManagedRoute.hpp"
#include "../osdep/BlockingQueue.hpp"
#include "../osdep/Slave.hpp"
#include "OneService.hpp"
#include "SoftwareUpdater.hpp"
@ -266,37 +269,43 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer)
pj["paths"] = pa;
}
static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer)
static void _peerBondToJson(nlohmann::json &pj,const ZT_Peer *peer)
{
char tmp[256];
OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address);
pj["aggregateLinkLatency"] = peer->latency;
//pj["aggregateLinkLatency"] = peer->latency;
std::string policyStr = BondController::getPolicyStrByCode(peer->bondingPolicy);
pj["policy"] = policyStr;
nlohmann::json pa = nlohmann::json::array();
for(unsigned int i=0;i<peer->pathCount;++i) {
int64_t lastSend = peer->paths[i].lastSend;
int64_t lastReceive = peer->paths[i].lastReceive;
nlohmann::json j;
j["address"] = reinterpret_cast<const InetAddress *>(&(peer->paths[i].address))->toString(tmp);
j["lastSend"] = (lastSend < 0) ? 0 : lastSend;
j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive;
j["ifname"] = std::string(peer->paths[i].ifname);
j["path"] = reinterpret_cast<const InetAddress *>(&(peer->paths[i].address))->toString(tmp);
j["lastTX"] = (lastSend < 0) ? 0 : lastSend;
j["lastRX"] = (lastReceive < 0) ? 0 : lastReceive;
j["lat"] = peer->paths[i].latencyMean;
j["pdv"] = peer->paths[i].latencyVariance;
//j["trustedPathId"] = peer->paths[i].trustedPathId;
//j["active"] = (bool)(peer->paths[i].expired == 0);
//j["expired"] = (bool)(peer->paths[i].expired != 0);
//j["preferred"] = (bool)(peer->paths[i].preferred != 0);
j["latency"] = peer->paths[i].latency;
j["pdv"] = peer->paths[i].packetDelayVariance;
//j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff;
//j["packetErrorRatio"] = peer->paths[i].packetErrorRatio;
//j["packetLossRatio"] = peer->paths[i].packetLossRatio;
j["stability"] = peer->paths[i].stability;
j["throughput"] = peer->paths[i].throughput;
//j["maxThroughput"] = peer->paths[i].maxThroughput;
j["allocation"] = peer->paths[i].allocation;
j["ifname"] = peer->paths[i].ifname;
//j["ltm"] = peer->paths[i].latencyMax;
//j["plr"] = peer->paths[i].packetLossRatio;
//j["per"] = peer->paths[i].packetErrorRatio;
//j["thr"] = peer->paths[i].throughputMean;
//j["thm"] = peer->paths[i].throughputMax;
//j["thv"] = peer->paths[i].throughputVariance;
//j["avl"] = peer->paths[i].availability;
//j["age"] = peer->paths[i].age;
//j["alloc"] = peer->paths[i].allocation;
//j["ifname"] = peer->paths[i].ifname;
pa.push_back(j);
}
pj["paths"] = pa;
pj["slaves"] = pa;
}
static void _moonToJson(nlohmann::json &mj,const World &world)
@ -429,7 +438,7 @@ public:
bool _updateAutoApply;
bool _allowTcpFallbackRelay;
bool _allowSecondaryPort;
unsigned int _multipathMode;
unsigned int _primaryPort;
unsigned int _secondaryPort;
unsigned int _tertiaryPort;
@ -718,6 +727,7 @@ public:
}
}
#endif
// Delete legacy iddb.d if present (cleanup)
OSUtils::rmDashRf((_homePath + ZT_PATH_SEPARATOR_S "iddb.d").c_str());
@ -752,7 +762,6 @@ public:
int64_t lastTapMulticastGroupCheck = 0;
int64_t lastBindRefresh = 0;
int64_t lastUpdateCheck = clockShouldBe;
int64_t lastMultipathModeUpdate = 0;
int64_t lastCleanedPeersDb = 0;
int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle
int64_t lastLocalConfFileCheck = OSUtils::now();
@ -798,7 +807,7 @@ public:
}
// Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default)
if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) {
if (((now - lastBindRefresh) >= (_node->bondController()->inUse() ? ZT_BINDER_REFRESH_PERIOD / 4 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) {
lastBindRefresh = now;
unsigned int p[3];
unsigned int pc = 0;
@ -815,11 +824,6 @@ public:
}
}
}
// Update multipath mode (if needed)
if (((now - lastMultipathModeUpdate) >= ZT_BINDER_REFRESH_PERIOD / 8)||(restarted)) {
lastMultipathModeUpdate = now;
_node->setMultipathMode(_multipathMode);
}
// Run background task processor in core if it's time to do so
int64_t dl = _nextBackgroundTaskDeadline;
@ -855,7 +859,7 @@ public:
}
// Sync information about physical network interfaces
if ((now - lastLocalInterfaceAddressCheck) >= (_multipathMode ? ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) {
if ((now - lastLocalInterfaceAddressCheck) >= (_node->bondController()->inUse() ? ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) {
lastLocalInterfaceAddressCheck = now;
_node->clearLocalInterfaceAddresses();
@ -869,8 +873,9 @@ public:
#endif
std::vector<InetAddress> boundAddrs(_binder.allBoundLocalInterfaceAddresses());
for(std::vector<InetAddress>::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i)
for(std::vector<InetAddress>::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i) {
_node->addLocalInterfaceAddress(reinterpret_cast<const struct sockaddr_storage *>(&(*i)));
}
}
// Clean peers.d periodically
@ -1209,15 +1214,15 @@ public:
settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff;
settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay);
if (_multipathMode) {
json &multipathConfig = res["multipath"];
if (_node->bondController()->inUse()) {
json &multipathConfig = res["bonds"];
ZT_PeerList *pl = _node->peers();
char peerAddrStr[256];
if (pl) {
for(unsigned long i=0;i<pl->peerCount;++i) {
if (pl->peers[i].hadAggregateLink) {
if (pl->peers[i].isBonded) {
nlohmann::json pj;
_peerAggregateLinkToJson(pj,&(pl->peers[i]));
_peerBondToJson(pj,&(pl->peers[i]));
OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address);
multipathConfig[peerAddrStr] = (pj);
}
@ -1346,8 +1351,8 @@ public:
if (j.is_object()) {
seed = Utils::hexStrToU64(OSUtils::jsonString(j["seed"],"0").c_str());
}
} catch (std::exception &exc) {
} catch ( ... ) {
// discard invalid JSON
}
std::vector<World> moons(_node->moons());
@ -1396,8 +1401,8 @@ public:
json &allowDefault = j["allowDefault"];
if (allowDefault.is_boolean()) localSettings.allowDefault = (bool)allowDefault;
}
} catch (std::exception &exc) {
} catch ( ... ) {
// discard invalid JSON
}
setNetworkSettings(nws->networks[i].nwid,localSettings);
@ -1551,10 +1556,133 @@ public:
json &settings = lc["settings"];
if (!_node->bondController()->inUse()) {
// defaultBondingPolicy
std::string defaultBondingPolicyStr(OSUtils::jsonString(settings["defaultBondingPolicy"],""));
int defaultBondingPolicy = _node->bondController()->getPolicyCodeByStr(defaultBondingPolicyStr);
_node->bondController()->setBondingLayerDefaultPolicy(defaultBondingPolicy);
_node->bondController()->setBondingLayerDefaultPolicyStr(defaultBondingPolicyStr); // Used if custom policy
// Custom Policies
json &customBondingPolicies = settings["policies"];
for (json::iterator policyItr = customBondingPolicies.begin(); policyItr != customBondingPolicies.end();++policyItr) {
fprintf(stderr, "\n\n--- (%s)\n", policyItr.key().c_str());
// Custom Policy
std::string customPolicyStr(policyItr.key());
json &customPolicy = policyItr.value();
std::string basePolicyStr(OSUtils::jsonString(customPolicy["basePolicy"],""));
if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BONDING_POLICY_NONE) {
fprintf(stderr, "error: custom policy (%s) is invalid, unknown base policy (%s).\n",
customPolicyStr.c_str(), basePolicyStr.c_str());
continue;
} if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BONDING_POLICY_NONE) {
fprintf(stderr, "error: custom policy (%s) will be ignored, cannot use standard policy names for custom policies.\n",
customPolicyStr.c_str());
continue;
}
// New bond, used as a copy template for new instances
SharedPtr<Bond> newTemplateBond = new Bond(basePolicyStr, customPolicyStr, SharedPtr<Peer>());
// Acceptable ranges
newTemplateBond->setMaxAcceptableLatency(OSUtils::jsonInt(customPolicy["maxAcceptableLatency"],-1));
newTemplateBond->setMaxAcceptableMeanLatency(OSUtils::jsonInt(customPolicy["maxAcceptableMeanLatency"],-1));
newTemplateBond->setMaxAcceptablePacketDelayVariance(OSUtils::jsonInt(customPolicy["maxAcceptablePacketDelayVariance"],-1));
newTemplateBond->setMaxAcceptablePacketLossRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketLossRatio"],-1));
newTemplateBond->setMaxAcceptablePacketErrorRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketErrorRatio"],-1));
newTemplateBond->setMinAcceptableAllocation((float)OSUtils::jsonDouble(customPolicy["minAcceptableAllocation"],0));
// Quality weights
json &qualityWeights = customPolicy["qualityWeights"];
if (qualityWeights.size() == ZT_QOS_WEIGHT_SIZE) { // TODO: Generalize this
float weights[ZT_QOS_WEIGHT_SIZE];
weights[ZT_QOS_LAT_IDX] = (float)OSUtils::jsonDouble(qualityWeights["lat"],0.0);
weights[ZT_QOS_LTM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["ltm"],0.0);
weights[ZT_QOS_PDV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["pdv"],0.0);
weights[ZT_QOS_PLR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["plr"],0.0);
weights[ZT_QOS_PER_IDX] = (float)OSUtils::jsonDouble(qualityWeights["per"],0.0);
weights[ZT_QOS_THR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thr"],0.0);
weights[ZT_QOS_THM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thm"],0.0);
weights[ZT_QOS_THV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thv"],0.0);
newTemplateBond->setUserQualityWeights(weights,ZT_QOS_WEIGHT_SIZE);
}
// Bond-specific properties
newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1));
newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1));
newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0));
newTemplateBond->setPacketsPerSlave(OSUtils::jsonInt(customPolicy["packetsPerSlave"],-1));
std::string slaveMonitorStrategyStr(OSUtils::jsonString(customPolicy["slaveMonitorStrategy"],""));
uint8_t slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT;
if (slaveMonitorStrategyStr == "passive") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE); }
if (slaveMonitorStrategyStr == "active") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE); }
if (slaveMonitorStrategyStr == "dynamic") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC); }
// Policy-Specific slave set
json &slaves = customPolicy["slaves"];
for (json::iterator slaveItr = slaves.begin(); slaveItr != slaves.end();++slaveItr) {
fprintf(stderr, "\t--- slave (%s)\n", slaveItr.key().c_str());
std::string slaveNameStr(slaveItr.key());
json &slave = slaveItr.value();
bool enabled = OSUtils::jsonInt(slave["enabled"],true);
uint32_t speed = OSUtils::jsonInt(slave["speed"],0);
float alloc = (float)OSUtils::jsonDouble(slave["alloc"],0);
if (speed && alloc) {
fprintf(stderr, "error: cannot specify both speed (%d) and alloc (%f) for slave (%s), pick one, slave disabled.\n",
speed, alloc, slaveNameStr.c_str());
enabled = false;
}
uint32_t upDelay = OSUtils::jsonInt(slave["upDelay"],-1);
uint32_t downDelay = OSUtils::jsonInt(slave["downDelay"],-1);
uint8_t ipvPref = OSUtils::jsonInt(slave["ipvPref"],0);
uint32_t slaveMonitorInterval = OSUtils::jsonInt(slave["monitorInterval"],(uint64_t)0);
std::string failoverToStr(OSUtils::jsonString(slave["failoverTo"],""));
// Mode
std::string slaveModeStr(OSUtils::jsonString(slave["mode"],"spare"));
uint8_t slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE;
if (slaveModeStr == "primary") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; }
if (slaveModeStr == "spare") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; }
// ipvPref
if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) {
fprintf(stderr, "error: invalid ipvPref value (%d), slave disabled.\n", ipvPref);
enabled = false;
}
if (slaveMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && failoverToStr.length()) {
fprintf(stderr, "error: cannot specify failover slaves for spares, slave disabled.\n");
failoverToStr = "";
enabled = false;
}
_node->bondController()->addCustomSlave(customPolicyStr, new Slave(slaveNameStr,ipvPref,speed,slaveMonitorInterval,upDelay,downDelay,enabled,slaveMode,failoverToStr,alloc));
}
// TODO: This is dumb
std::string slaveSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize"));
if (slaveSelectMethodStr == "always") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); }
if (slaveSelectMethodStr == "better") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); }
if (slaveSelectMethodStr == "failure") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); }
if (slaveSelectMethodStr == "optimize") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); }
if (newTemplateBond->getSlaveSelectMethod() < 0 || newTemplateBond->getSlaveSelectMethod() > 3) {
fprintf(stderr, "warning: invalid value (%s) for slaveSelectMethod, assuming mode: always\n", slaveSelectMethodStr.c_str());
}
/*
newBond->setPolicy(_node->bondController()->getPolicyCodeByStr(basePolicyStr));
newBond->setFlowHashing((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowFlowHashing"],(bool)allowFlowHashing));
newBond->setBondMonitorInterval((unsigned int)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["monitorInterval"],(uint64_t)0));
newBond->setAllowPathNegotiation((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowPathNegotiation"],(bool)false));
*/
if (!_node->bondController()->addCustomPolicy(newTemplateBond)) {
fprintf(stderr, "error: a custom policy of this name (%s) already exists.\n", customPolicyStr.c_str());
}
}
// Peer-specific bonding
json &peerSpecificBonds = settings["peerSpecificBonds"];
for (json::iterator peerItr = peerSpecificBonds.begin(); peerItr != peerSpecificBonds.end();++peerItr) {
_node->bondController()->assignBondingPolicyToPeer(std::stoull(peerItr.key(),0,16), peerItr.value());
}
// Check settings
if (defaultBondingPolicyStr.length() && !defaultBondingPolicy && !_node->bondController()->inUse()) {
fprintf(stderr, "error: unknown policy (%s) specified by defaultBondingPolicy, slave disabled.\n", defaultBondingPolicyStr.c_str());
}
}
// bondingPolicy cannot be used with allowTcpFallbackRelay
_allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !(_node->bondController()->inUse());
_primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff;
_multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0);
// multipathMode cannot be used with allowTcpFallbackRelay
_allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !_multipathMode;
_allowSecondaryPort = OSUtils::jsonBool(settings["allowSecondaryPort"],true);
_secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0);
_tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0);
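For reference, a hedged sketch of the local.conf fragment this parser expects (the custom policy name, interface names, and the basePolicy string are placeholders; the set of accepted base-policy name strings comes from BondController::getPolicyCodeByStr(), which is not shown in this hunk). Other keys read above (maxAcceptableLatency, qualityWeights, packetsPerSlave, peerSpecificBonds, and so on) follow the same pattern.
#include <string>
// Illustrative only: interface names, the custom policy name, and the
// basePolicy value below are placeholders, not values defined by this commit.
static const std::string exampleLocalConf = R"({
  "settings": {
    "defaultBondingPolicy": "my-custom-policy",
    "policies": {
      "my-custom-policy": {
        "basePolicy": "<one of the built-in policy names>",
        "failoverInterval": 500,
        "upDelay": 1000,
        "downDelay": 1000,
        "slaveMonitorStrategy": "dynamic",
        "activeReselect": "optimize",
        "slaves": {
          "eth0": { "mode": "primary", "ipvPref": 46, "failoverTo": "eth1" },
          "eth1": { "mode": "spare", "monitorInterval": 100 }
        }
      }
    }
  }
})";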
@ -1705,9 +1833,8 @@ public:
}
}
#ifdef __SYNOLOGY__
if (!n.tap->addIps(newManagedIps)) {
if (!n.tap->addIpSyn(newManagedIps))
fprintf(stderr,"ERROR: unable to add ip addresses to ifcfg" ZT_EOL_S);
}
#else
for(std::vector<InetAddress>::iterator ip(newManagedIps.begin());ip!=newManagedIps.end();++ip) {
if (std::find(n.managedIps.begin(),n.managedIps.end(),*ip) == n.managedIps.end()) {
@ -2025,8 +2152,6 @@ public:
return;
}
} catch (std::exception &exc) {
_phy.close(sock);
} catch ( ... ) {
_phy.close(sock);
}
@ -2135,8 +2260,6 @@ public:
#endif
_nets.erase(nwid);
return -999;
} catch (int exc) {
return -999;
} catch ( ... ) {
return -999; // tap init failed
}
@ -2743,6 +2866,7 @@ public:
if (!strncmp(p->c_str(),ifname,p->length()))
return false;
}
return _node->bondController()->allowedToBind(std::string(ifname));
}
{
// Check global blacklists