Merge multipath branch into dev branch

This commit is contained in:
Joseph Henry 2020-07-23 00:40:17 -07:00
commit ff50762649
31 changed files with 5166 additions and 1433 deletions

View File

@ -415,33 +415,155 @@ enum ZT_ResultCode
*/
#define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000))
/**
* The multipath algorithm in use by this node.
* Multipath bonding policy
*/
enum ZT_MultipathMode
enum ZT_MultipathBondingPolicy
{
/**
* No active multipath.
*
* Traffic is merely sent over the strongest path. That being
* said, this mode will automatically failover in the event that a link goes down.
* Normal operation. No fault tolerance, no load balancing
*/
ZT_MULTIPATH_NONE = 0,
ZT_BONDING_POLICY_NONE = 0,
/**
* Traffic is randomly distributed among all active paths.
*
* Will cease sending traffic over links that appear to be stale.
* Sends traffic out on only one path at a time. Configurable immediate
* fail-over.
*/
ZT_MULTIPATH_RANDOM = 1,
ZT_BONDING_POLICY_ACTIVE_BACKUP = 1,
/**
* Traffic is allocated across all active paths in proportion to their strength and
* reliability.
*
* Will cease sending traffic over links that appear to be stale.
* Sends traffic out on all paths
*/
ZT_MULTIPATH_PROPORTIONALLY_BALANCED = 2,
ZT_BONDING_POLICY_BROADCAST = 2,
/**
* Stripes packets across all paths
*/
ZT_BONDING_POLICY_BALANCE_RR = 3,
/**
* Packets destined for specific peers will always be sent over the same
* path.
*/
ZT_BONDING_POLICY_BALANCE_XOR = 4,
/**
* Balances flows among all paths according to path performance
*/
ZT_BONDING_POLICY_BALANCE_AWARE = 5
};
/**
* Multipath active re-selection policy (linkSelectMethod)
*/
enum ZT_MultipathLinkSelectMethod
{
/**
* Primary link regains status as active link whenever it comes back up
* (default when links are explicitly specified)
*/
ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0,
/**
* Primary link regains status as active link when it comes back up and
* (if) it is better than the currently-active link.
*/
ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1,
/**
* Primary link regains status as active link only if the currently-active
* link fails.
*/
ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2,
/**
* The primary link can change if a superior path is detected.
* (default if user provides no fail-over guidance)
*/
ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3
};
/**
* Mode of multipath link interface
*/
enum ZT_MultipathLinkMode
{
ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0,
ZT_MULTIPATH_SLAVE_MODE_SPARE = 1
};
/**
* Strategy for path monitoring
*/
enum ZT_MultipathMonitorStrategy
{
/**
* Use bonding policy's default strategy
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT = 0,
/**
* Does not actively send probes to judge aliveness, will rely
* on conventional traffic and summary statistics.
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE = 1,
/**
* Sends probes at a constant rate to judge aliveness.
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE = 2,
/**
* Sends probes at varying rates which correlate to native
* traffic loads to judge aliveness.
*/
ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3
};
/**
* Strategy for re-balancing protocol flows
*/
enum ZT_MultipathFlowRebalanceStrategy
{
/**
* Flows will only be re-balanced among links during
* assignment or failover. This minimizes the possibility
* of sequence reordering and is thus the default setting.
*/
ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE = 0,
/**
* Flows that are active may be re-assigned to a new more
* suitable link if it can be done without disrupting the flow.
* This setting can sometimes cause sequence re-ordering.
*/
ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC = 0,
/**
* Flows will be continuously re-assigned the most suitable link
* in order to maximize "balance". This can often cause sequence
* reordering and is thus only reccomended for protocols like UDP.
*/
ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE = 2
};
/**
* Indices for the path quality weight vector
*/
enum ZT_MultipathQualityWeightIndex
{
ZT_QOS_LAT_IDX,
ZT_QOS_LTM_IDX,
ZT_QOS_PDV_IDX,
ZT_QOS_PLR_IDX,
ZT_QOS_PER_IDX,
ZT_QOS_THR_IDX,
ZT_QOS_THM_IDX,
ZT_QOS_THV_IDX,
ZT_QOS_AGE_IDX,
ZT_QOS_SCP_IDX,
ZT_QOS_WEIGHT_SIZE
};
/**
@ -1250,44 +1372,49 @@ typedef struct
uint64_t trustedPathId;
/**
* One-way latency
* Mean latency
*/
float latency;
float latencyMean;
/**
* How much latency varies over time
* Maximum observed latency
*/
float packetDelayVariance;
float latencyMax;
/**
* How much observed throughput varies over time
* Variance of latency
*/
float throughputDisturbCoeff;
float latencyVariance;
/**
* Packet Error Ratio (PER)
*/
float packetErrorRatio;
/**
* Packet Loss Ratio (PLR)
* Packet loss ratio
*/
float packetLossRatio;
/**
* Stability of the path
* Packet error ratio
*/
float stability;
float packetErrorRatio;
/**
* Current throughput (moving average)
* Mean throughput
*/
uint64_t throughput;
uint64_t throughputMean;
/**
* Maximum observed throughput for this path
* Maximum observed throughput
*/
uint64_t maxThroughput;
float throughputMax;
/**
* Throughput variance
*/
float throughputVariance;
/**
* Address scope
*/
uint8_t scope;
/**
* Percentage of traffic allocated to this path
@ -1297,7 +1424,9 @@ typedef struct
/**
* Name of physical interface (for monitoring)
*/
char *ifname;
char ifname[32];
uint64_t localSocket;
/**
* Is path expired?
@ -1351,9 +1480,11 @@ typedef struct
unsigned int pathCount;
/**
* Whether this peer was ever reachable via an aggregate link
* Whether multiple paths to this peer are bonded
*/
bool hadAggregateLink;
bool isBonded;
int bondingPolicy;
/**
* Known network paths to peer

View File

@ -99,7 +99,7 @@ mac-agent: FORCE
one: $(CORE_OBJS) $(ONE_OBJS) one.o mac-agent
$(CXX) $(CXXFLAGS) -o zerotier-one $(CORE_OBJS) $(ONE_OBJS) one.o $(LIBS)
$(STRIP) zerotier-one
# $(STRIP) zerotier-one
ln -sf zerotier-one zerotier-idtool
ln -sf zerotier-one zerotier-cli
$(CODESIGN) -f -s $(CODESIGN_APP_CERT) zerotier-one

1721
node/Bond.cpp Normal file

File diff suppressed because it is too large Load Diff

688
node/Bond.hpp Normal file
View File

@ -0,0 +1,688 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_BOND_HPP
#define ZT_BOND_HPP
#include <map>
#include "Path.hpp"
#include "Peer.hpp"
#include "../osdep/Link.hpp"
#include "Flow.hpp"
namespace ZeroTier {
class RuntimeEnvironment;
class Link;
class Bond
{
friend class SharedPtr<Bond>;
friend class Peer;
friend class BondController;
struct PathQualityComparator
{
bool operator ()(const SharedPtr<Path> & a, const SharedPtr<Path> & b)
{
if(a->_failoverScore == b->_failoverScore) {
return a < b;
}
return a->_failoverScore > b->_failoverScore;
}
};
public:
// TODO: Remove
bool _header;
int64_t _lastLogTS;
int64_t _lastPrintTS;
void dumpInfo(const int64_t now);
bool relevant();
SharedPtr<Link> getLink(const SharedPtr<Path>& path);
/**
* Constructor. Creates a bond based off of ZT defaults
*
* @param renv Runtime environment
* @param policy Bonding policy
* @param peer
*/
Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr<Peer>& peer);
/**
* Constructor. For use when user intends to manually specify parameters
*
* @param basePolicy
* @param policyAlias
* @param peer
*/
Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr<Peer>& peer);
/**
* Constructor. Creates a bond based off of a user-defined bond template
*
* @param renv Runtime environment
* @param original
* @param peer
*/
Bond(const RuntimeEnvironment *renv, SharedPtr<Bond> originalBond, const SharedPtr<Peer>& peer);
/**
* @return The human-readable name of the bonding policy
*/
std::string policyAlias() { return _policyAlias; }
/**
* Inform the bond about the path that its peer (owning object) just learned about
*
* @param path Newly-learned Path which should now be handled by the Bond
* @param now Current time
*/
void nominatePath(const SharedPtr<Path>& path, int64_t now);
/**
* Propagate and memoize often-used bonding preferences for each path
*/
void applyUserPrefs();
/**
* Check path states and perform bond rebuilds if needed.
*
* @param now Current time
* @param rebuild Whether or not the bond should be reconstructed.
*/
void curateBond(const int64_t now, bool rebuild);
/**
* Periodically perform statistical summaries of quality metrics for all paths.
*
* @param now Current time
*/
void estimatePathQuality(int64_t now);
/**
* Record an invalid incoming packet. This packet failed
* MAC/compression/cipher checks and will now contribute to a
* Packet Error Ratio (PER).
*
* @param path Path over which packet was received
*/
void recordIncomingInvalidPacket(const SharedPtr<Path>& path);
/**
* Record statistics on outbound an packet.
*
* @param path Path over which packet is being sent
* @param packetId Packet ID
* @param payloadLength Packet data length
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void recordOutgoingPacket(const SharedPtr<Path> &path, uint64_t packetId,
uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now);
/**
* Process the contents of an inbound VERB_QOS_MEASUREMENT to gather path quality observations.
*
* @param now Current time
* @param count Number of records
* @param rx_id table of packet IDs
* @param rx_ts table of holding times
*/
void receivedQoS(const SharedPtr<Path>& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts);
/**
* Process the contents of an inbound VERB_ACK to gather path quality observations.
*
* @param path Path over which packet was received
* @param now Current time
* @param ackedBytes Number of bytes ACKed by this VERB_ACK
*/
void receivedAck(const SharedPtr<Path>& path, int64_t now, int32_t ackedBytes);
/**
* Generate the contents of a VERB_QOS_MEASUREMENT packet.
*
* @param now Current time
* @param qosBuffer destination buffer
* @return Size of payload
*/
int32_t generateQoSPacket(const SharedPtr<Path>& path, int64_t now, char *qosBuffer);
/**
* Record statistics for an inbound packet.
*
* @param path Path over which packet was received
* @param packetId Packet ID
* @param payloadLength Packet data length
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void recordIncomingPacket(const SharedPtr<Path>& path, uint64_t packetId, uint16_t payloadLength,
Packet::Verb verb, int32_t flowId, int64_t now);
/**
* Determines the most appropriate path for packet and flow egress. This decision is made by
* the underlying bonding policy as well as QoS-related statistical observations of path quality.
*
* @param now Current time
* @param flowId Flow ID
* @return Pointer to suggested Path
*/
SharedPtr<Path> getAppropriatePath(int64_t now, int32_t flowId);
/**
* Creates a new flow record
*
* @param path Path over which flow shall be handled
* @param flowId Flow ID
* @param entropy A byte of entropy to be used by the bonding algorithm
* @param now Current time
* @return Pointer to newly-created Flow
*/
SharedPtr<Flow> createFlow(const SharedPtr<Path> &path, int32_t flowId, unsigned char entropy, int64_t now);
/**
* Removes flow records that are past a certain age limit.
*
* @param age Age threshold to be forgotten
* @param oldest Whether only the oldest shall be forgotten
* @param now Current time
*/
void forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now);
/**
* Assigns a new flow to a bonded path
*
* @param flow Flow to be assigned
* @param now Current time
*/
bool assignFlowToBondedPath(SharedPtr<Flow> &flow, int64_t now);
/**
* Determine whether a path change should occur given the remote peer's reported utility and our
* local peer's known utility. This has the effect of assigning inbound and outbound traffic to
* the same path.
*
* @param now Current time
* @param path Path over which the negotiation request was received
* @param remoteUtility How much utility the remote peer claims to gain by using the declared path
*/
void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr<Path> &path, int16_t remoteUtility);
/**
* Determine state of path synchronization and whether a negotiation request
* shall be sent to the peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void pathNegotiationCheck(void *tPtr, const int64_t now);
/**
* Sends a VERB_ACK to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
* @param localSocket Local source socket
* @param atAddress
* @param now Current time
*/
void sendACK(void *tPtr,const SharedPtr<Path> &path,int64_t localSocket,
const InetAddress &atAddress,int64_t now);
/**
* Sends a VERB_QOS_MEASUREMENT to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
* @param localSocket Local source socket
* @param atAddress
* @param now Current time
*/
void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr<Path> &path,int64_t localSocket,
const InetAddress &atAddress,int64_t now);
/**
* Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
*/
void sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr<Path> &path);
/**
*
* @param now Current time
*/
void processBalanceTasks(int64_t now);
/**
* Perform periodic tasks unique to active-backup
*
* @param now Current time
*/
void processActiveBackupTasks(int64_t now);
/**
* Switches the active link in an active-backup scenario to the next best during
* a failover event.
*
* @param now Current time
*/
void dequeueNextActiveBackupPath(uint64_t now);
/**
* Set bond parameters to reasonable defaults, these may later be overwritten by
* user-specified parameters.
*
* @param policy Bonding policy
* @param templateBond
*/
void setReasonableDefaults(int policy, SharedPtr<Bond> templateBond, bool useTemplate);
/**
* Check and assign user-specified quality weights to this bond.
*
* @param weights Set of user-specified weights
* @param len Length of weight vector
*/
void setUserQualityWeights(float weights[], int len);
/**
* @param latencyInMilliseconds Maximum acceptable latency.
*/
void setMaxAcceptableLatency(int16_t latencyInMilliseconds) {
_maxAcceptableLatency = latencyInMilliseconds;
}
/**
* @param latencyInMilliseconds Maximum acceptable (mean) latency.
*/
void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) {
_maxAcceptableMeanLatency = latencyInMilliseconds;
}
/**
* @param latencyVarianceInMilliseconds Maximum acceptable packet delay variance (jitter).
*/
void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) {
_maxAcceptablePacketDelayVariance = latencyVarianceInMilliseconds;
}
/**
* @param lossRatio Maximum acceptable packet loss ratio (PLR).
*/
void setMaxAcceptablePacketLossRatio(float lossRatio) {
_maxAcceptablePacketLossRatio = lossRatio;
}
/**
* @param errorRatio Maximum acceptable packet error ratio (PER).
*/
void setMaxAcceptablePacketErrorRatio(float errorRatio) {
_maxAcceptablePacketErrorRatio = errorRatio;
}
/**
* @param errorRatio Maximum acceptable packet error ratio (PER).
*/
void setMinAcceptableAllocation(float minAlloc) {
_minAcceptableAllocation = minAlloc * 255;
}
/**
* @return Whether the user has defined links for use on this bond
*/
inline bool userHasSpecifiedLinks() { return _userHasSpecifiedLinks; }
/**
* @return Whether the user has defined a set of failover link(s) for this bond
*/
inline bool userHasSpecifiedFailoverInstructions() { return _userHasSpecifiedFailoverInstructions; };
/**
* @return Whether the user has specified a primary link
*/
inline bool userHasSpecifiedPrimaryLink() { return _userHasSpecifiedPrimaryLink; }
/**
* @return Whether the user has specified link speeds
*/
inline bool userHasSpecifiedLinkSpeeds() { return _userHasSpecifiedLinkSpeeds; }
/**
* Periodically perform maintenance tasks for each active bond.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void processBackgroundTasks(void *tPtr, int64_t now);
/**
* Rate limit gate for VERB_ACK
*
* @param now Current time
* @return Whether the incoming packet should be rate-gated
*/
inline bool rateGateACK(const int64_t now)
{
_ackCutoffCount++;
int numToDrain = _lastAckRateCheck ? (now - _lastAckRateCheck) / ZT_ACK_DRAINAGE_DIVISOR : _ackCutoffCount;
_lastAckRateCheck = now;
if (_ackCutoffCount > numToDrain) {
_ackCutoffCount-=numToDrain;
} else {
_ackCutoffCount = 0;
}
return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_QOS_MEASUREMENT
*
* @param now Current time
* @return Whether the incoming packet should be rate-gated
*/
inline bool rateGateQoS(const int64_t now)
{
_qosCutoffCount++;
int numToDrain = (now - _lastQoSRateCheck) / ZT_QOS_DRAINAGE_DIVISOR;
_lastQoSRateCheck = now;
if (_qosCutoffCount > numToDrain) {
_qosCutoffCount-=numToDrain;
} else {
_qosCutoffCount = 0;
}
return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_PATH_NEGOTIATION_REQUEST
*
* @param now Current time
* @return Whether the incoming packet should be rate-gated
*/
inline bool rateGatePathNegotiation(const int64_t now)
{
if ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME)
++_pathNegotiationCutoffCount;
else _pathNegotiationCutoffCount = 0;
_lastPathNegotiationReceived = now;
return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT);
}
/**
* @param interval Maximum amount of time user expects a failover to take on this bond.
*/
inline void setFailoverInterval(uint32_t interval) { _failoverInterval = interval; }
/**
* @param strategy Strategy that the bond uses to re-assign protocol flows.
*/
inline void setFlowRebalanceStrategy(uint32_t strategy) { _flowRebalanceStrategy = strategy; }
/**
* @param strategy Strategy that the bond uses to prob for path aliveness and quality
*/
inline void setLinkMonitorStrategy(uint8_t strategy) { _linkMonitorStrategy = strategy; }
/**
* @return the current up delay parameter
*/
inline uint16_t getUpDelay() { return _upDelay; }
/**
* @param upDelay Length of time before a newly-discovered path is admitted to the bond
*/
inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } }
/**
* @return Length of time before a newly-failed path is removed from the bond
*/
inline uint16_t getDownDelay() { return _downDelay; }
/**
* @param downDelay Length of time before a newly-failed path is removed from the bond
*/
inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } }
/**
* @return the current monitoring interval for the bond (can be overridden with intervals specific to certain links.)
*/
inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; }
/**
* Set the current monitoring interval for the bond (can be overridden with intervals specific to certain links.)
*
* @param monitorInterval How often gratuitous VERB_HELLO(s) are sent to remote peer.
*/
inline void setBondMonitorInterval(uint16_t interval) { _bondMonitorInterval = interval; }
/**
* @param policy Bonding policy for this bond
*/
inline void setPolicy(uint8_t policy) { _bondingPolicy = policy; }
/**
* @return the current bonding policy
*/
inline uint8_t getPolicy() { return _bondingPolicy; }
/**
*
* @param allowFlowHashing
*/
inline void setFlowHashing(bool allowFlowHashing) { _allowFlowHashing = allowFlowHashing; }
/**
* @return Whether flow-hashing is currently enabled for this bond.
*/
bool flowHashingEnabled() { return _allowFlowHashing; }
/**
*
* @param packetsPerLink
*/
inline void setPacketsPerLink(int packetsPerLink) { _packetsPerLink = packetsPerLink; }
/**
*
* @param linkSelectMethod
*/
inline void setLinkSelectMethod(uint8_t method) { _abLinkSelectMethod = method; }
/**
*
* @return
*/
inline uint8_t getLinkSelectMethod() { return _abLinkSelectMethod; }
/**
*
* @param allowPathNegotiation
*/
inline void setAllowPathNegotiation(bool allowPathNegotiation) { _allowPathNegotiation = allowPathNegotiation; }
/**
*
* @return
*/
inline bool allowPathNegotiation() { return _allowPathNegotiation; }
private:
const RuntimeEnvironment *RR;
AtomicCounter __refCount;
/**
* Custom name given by the user to this bond type.
*/
std::string _policyAlias;
/**
* Paths that this bond has been made aware of but that are not necessarily
* part of the bond proper.
*/
SharedPtr<Path> _paths[ZT_MAX_PEER_NETWORK_PATHS];
/**
* Set of indices corresponding to paths currently included in the bond proper. This
* may only be updated during a call to curateBond(). The reason for this is so that
* we can simplify the high frequency packet egress logic.
*/
int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS];
/**
* Number of paths currently included in the _bondedIdx set.
*/
int _numBondedPaths;
/**
* Flows hashed according to port and protocol
*/
std::map<int32_t,SharedPtr<Flow> > _flows;
float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path.
uint8_t _bondingPolicy;
uint32_t _upDelay;
uint32_t _downDelay;
// active-backup
SharedPtr<Path> _abPath; // current active path
std::list<SharedPtr<Path> > _abFailoverQueue;
uint8_t _abLinkSelectMethod; // link re-selection policy for the primary link in active-backup
uint64_t _lastActiveBackupPathChange;
// balance-rr
uint8_t _rrIdx; // index to path currently in use during Round Robin operation
uint16_t _rrPacketsSentOnCurrLink; // number of packets sent on this link since the most recent path switch.
/**
* How many packets will be sent on a path before moving to the next path
* in the round-robin sequence. A value of zero will cause a random path
* selection for each outgoing packet.
*/
int _packetsPerLink;
// balance-aware
uint64_t _totalBondUnderload;
uint8_t _flowRebalanceStrategy;
// dynamic link monitoring
uint8_t _linkMonitorStrategy;
uint64_t _lastFrame;
uint32_t _dynamicPathMonitorInterval;
// path negotiation
int16_t _localUtility;
SharedPtr<Path> negotiatedPath;
uint8_t _numSentPathNegotiationRequests;
unsigned int _pathNegotiationCutoffCount;
bool _allowPathNegotiation;
uint64_t _lastPathNegotiationReceived;
uint64_t _lastSentPathNegotiationRequest;
// timers
uint32_t _failoverInterval;
uint32_t _qosSendInterval;
uint32_t _ackSendInterval;
uint16_t _ackCutoffCount;
uint64_t _lastAckRateCheck;
uint16_t _qosCutoffCount;
uint64_t _lastQoSRateCheck;
uint32_t throughputMeasurementInterval;
uint32_t _qualityEstimationInterval;
// timestamps
uint64_t _lastCheckUserPreferences;
uint64_t _lastQualityEstimation;
uint64_t _lastFlowStatReset;
uint64_t _lastFlowExpirationCheck;
uint64_t _lastFlowRebalance;
uint64_t _lastPathNegotiationCheck;
uint64_t _lastBackgroundTaskCheck;
float _maxAcceptablePacketLossRatio;
float _maxAcceptablePacketErrorRatio;
uint16_t _maxAcceptableLatency;
uint16_t _maxAcceptableMeanLatency;
uint16_t _maxAcceptablePacketDelayVariance;
uint8_t _minAcceptableAllocation;
/**
* Default initial punishment inflicted on misbehaving paths. Punishment slowly
* drains linearly. For each eligibility change the remaining punishment is doubled.
*/
uint32_t _defaultPathRefractoryPeriod;
/**
* Whether the current bonding policy requires computation of path statistics
*/
bool _shouldCollectPathStatistics;
/**
* Free byte of entropy that is updated on every packet egress event.
*/
unsigned char _freeRandomByte;
/**
* Remote peer that this bond services
*/
SharedPtr<Peer> _peer;
Mutex _paths_m;
Mutex _flows_m;
/**
* Whether the user has specified links for this bond.
*/
bool _userHasSpecifiedLinks;
/**
* Whether the user has specified a primary link for this bond.
*/
bool _userHasSpecifiedPrimaryLink;
/**
* Whether the user has specified failover instructions for this bond.
*/
bool _userHasSpecifiedFailoverInstructions;
/**
* Whether the user has specified links speeds for this bond.
*/
bool _userHasSpecifiedLinkSpeeds;
/**
* How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a
* path is still active. A value of zero (0) will disable active path
* monitoring; as result, all monitoring will be a function of traffic.
*/
uint16_t _bondMonitorInterval;
/**
* Whether or not flow hashing is allowed.
*/
bool _allowFlowHashing;
};
} // namespace ZeroTier
#endif

203
node/BondController.cpp Normal file
View File

@ -0,0 +1,203 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#include "Constants.hpp"
#include "BondController.hpp"
#include "Peer.hpp"
namespace ZeroTier {
int BondController::_minReqPathMonitorInterval;
uint8_t BondController::_defaultBondingPolicy;
BondController::BondController(const RuntimeEnvironment *renv) :
RR(renv)
{
bondStartTime = RR->node->now();
_defaultBondingPolicy = ZT_BONDING_POLICY_NONE;
}
bool BondController::linkAllowed(std::string &policyAlias, SharedPtr<Link> link)
{
bool foundInDefinitions = false;
if (_linkDefinitions.count(policyAlias)) {
auto it = _linkDefinitions[policyAlias].begin();
while (it != _linkDefinitions[policyAlias].end()) {
if (link->ifname() == (*it)->ifname()) {
foundInDefinitions = true;
break;
}
++it;
}
}
return _linkDefinitions[policyAlias].empty() || foundInDefinitions;
}
void BondController::addCustomLink(std::string& policyAlias, SharedPtr<Link> link)
{
Mutex::Lock _l(_links_m);
_linkDefinitions[policyAlias].push_back(link);
auto search = _interfaceToLinkMap[policyAlias].find(link->ifname());
if (search == _interfaceToLinkMap[policyAlias].end()) {
link->setAsUserSpecified(true);
_interfaceToLinkMap[policyAlias].insert(std::pair<std::string, SharedPtr<Link>>(link->ifname(), link));
} else {
//fprintf(stderr, "link already exists=%s\n", link->ifname().c_str());
// Link is already defined, overlay user settings
}
}
bool BondController::addCustomPolicy(const SharedPtr<Bond>& newBond)
{
Mutex::Lock _l(_bonds_m);
if (!_bondPolicyTemplates.count(newBond->policyAlias())) {
_bondPolicyTemplates[newBond->policyAlias()] = newBond;
return true;
}
return false;
}
bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias)
{
Mutex::Lock _l(_bonds_m);
if (!_policyTemplateAssignments.count(identity)) {
_policyTemplateAssignments[identity] = policyAlias;
return true;
}
return false;
}
SharedPtr<Bond> BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr<Peer>& peer)
{
Mutex::Lock _l(_bonds_m);
int64_t identity = peer->identity().address().toInt();
Bond *bond = nullptr;
if (!_bonds.count(identity)) {
std::string policyAlias;
//fprintf(stderr, "new bond, registering for %llx\n", identity);
if (!_policyTemplateAssignments.count(identity)) {
if (_defaultBondingPolicy) {
//fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy);
bond = new Bond(renv, _defaultBondingPolicy, peer);
}
if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) {
//fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str());
bond = new Bond(renv, _bondPolicyTemplates[_defaultBondingPolicyStr].ptr(), peer);
}
}
else {
//fprintf(stderr, " assignment found for %llx, using it as a template (%s)\n", identity,_policyTemplateAssignments[identity].c_str());
if (!_bondPolicyTemplates[_policyTemplateAssignments[identity]]) {
//fprintf(stderr, "unable to locate template (%s), ignoring assignment for (%llx), using defaults\n", _policyTemplateAssignments[identity].c_str(), identity);
bond = new Bond(renv, _defaultBondingPolicy, peer);
}
else {
bond = new Bond(renv, _bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr(), peer);
}
}
}
else {
//fprintf(stderr, "bond already exists for %llx.\n", identity);
}
if (bond) {
_bonds[identity] = bond;
/**
* Determine if user has specified anything that could affect the bonding policy's decisions
*/
if (_interfaceToLinkMap.count(bond->policyAlias())) {
std::map<std::string, SharedPtr<Link> >::iterator it = _interfaceToLinkMap[bond->policyAlias()].begin();
while (it != _interfaceToLinkMap[bond->policyAlias()].end()) {
if (it->second->isUserSpecified()) {
bond->_userHasSpecifiedLinks = true;
}
if (it->second->isUserSpecified() && it->second->primary()) {
bond->_userHasSpecifiedPrimaryLink = true;
}
if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) {
bond->_userHasSpecifiedFailoverInstructions = true;
}
if (it->second->isUserSpecified() && (it->second->speed() > 0)) {
bond->_userHasSpecifiedLinkSpeeds = true;
}
++it;
}
}
return bond;
}
return SharedPtr<Bond>();
}
SharedPtr<Link> BondController::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket)
{
Mutex::Lock _l(_links_m);
char ifname[16];
_phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16);
std::string ifnameStr(ifname);
auto search = _interfaceToLinkMap[policyAlias].find(ifnameStr);
if (search == _interfaceToLinkMap[policyAlias].end()) {
SharedPtr<Link> s = new Link(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0);
_interfaceToLinkMap[policyAlias].insert(std::pair<std::string,SharedPtr<Link> >(ifnameStr, s));
return s;
}
else {
return search->second;
}
}
SharedPtr<Link> BondController::getLinkByName(const std::string& policyAlias, const std::string& ifname)
{
Mutex::Lock _l(_links_m);
auto search = _interfaceToLinkMap[policyAlias].find(ifname);
if (search != _interfaceToLinkMap[policyAlias].end()) {
return search->second;
}
return SharedPtr<Link>();
}
bool BondController::allowedToBind(const std::string& ifname)
{
return true;
/*
if (!_defaultBondingPolicy) {
return true; // no restrictions
}
Mutex::Lock _l(_links_m);
if (_interfaceToLinkMap.empty()) {
return true; // no restrictions
}
std::map<std::string, std::map<std::string, SharedPtr<Link> > >::iterator policyItr = _interfaceToLinkMap.begin();
while (policyItr != _interfaceToLinkMap.end()) {
std::map<std::string, SharedPtr<Link> >::iterator linkItr = policyItr->second.begin();
while (linkItr != policyItr->second.end()) {
if (linkItr->first == ifname) {
return true;
}
++linkItr;
}
++policyItr;
}
return false;
*/
}
void BondController::processBackgroundTasks(void *tPtr, const int64_t now)
{
Mutex::Lock _l(_bonds_m);
std::map<int64_t,SharedPtr<Bond> >::iterator bondItr = _bonds.begin();
while (bondItr != _bonds.end()) {
bondItr->second->processBackgroundTasks(tPtr, now);
++bondItr;
}
}
} // namespace ZeroTier

231
node/BondController.hpp Normal file
View File

@ -0,0 +1,231 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_BONDCONTROLLER_HPP
#define ZT_BONDCONTROLLER_HPP
#include <map>
#include <vector>
#include "SharedPtr.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Link.hpp"
namespace ZeroTier {
class RuntimeEnvironment;
class Bond;
class Peer;
class BondController
{
friend class Bond;
public:
BondController(const RuntimeEnvironment *renv);
/**
* @return Whether this link is permitted to become a member of a bond.
*/
bool linkAllowed(std::string &policyAlias, SharedPtr<Link> link);
/**
* @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements.
*/
int minReqPathMonitorInterval() { return _minReqPathMonitorInterval; }
/**
* @param minReqPathMonitorInterval The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements.
*/
static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) { _minReqPathMonitorInterval = minReqPathMonitorInterval; }
/**
* @return Whether the bonding layer is currently set up to be used.
*/
bool inUse() { return !_bondPolicyTemplates.empty() || _defaultBondingPolicy; }
/**
* @param basePolicyName Bonding policy name (See ZeroTierOne.h)
* @return The bonding policy code for a given human-readable bonding policy name
*/
static int getPolicyCodeByStr(const std::string& basePolicyName)
{
if (basePolicyName == "active-backup") { return 1; }
if (basePolicyName == "broadcast") { return 2; }
if (basePolicyName == "balance-rr") { return 3; }
if (basePolicyName == "balance-xor") { return 4; }
if (basePolicyName == "balance-aware") { return 5; }
return 0; // "none"
}
/**
* @param policy Bonding policy code (See ZeroTierOne.h)
* @return The human-readable name for the given bonding policy code
*/
static std::string getPolicyStrByCode(int policy)
{
if (policy == 1) { return "active-backup"; }
if (policy == 2) { return "broadcast"; }
if (policy == 3) { return "balance-rr"; }
if (policy == 4) { return "balance-xor"; }
if (policy == 5) { return "balance-aware"; }
return "none";
}
/**
* Sets the default bonding policy for new or undefined bonds.
*
* @param bp Bonding policy
*/
void setBondingLayerDefaultPolicy(uint8_t bp) { _defaultBondingPolicy = bp; }
/**
* Sets the default (custom) bonding policy for new or undefined bonds.
*
* @param alias Human-readable string alias for bonding policy
*/
void setBondingLayerDefaultPolicyStr(std::string alias) { _defaultBondingPolicyStr = alias; }
/**
* @return The default bonding policy
*/
static int defaultBondingPolicy() { return _defaultBondingPolicy; }
/**
* Add a user-defined link to a given bonding policy.
*
* @param policyAlias User-defined custom name for variant of bonding policy
* @param link Pointer to new link definition
*/
void addCustomLink(std::string& policyAlias, SharedPtr<Link> link);
/**
* Add a user-defined bonding policy that is based on one of the standard types.
*
* @param newBond Pointer to custom Bond object
* @return Whether a uniquely-named custom policy was successfully added
*/
bool addCustomPolicy(const SharedPtr<Bond>& newBond);
/**
* Assigns a specific bonding policy
*
* @param identity
* @param policyAlias
* @return
*/
bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias);
/**
* Add a new bond to the bond controller.
*
* @param renv Runtime environment
* @param peer Remote peer that this bond services
* @return A pointer to the newly created Bond
*/
SharedPtr<Bond> createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr<Peer>& peer);
/**
* Periodically perform maintenance tasks for the bonding layer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void processBackgroundTasks(void *tPtr, int64_t now);
/**
* Gets a reference to a physical link definition given a policy alias and a local socket.
*
* @param policyAlias Policy in use
* @param localSocket Local source socket
* @return Physical link definition
*/
SharedPtr<Link> getLinkBySocket(const std::string& policyAlias, uint64_t localSocket);
/**
* Gets a reference to a physical link definition given its human-readable system name.
*
* @param policyAlias Policy in use
* @param ifname Alphanumeric human-readable name
* @return Physical link definition
*/
SharedPtr<Link> getLinkByName(const std::string& policyAlias, const std::string& ifname);
/**
* @param ifname Name of interface that we want to know if we can bind to
*/
bool allowedToBind(const std::string& ifname);
uint64_t getBondStartTime() { return bondStartTime; }
private:
Phy<BondController *> *_phy;
const RuntimeEnvironment *RR;
Mutex _bonds_m;
Mutex _links_m;
/**
* The last time that the bond controller updated the set of bonds.
*/
uint64_t _lastBackgroundBondControlTaskCheck;
/**
* The minimum monitoring interval among all paths in this bond.
*/
static int _minReqPathMonitorInterval;
/**
* The default bonding policy used for new bonds unless otherwise specified.
*/
static uint8_t _defaultBondingPolicy;
/**
* The default bonding policy used for new bonds unless otherwise specified.
*/
std::string _defaultBondingPolicyStr;
/**
* All currently active bonds.
*/
std::map<int64_t,SharedPtr<Bond> > _bonds;
/**
* Map of peers to custom bonding policies
*/
std::map<int64_t,std::string> _policyTemplateAssignments;
/**
* User-defined bonding policies (can be assigned to a peer)
*/
std::map<std::string,SharedPtr<Bond> > _bondPolicyTemplates;
/**
* Set of links defined for a given bonding policy
*/
std::map<std::string,std::vector<SharedPtr<Link> > > _linkDefinitions;
/**
* Set of link objects mapped to their physical interfaces
*/
std::map<std::string, std::map<std::string, SharedPtr<Link> > > _interfaceToLinkMap;
// TODO: Remove
uint64_t bondStartTime;
};
} // namespace ZeroTier
#endif

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -192,7 +192,7 @@
/**
* Minimum delay between timer task checks to prevent thrashing
*/
#define ZT_CORE_TIMER_TASK_GRANULARITY 500
#define ZT_CORE_TIMER_TASK_GRANULARITY 60
/**
* How often Topology::clean() and Network::clean() and similar are called, in ms
@ -253,184 +253,6 @@
*/
#define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000
/**
* How frequently to check for changes to the system's network interfaces. When
* the service decides to use this constant it's because we want to react more
* quickly to new interfaces that pop up or go down.
*/
#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000
/**
* Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by
* this integer. This is to provide a mechanism for both peers to agree on which packets need
* special treatment without having to exchange information. Changing this value would be
* a breaking change and would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the form:
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (16) so that given a normally-distributed RNG output we will sample
* 1/16th (or ~6.25%) of packets.
*/
#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
#define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000
/**
* Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be
* processed within cutoff time. Separate totals are kept for each type but
* the limit is the same for both.
*
* This limits how often this peer will compute statistical estimates
* of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to
* CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent
* this from being useful for DOS amplification attacks.
*/
#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 128
/**
* Path choice history window size. This is used to keep track of which paths were
* previously selected so that we can maintain a target allocation over time.
*/
#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128
/**
* How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL
* since we will record a 0 bit/s measurement if no valid latency measurement was made within this
* window of time.
*/
#define ZT_PATH_LATENCY_SAMPLE_INTERVAL (ZT_MULTIPATH_PEER_PING_PERIOD * 2)
/**
* Interval used for rate-limiting the computation of path quality estimates.
*/
#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000
/**
* Number of samples to consider when computing real-time path statistics
*/
#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128
/**
* Number of samples to consider when computing performing long-term path quality analysis.
* By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can
* be set to any value greater than that to observe longer-term path quality behavior.
*/
#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ
/**
* Maximum acceptable Packet Delay Variance (PDV) over a path
*/
#define ZT_PATH_MAX_PDV 1000
/**
* Maximum acceptable time interval between expectation and receipt of at least one ACK over a path
*/
#define ZT_PATH_MAX_AGE 30000
/**
* Maximum acceptable mean latency over a path
*/
#define ZT_PATH_MAX_MEAN_LATENCY 1000
/**
* How much each factor contributes to the "stability" score of a path
*/
#define ZT_PATH_CONTRIB_PDV (1.0 / 3.0)
#define ZT_PATH_CONTRIB_LATENCY (1.0 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0)
/**
* How much each factor contributes to the "quality" score of a path
*/
#define ZT_PATH_CONTRIB_STABILITY (0.75 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0)
#define ZT_PATH_CONTRIB_SCOPE (0.75 / 3.0)
/**
* How often a QoS packet is sent
*/
#define ZT_PATH_QOS_INTERVAL 3000
/**
* Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1
#define ZT_PATH_MAX_QOS_PACKET_SZ 1400
/**
* How many ID:sojourn time pairs in a single QoS packet
*/
#define ZT_PATH_QOS_TABLE_SIZE ((ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 16))
/**
* Maximum number of outgoing packets we monitor for QoS information
*/
#define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128
/**
* Timeout for QoS records
*/
#define ZT_PATH_QOS_TIMEOUT (ZT_PATH_QOS_INTERVAL * 2)
/**
* How often the service tests the path throughput
*/
#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL (ZT_PATH_ACK_INTERVAL * 8)
/**
* Minimum amount of time between each ACK packet
*/
#define ZT_PATH_ACK_INTERVAL 1000
/**
* How often an aggregate link statistics report is emitted into this tracing system
*/
#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 60000
/**
* How much an aggregate link's component paths can vary from their target allocation
* before the link is considered to be in a state of imbalance.
*/
#define ZT_PATH_IMBALANCE_THRESHOLD 0.20
/**
* Max allowable time spent in any queue
*/
#define ZT_QOS_TARGET 5 // ms
/**
* Time period where the time spent in the queue by a packet should fall below
* target at least once
*/
#define ZT_QOS_INTERVAL 100 // ms
/**
* The number of bytes that each queue is allowed to send during each DRR cycle.
* This approximates a single-byte-based fairness queuing scheme
*/
#define ZT_QOS_QUANTUM ZT_DEFAULT_MTU
/**
* The maximum total number of packets that can be queued among all
* active/inactive, old/new queues
*/
#define ZT_QOS_MAX_ENQUEUED_PACKETS 1024
/**
* Number of QoS queues (buckets)
*/
#define ZT_QOS_NUM_BUCKETS 9
/**
* All unspecified traffic is put in this bucket. Anything in a bucket with a smaller
* value is de-prioritized. Anything in a bucket with a higher value is prioritized over
* other traffic.
*/
#define ZT_QOS_DEFAULT_BUCKET 0
/**
* How frequently to send heartbeats over in-use paths
*/
@ -446,16 +268,6 @@
*/
#define ZT_PEER_PING_PERIOD 60000
/**
* Delay between full-fledge pings of directly connected peers.
*
* With multipath bonding enabled ping peers more often to measure
* packet loss and latency. This uses more bandwidth so is disabled
* by default to avoid increasing idle bandwidth use for regular
* links.
*/
#define ZT_MULTIPATH_PEER_PING_PERIOD 5000
/**
* Paths are considered expired if they have not sent us a real packet in this long
*/
@ -466,6 +278,205 @@
*/
#define ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD (ZT_PEER_PING_PERIOD * 10)
/**
* Outgoing packets are only used for QoS/ACK statistical sampling if their
* packet ID is divisible by this integer. This is to provide a mechanism for
* both peers to agree on which packets need special treatment without having
* to exchange information. Changing this value would be a breaking change and
* would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the
* form:
*
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (16) so that given a normally-distributed RNG output
* we will sample 1/16th (or ~6.25%) of packets.
*/
#define ZT_QOS_ACK_DIVISOR 0x2
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
#define ZT_QOS_ACK_CUTOFF_TIME 30000
/**
* Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be
* processed within cutoff time. Separate totals are kept for each type but
* the limit is the same for both.
*
* This limits how often this peer will compute statistical estimates
* of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to
* CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent
* this from being useful for DOS amplification attacks.
*/
#define ZT_QOS_ACK_CUTOFF_LIMIT 128
/**
* Minimum acceptable size for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_QOS_MIN_PACKET_SIZE (8 + 1)
/**
* Maximum acceptable size for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_QOS_MAX_PACKET_SIZE 1400
/**
* How many ID:sojourn time pairs are in a single QoS packet
*/
#define ZT_QOS_TABLE_SIZE ((ZT_QOS_MAX_PACKET_SIZE * 8) / (64 + 16))
/**
* Maximum number of outgoing packets we monitor for QoS information
*/
#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16)
/**
* Interval used for rate-limiting the computation of path quality estimates.
*/
#define ZT_QOS_COMPUTE_INTERVAL 1000
/**
* Number of samples to consider when processing real-time path statistics
*/
#define ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE 32
/**
* Max allowable time spent in any queue (in ms)
*/
#define ZT_AQM_TARGET 5
/**
* Time period where the time spent in the queue by a packet should fall below.
* target at least once. (in ms)
*/
#define ZT_AQM_INTERVAL 100
/**
* The number of bytes that each queue is allowed to send during each DRR cycle.
* This approximates a single-byte-based fairness queuing scheme.
*/
#define ZT_AQM_QUANTUM ZT_DEFAULT_MTU
/**
* The maximum total number of packets that can be queued among all
* active/inactive, old/new queues.
*/
#define ZT_AQM_MAX_ENQUEUED_PACKETS 1024
/**
* Number of QoS queues (buckets)
*/
#define ZT_AQM_NUM_BUCKETS 9
/**
* All unspecified traffic is put in this bucket. Anything in a bucket with a
* smaller value is deprioritized. Anything in a bucket with a higher value is
prioritized over other traffic.
*/
#define ZT_AQM_DEFAULT_BUCKET 0
/**
* How long before we consider a path to be dead in the general sense. This is
* used while searching for default or alternative paths to try in the absence
* of direct guidance from the user or a selection policy.
*/
#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000
/**
* How often flows are evaluated
*/
#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000
/**
* How long before we consider a flow to be dead and remove it from the
* policy's list.
*/
#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL 30000
/**
* How often a flow's statistical counters are reset
*/
#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL
/**
* Maximum number of flows allowed before we start forcibly forgetting old ones
*/
#define ZT_FLOW_MAX_COUNT (1024*64)
/**
* How often flows are rebalanced across link (if at all)
*/
#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000
/**
* How often flows are rebalanced across link (if at all)
*/
#define ZT_FLOW_REBALANCE_INTERVAL 5000
/**
* A defensive timer to prevent path quality metrics from being
* processed too often.
*/
#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY
/**
* How often a bonding policy's background tasks are processed,
* some need more frequent attention than others.
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY
/**
* Minimum amount of time (since a previous transition) before the active-backup bonding
* policy is allowed to transition to a different link. Only valid for active-backup.
*/
#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000
/**
* How often a peer checks that incoming (and outgoing) traffic on a bonded link is
* appropriately paired.
*/
#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000
/**
* Time horizon for path negotiation paths cutoff
*/
#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000
/**
* Maximum number of path negotiations within cutoff time
*
* This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses
* per CUTOFF_TIME milliseconds per peer to prevent this from being
* useful for DOS amplification attacks.
*/
#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8
/**
* How many times a peer will attempt to petition another peer to synchronize its
* traffic to the same path before giving up and surrendering to the other peer's preference.
*/
#define ZT_PATH_NEGOTIATION_TRY_COUNT 3
/**
* How much greater the quality of a path should be before an
* optimization procedure triggers a switch.
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10
/**
* Artificially inflates the failover score for paths which meet
* certain non-performance-related policy ranking criteria.
*/
#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500
#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000
#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000
/**
* An indicator that no flow is to be associated with the given packet
*/
#define ZT_QOS_NO_FLOW -1
/**
* Timeout for overall peer activity (measured from last receive)
*/
@ -538,6 +549,27 @@
*/
#define ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME 30000
/**
* Drainage constants for VERB_ECHO rate-limiters
*/
#define ZT_ECHO_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS)
#define ZT_ECHO_DRAINAGE_DIVISOR (1000 / ZT_ECHO_CUTOFF_LIMIT)
/**
* Drainage constants for VERB_QOS rate-limiters
*/
#define ZT_QOS_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS)
#define ZT_QOS_DRAINAGE_DIVISOR (1000 / ZT_QOS_CUTOFF_LIMIT)
/**
* Drainage constants for VERB_ACK rate-limiters
*/
#define ZT_ACK_CUTOFF_LIMIT 128
#define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT)
#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000
#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000
/**
* Maximum number of direct path pushes within cutoff time
*

124
node/Flow.hpp Normal file
View File

@ -0,0 +1,124 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_FLOW_HPP
#define ZT_FLOW_HPP
#include "Path.hpp"
#include "SharedPtr.hpp"
namespace ZeroTier {
/**
* A protocol flow that is identified by the origin and destination port.
*/
struct Flow
{
/**
* @param flowId Given flow ID
* @param now Current time
*/
Flow(int32_t flowId, int64_t now) :
_flowId(flowId),
_bytesInPerUnitTime(0),
_bytesOutPerUnitTime(0),
_lastActivity(now),
_lastPathReassignment(0),
_assignedPath(SharedPtr<Path>())
{}
/**
* Reset flow statistics
*/
void resetByteCounts()
{
_bytesInPerUnitTime = 0;
_bytesOutPerUnitTime = 0;
}
/**
* @return The Flow's ID
*/
int32_t id() { return _flowId; }
/**
* @return Number of incoming bytes processed on this flow per unit time
*/
int64_t bytesInPerUnitTime() { return _bytesInPerUnitTime; }
/**
* Record number of incoming bytes on this flow
*
* @param bytes Number of incoming bytes
*/
void recordIncomingBytes(uint64_t bytes) { _bytesInPerUnitTime += bytes; }
/**
* @return Number of outgoing bytes processed on this flow per unit time
*/
int64_t bytesOutPerUnitTime() { return _bytesOutPerUnitTime; }
/**
* Record number of outgoing bytes on this flow
*
* @param bytes
*/
void recordOutgoingBytes(uint64_t bytes) { _bytesOutPerUnitTime += bytes; }
/**
* @return The total number of bytes processed on this flow
*/
uint64_t totalBytes() { return _bytesInPerUnitTime + _bytesOutPerUnitTime; }
/**
* How long since a packet was sent or received in this flow
*
* @param now Current time
* @return The age of the flow in terms of last recorded activity
*/
int64_t age(int64_t now) { return now - _lastActivity; }
/**
* Record that traffic was processed on this flow at the given time.
*
* @param now Current time
*/
void updateActivity(int64_t now) { _lastActivity = now; }
/**
* @return Path assigned to this flow
*/
SharedPtr<Path> assignedPath() { return _assignedPath; }
/**
* @param path Assigned path over which this flow should be handled
*/
void assignPath(const SharedPtr<Path> &path, int64_t now) {
_assignedPath = path;
_lastPathReassignment = now;
}
AtomicCounter __refCount;
int32_t _flowId;
uint64_t _bytesInPerUnitTime;
uint64_t _bytesOutPerUnitTime;
int64_t _lastActivity;
int64_t _lastPathReassignment;
SharedPtr<Path> _assignedPath;
SharedPtr<Path> _previouslyAssignedPath;
};
} // namespace ZeroTier
#endif

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -35,10 +35,12 @@
#include "Tag.hpp"
#include "Revocation.hpp"
#include "Trace.hpp"
#include "Path.hpp"
#include "Bond.hpp"
namespace ZeroTier {
bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId)
{
const Address sourceAddress(source());
@ -67,7 +69,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
if (!trusted) {
if (!dearmor(peer->key())) {
RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC");
_path->recordInvalidPacket();
peer->recordIncomingInvalidPacket(_path);
return true;
}
}
@ -78,11 +80,12 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
}
const Packet::Verb v = verb();
bool r = true;
switch(v) {
//case Packet::VERB_NOP:
default: // ignore unknown verbs, but if they pass auth check they are "received"
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
break;
case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break;
case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break;
@ -91,8 +94,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break;
case Packet::VERB_WHOIS: r = _doWHOIS(RR,tPtr,peer); break;
case Packet::VERB_RENDEZVOUS: r = _doRENDEZVOUS(RR,tPtr,peer); break;
case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer); break;
case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer); break;
case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer,flowId); break;
case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer,flowId); break;
case Packet::VERB_ECHO: r = _doECHO(RR,tPtr,peer); break;
case Packet::VERB_MULTICAST_LIKE: r = _doMULTICAST_LIKE(RR,tPtr,peer); break;
case Packet::VERB_NETWORK_CREDENTIALS: r = _doNETWORK_CREDENTIALS(RR,tPtr,peer); break;
@ -103,6 +106,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
case Packet::VERB_PUSH_DIRECT_PATHS: r = _doPUSH_DIRECT_PATHS(RR,tPtr,peer); break;
case Packet::VERB_USER_MESSAGE: r = _doUSER_MESSAGE(RR,tPtr,peer); break;
case Packet::VERB_REMOTE_TRACE: r = _doREMOTE_TRACE(RR,tPtr,peer); break;
case Packet::VERB_PATH_NEGOTIATION_REQUEST: r = _doPATH_NEGOTIATION_REQUEST(RR,tPtr,peer); break;
}
if (r) {
RR->node->statsLogVerb((unsigned int)v,(unsigned int)size());
@ -113,9 +117,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
RR->sw->requestWhois(tPtr,RR->node->now(),sourceAddress);
return false;
}
} catch (int ztExcCode) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()");
return true;
} catch ( ... ) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()");
return true;
@ -193,59 +194,61 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar
default: break;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateACK(RR->node->now()))
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGateACK(RR->node->now())) {
return true;
}
/* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known
* maximums and detect packet loss. */
if (peer->localMultipathSupport()) {
int32_t ackedBytes;
if (payloadLength() != sizeof(ackedBytes)) {
return true; // ignore
}
memcpy(&ackedBytes, payload(), sizeof(ackedBytes));
_path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes));
peer->inferRemoteMultipathEnabled();
int32_t ackedBytes;
if (payloadLength() != sizeof(ackedBytes)) {
return true; // ignore
}
memcpy(&ackedBytes, payload(), sizeof(ackedBytes));
if (bond) {
bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes));
}
return true;
}
bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateQoS(RR->node->now()))
SharedPtr<Bond> bond = peer->bond();
/* TODO: Fix rate gate issue
if (!bond || !bond->rateGateQoS(RR->node->now())) {
return true;
}
*/
/* Dissect incoming QoS packet. From this we can compute latency values and their variance.
* The latency variance is used as a measure of "jitter". */
if (peer->localMultipathSupport()) {
if (payloadLength() > ZT_PATH_MAX_QOS_PACKET_SZ || payloadLength() < ZT_PATH_MIN_QOS_PACKET_SZ) {
return true; // ignore
}
const int64_t now = RR->node->now();
uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE];
uint16_t rx_ts[ZT_PATH_QOS_TABLE_SIZE];
char *begin = (char *)payload();
char *ptr = begin;
int count = 0;
int len = payloadLength();
// Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet
while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) {
memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t));
ptr+=sizeof(uint64_t);
memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t));
ptr+=sizeof(uint16_t);
count++;
}
_path->receivedQoS(now, count, rx_id, rx_ts);
peer->inferRemoteMultipathEnabled();
if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) {
return true; // ignore
}
const int64_t now = RR->node->now();
uint64_t rx_id[ZT_QOS_TABLE_SIZE];
uint16_t rx_ts[ZT_QOS_TABLE_SIZE];
char *begin = (char *)payload();
char *ptr = begin;
int count = 0;
unsigned int len = payloadLength();
// Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet
while (ptr < (begin + len) && (count < ZT_QOS_TABLE_SIZE)) {
memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t));
ptr+=sizeof(uint64_t);
memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t));
ptr+=sizeof(uint16_t);
count++;
}
if (bond) {
bond->receivedQoS(_path, now, count, rx_id, rx_ts);
}
return true;
}
@ -441,11 +444,12 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool
}
outp.setAt<uint16_t>(worldUpdateSizeAt,(uint16_t)(outp.size() - (worldUpdateSizeAt + 2)));
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),now);
peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -493,7 +497,10 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
}
if (!hops()) {
_path->updateLatency((unsigned int)latency,RR->node->now());
SharedPtr<Bond> bond = peer->bond();
if (!bond) {
_path->updateLatency((unsigned int)latency,RR->node->now());
}
}
peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision);
@ -522,8 +529,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
if (network) {
const MulticastGroup mg(MAC(field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_MAC,6),6),at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_ADI));
const unsigned int count = at<uint16_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 4);
if (((ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6) + (count * 5)) <= size())
RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS));
RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS));
}
} break;
@ -556,7 +562,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
default: break;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW);
return true;
}
@ -591,7 +597,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -615,13 +621,108 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
// Returns true if packet appears valid; pos and proto will be set
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
if (frameLen < 40)
return false;
pos = 40;
proto = frameData[6];
while (pos <= frameLen) {
switch(proto) {
case 0: // hop-by-hop options
case 43: // routing
case 60: // destination options
case 135: // mobility options
if ((pos + 8) > frameLen)
return false; // invalid!
proto = frameData[pos];
pos += ((unsigned int)frameData[pos + 1] * 8) + 8;
break;
//case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway
//case 50:
//case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff
default:
return true;
}
}
return false; // overflow == invalid
}
bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId)
{
int32_t _flowId = ZT_QOS_NO_FLOW;
SharedPtr<Bond> bond = peer->bond();
if (bond && bond->flowHashingEnabled()) {
if (size() > ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD) {
const unsigned int etherType = at<uint16_t>(ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE);
const unsigned int frameLen = size() - ZT_PROTO_VERB_FRAME_IDX_PAYLOAD;
const uint8_t *const frameData = reinterpret_cast<const uint8_t *>(data()) + ZT_PROTO_VERB_FRAME_IDX_PAYLOAD;
if (etherType == ZT_ETHERTYPE_IPV4 && (frameLen >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
uint8_t proto = (reinterpret_cast<const uint8_t *>(frameData)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(frameData)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
//flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (frameLen > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
_flowId = dstPort ^ srcPort ^ proto;
}
break;
}
}
if (etherType == ZT_ETHERTYPE_IPV6 && (frameLen >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)frameData, frameLen, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
//flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (frameLen > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
_flowId = dstPort ^ srcPort ^ proto;
}
break;
default:
break;
}
}
}
}
const uint64_t nwid = at<uint64_t>(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID);
const SharedPtr<Network> network(RR->node->network(nwid));
bool trustEstablished = false;
@ -641,13 +742,12 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar
return false;
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid,_flowId);
return true;
}
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId)
{
const uint64_t nwid = at<uint64_t>(ZT_PROTO_VERB_EXT_FRAME_IDX_NETWORK_ID);
const SharedPtr<Network> network(RR->node->network(nwid));
@ -676,7 +776,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen);
if ((!from)||(from == network->mac())) {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
@ -687,19 +787,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
network->learnBridgeRoute(from,peer->address());
} else {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
} else if (to != network->mac()) {
if (to.isMulticast()) {
if (network->config().multicastLimit == 0) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
} else if (!network->config().permitsBridging(RR->identity.address())) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
}
@ -715,13 +815,15 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
outp.append((uint8_t)Packet::VERB_EXT_FRAME);
outp.append((uint64_t)packetId());
outp.append((uint64_t)nwid);
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId);
} else {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid,flowId);
}
return true;
@ -729,8 +831,10 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateEchoRequest(RR->node->now()))
uint64_t now = RR->node->now();
if (!peer->rateGateEchoRequest(now)) {
return true;
}
const uint64_t pid = packetId();
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK);
@ -738,10 +842,11 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share
outp.append((uint64_t)pid);
if (size() > ZT_PACKET_IDX_PAYLOAD)
outp.append(reinterpret_cast<const unsigned char *>(data()) + ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD);
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -767,7 +872,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c
RR->mc->add(tPtr,now,nwid,MulticastGroup(MAC(field(ptr + 8,6),6),at<uint32_t>(ptr + 14)),peer->address());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -889,7 +994,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0,ZT_QOS_NO_FLOW);
return true;
}
@ -915,7 +1020,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW);
return true;
}
@ -931,12 +1036,14 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c
outp.append((uint64_t)packetId());
outp.append((uint64_t)network->id());
outp.append((uint64_t)configUpdateId);
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0,ZT_QOS_NO_FLOW);
return true;
}
@ -979,12 +1086,13 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr
outp.append((uint32_t)mg.adi());
const unsigned int gatheredLocally = RR->mc->gather(peer->address(),nwid,mg,outp,gatherLimit);
if (gatheredLocally > 0) {
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),now);
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid,ZT_QOS_NO_FLOW);
return true;
}
@ -1032,19 +1140,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
if (network->config().multicastLimit == 0) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW);
return true;
}
if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) {
if (!to.mac().isMulticast()) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
if ((!from)||(from.isMulticast())||(from == network->mac())) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
@ -1058,7 +1166,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
network->learnBridgeRoute(from,peer->address());
} else {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
}
@ -1076,12 +1184,14 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
outp.append((uint32_t)to.adi());
outp.append((unsigned char)0x02); // flag 0x02 = contains gather results
if (RR->mc->gather(peer->address(),nwid,to,outp,gatherLimit)) {
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW);
} else {
_sendErrorNeedCredentials(RR,tPtr,peer,nwid);
return false;
@ -1094,9 +1204,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
{
const int64_t now = RR->node->now();
// First, subject this to a rate limit
if (!peer->rateGatePushDirectPaths(now)) {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1108,8 +1217,6 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2;
while (count--) { // if ptr overflows Buffer will throw
// TODO: some flags are not yet implemented
unsigned int flags = (*this)[ptr++];
unsigned int extLen = at<uint16_t>(ptr); ptr += 2;
ptr += extLen; // unused right now
@ -1132,6 +1239,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
}
} break;
case 6: {
const InetAddress a(field(ptr,16),16,at<uint16_t>(ptr + 16));
if (
((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget
@ -1149,7 +1257,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
ptr += addrLen;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1165,7 +1273,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con
RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast<const void *>(&um));
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1189,11 +1297,29 @@ bool IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
uint64_t now = RR->node->now();
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGatePathNegotiation(now)) {
return true;
}
if (payloadLength() != sizeof(int16_t)) {
return true;
}
int16_t remoteUtility = 0;
memcpy(&remoteUtility, payload(), sizeof(int16_t));
if (peer->bond()) {
peer->bond()->processIncomingPathNegotiationRequest(now, _path, Utils::ntoh(remoteUtility));
}
return true;
}
void IncomingPacket::_sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,const uint64_t nwid)
{
Packet outp(source(),RR->identity.address(),Packet::VERB_ERROR);

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -100,7 +100,7 @@ public:
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @return True if decoding and processing is complete, false if caller should try again
*/
bool tryDecode(const RuntimeEnvironment *RR,void *tPtr);
bool tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId);
/**
* @return Time of packet receipt / start of decode
@ -117,8 +117,8 @@ private:
bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId);
bool _doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
@ -129,6 +129,7 @@ private:
bool _doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
void _sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,const uint64_t nwid);

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -48,6 +48,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
_networks(8),
_now(now),
_lastPingCheck(0),
_lastGratuitousPingCheck(0),
_lastHousekeepingRun(0),
_lastMemoizedTraceSettings(0)
{
@ -102,8 +103,9 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0);
const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0);
const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0);
const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0);
m = reinterpret_cast<char *>(::malloc(16 + ts + sws + mcs + topologys + sas));
m = reinterpret_cast<char *>(::malloc(16 + ts + sws + mcs + topologys + sas + bc));
if (!m)
throw std::bad_alloc();
RR->rtmem = m;
@ -118,12 +120,15 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
RR->topology = new (m) Topology(RR,tptr);
m += topologys;
RR->sa = new (m) SelfAwareness(RR);
m += sas;
RR->bc = new (m) BondController(RR);
} catch ( ... ) {
if (RR->sa) RR->sa->~SelfAwareness();
if (RR->topology) RR->topology->~Topology();
if (RR->mc) RR->mc->~Multicaster();
if (RR->sw) RR->sw->~Switch();
if (RR->t) RR->t->~Trace();
if (RR->bc) RR->bc->~BondController();
::free(m);
throw;
}
@ -142,6 +147,7 @@ Node::~Node()
if (RR->mc) RR->mc->~Multicaster();
if (RR->sw) RR->sw->~Switch();
if (RR->t) RR->t->~Trace();
if (RR->bc) RR->bc->~BondController();
::free(RR->rtmem);
}
@ -246,9 +252,23 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64
_now = now;
Mutex::Lock bl(_backgroundTasksLock);
unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY;
if (RR->bc->inUse()) {
// Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled)
bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL);
if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) {
Hashtable< Address,std::vector<InetAddress> > alwaysContact;
_PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now);
RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc);
_lastGratuitousPingCheck = now;
}
RR->bc->processBackgroundTasks(tptr, now);
}
unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL;
const int64_t timeSinceLastPingCheck = now - _lastPingCheck;
if (timeSinceLastPingCheck >= ZT_PING_CHECK_INVERVAL) {
if (timeSinceLastPingCheck >= timeUntilNextPingCheck) {
try {
_lastPingCheck = now;
@ -354,7 +374,7 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64
}
try {
*nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY);
*nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(bondCheckInterval,std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now))),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY);
} catch ( ... ) {
return ZT_RESULT_FATAL_ERROR_INTERNAL;
}
@ -461,7 +481,7 @@ ZT_PeerList *Node::peers() const
for(std::vector< std::pair< Address,SharedPtr<Peer> > >::iterator pi(peers.begin());pi!=peers.end();++pi) {
ZT_Peer *p = &(pl->peers[pl->peerCount++]);
p->address = pi->second->address().toInt();
p->hadAggregateLink = 0;
p->isBonded = 0;
if (pi->second->remoteVersionKnown()) {
p->versionMajor = pi->second->remoteVersionMajor();
p->versionMinor = pi->second->remoteVersionMinor();
@ -478,28 +498,22 @@ ZT_PeerList *Node::peers() const
std::vector< SharedPtr<Path> > paths(pi->second->paths(_now));
SharedPtr<Path> bestp(pi->second->getAppropriatePath(_now,false));
p->hadAggregateLink |= pi->second->hasAggregateLink();
p->pathCount = 0;
for(std::vector< SharedPtr<Path> >::iterator path(paths.begin());path!=paths.end();++path) {
memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage));
p->paths[p->pathCount].localSocket = (*path)->localSocket();
p->paths[p->pathCount].lastSend = (*path)->lastOut();
p->paths[p->pathCount].lastReceive = (*path)->lastIn();
p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address());
p->paths[p->pathCount].expired = 0;
p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0;
p->paths[p->pathCount].latency = (float)(*path)->latency();
p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance();
p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient();
p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio();
p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio();
p->paths[p->pathCount].stability = (*path)->lastComputedStability();
p->paths[p->pathCount].throughput = (*path)->meanThroughput();
p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput();
p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255;
p->paths[p->pathCount].ifname = (*path)->getName();
p->paths[p->pathCount].scope = (*path)->ipScope();
++p->pathCount;
}
if (pi->second->bond()) {
p->isBonded = pi->second->bond();
p->bondingPolicy = pi->second->bond()->getPolicy();
}
}
return pl;

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -34,6 +34,7 @@
#include "Salsa20.hpp"
#include "NetworkController.hpp"
#include "Hashtable.hpp"
#include "BondController.hpp"
// Bit mask for "expecting reply" hash
#define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255
@ -186,6 +187,8 @@ public:
inline const Identity &identity() const { return _RR.identity; }
inline BondController *bondController() const { return _RR.bc; }
/**
* Register that we are expecting a reply to a packet ID
*
@ -247,9 +250,6 @@ public:
inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; }
inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; }
inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; }
inline uint8_t getMultipathMode() { return _multipathMode; }
inline bool localControllerHasAuthorized(const int64_t now,const uint64_t nwid,const Address &addr) const
{
_localControllerAuthorizations_m.lock();
@ -306,10 +306,9 @@ private:
Address _remoteTraceTarget;
enum Trace::Level _remoteTraceLevel;
uint8_t _multipathMode;
volatile int64_t _now;
int64_t _lastPingCheck;
int64_t _lastGratuitousPingCheck;
int64_t _lastHousekeepingRun;
int64_t _lastMemoizedTraceSettings;
volatile int64_t _prngState[2];

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -56,7 +56,7 @@
* + Inline push of CertificateOfMembership deprecated
* 9 - 1.2.0 ... 1.2.14
* 10 - 1.4.0 ... CURRENT
* + Multipath capability and load balancing
* + Multipath capability and load balancing (tentative)
*/
#define ZT_PROTO_VERSION 10
@ -931,13 +931,13 @@ public:
*
* Upon receipt of this packet, the local peer will verify that the correct
* number of bytes were received by the remote peer. If these values do
* not agree that could be an indicator of packet loss.
* not agree that could be an indication of packet loss.
*
* Additionally, the local peer knows the interval of time that has
* elapsed since the last received ACK. With this information it can compute
* a rough estimate of the current throughput.
*
* This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL
* This is sent at a maximum rate of once per every ZT_QOS_ACK_INTERVAL
*/
VERB_ACK = 0x12,
@ -963,7 +963,8 @@ public:
* measure of the amount of time between when a packet was received and the
* egress time of its tracking QoS packet.
*
* This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL
* This is sent at a maximum rate of once per every
* ZT_QOS_MEASUREMENT_INTERVAL
*/
VERB_QOS_MEASUREMENT = 0x13,
@ -996,7 +997,34 @@ public:
* node on startup. This is helpful in identifying traces from different
* members of a cluster.
*/
VERB_REMOTE_TRACE = 0x15
VERB_REMOTE_TRACE = 0x15,
/**
* A request to a peer to use a specific path in a multi-path scenario:
* <[2] 16-bit unsigned integer that encodes a path choice utility>
*
* This is sent when a node operating in multipath mode observes that
* its inbound and outbound traffic aren't going over the same path. The
* node will compute its perceived utility for using its chosen outbound
* path and send this to a peer in an attempt to petition it to send
* its traffic over this same path.
*
* Scenarios:
*
* (1) Remote peer utility is GREATER than ours:
* - Remote peer will refuse the petition and continue using current path
* (2) Remote peer utility is LESS than than ours:
* - Remote peer will accept the petition and switch to our chosen path
* (3) Remote peer utility is EQUAL to our own:
* - To prevent confusion and flapping, both side will agree to use the
* numerical values of their identities to determine which path to use.
* The peer with the greatest identity will win.
*
* If a node petitions a peer repeatedly with no effect it will regard
* that as a refusal by the remote peer, in this case if the utility is
* negligible it will voluntarily switch to the remote peer's chosen path.
*/
VERB_PATH_NEGOTIATION_REQUEST = 0x16
};
/**

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -26,10 +26,10 @@
#include "SharedPtr.hpp"
#include "AtomicCounter.hpp"
#include "Utils.hpp"
#include "RingBuffer.hpp"
#include "Packet.hpp"
#include "RingBuffer.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Link.hpp"
/**
* Maximum return value of preferenceRank()
@ -46,7 +46,7 @@ class RuntimeEnvironment;
class Path
{
friend class SharedPtr<Path>;
Phy<Path *> *_phy;
friend class Bond;
public:
/**
@ -85,77 +85,111 @@ public:
_lastOut(0),
_lastIn(0),
_lastTrustEstablishedPacketReceived(0),
_lastPathQualityComputeTime(0),
_localSocket(-1),
_latency(0xffff),
_addr(),
_ipScope(InetAddress::IP_SCOPE_NONE),
_lastAck(0),
_lastThroughputEstimation(0),
_lastAckReceived(0),
_lastAckSent(0),
_lastQoSMeasurement(0),
_lastQoSRecordPurge(0),
_lastThroughputEstimation(0),
_lastRefractoryUpdate(0),
_lastAliveToggle(0),
_lastEligibilityState(false),
_lastTrialBegin(0),
_refractoryPeriod(0),
_monitorInterval(0),
_upDelay(0),
_downDelay(0),
_ipvPref(0),
_mode(0),
_onlyPathOnLink(false),
_enabled(false),
_bonded(false),
_negotiated(false),
_deprecated(false),
_shouldReallocateFlows(false),
_assignedFlowCount(0),
_latencyMean(0),
_latencyVariance(0),
_packetLossRatio(0),
_packetErrorRatio(0),
_throughputMean(0),
_throughputMax(0),
_throughputVariance(0),
_allocation(0),
_byteLoad(0),
_relativeByteLoad(0),
_affinity(0),
_failoverScore(0),
_unackedBytes(0),
_expectingAckAsOf(0),
_packetsReceivedSinceLastAck(0),
_packetsReceivedSinceLastQoS(0),
_maxLifetimeThroughput(0),
_lastComputedMeanThroughput(0),
_bytesAckedSinceLastThroughputEstimation(0),
_lastComputedMeanLatency(0.0),
_lastComputedPacketDelayVariance(0.0),
_lastComputedPacketErrorRatio(0.0),
_lastComputedPacketLossRatio(0),
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0)
{
memset(_ifname, 0, 16);
memset(_addrString, 0, sizeof(_addrString));
}
_packetsIn(0),
_packetsOut(0)
{}
Path(const int64_t localSocket,const InetAddress &addr) :
_lastOut(0),
_lastIn(0),
_lastTrustEstablishedPacketReceived(0),
_lastPathQualityComputeTime(0),
_localSocket(localSocket),
_latency(0xffff),
_addr(addr),
_ipScope(addr.ipScope()),
_lastAck(0),
_lastThroughputEstimation(0),
_lastAckReceived(0),
_lastAckSent(0),
_lastQoSMeasurement(0),
_lastQoSRecordPurge(0),
_lastThroughputEstimation(0),
_lastRefractoryUpdate(0),
_lastAliveToggle(0),
_lastEligibilityState(false),
_lastTrialBegin(0),
_refractoryPeriod(0),
_monitorInterval(0),
_upDelay(0),
_downDelay(0),
_ipvPref(0),
_mode(0),
_onlyPathOnLink(false),
_enabled(false),
_bonded(false),
_negotiated(false),
_deprecated(false),
_shouldReallocateFlows(false),
_assignedFlowCount(0),
_latencyMean(0),
_latencyVariance(0),
_packetLossRatio(0),
_packetErrorRatio(0),
_throughputMean(0),
_throughputMax(0),
_throughputVariance(0),
_allocation(0),
_byteLoad(0),
_relativeByteLoad(0),
_affinity(0),
_failoverScore(0),
_unackedBytes(0),
_expectingAckAsOf(0),
_packetsReceivedSinceLastAck(0),
_packetsReceivedSinceLastQoS(0),
_maxLifetimeThroughput(0),
_lastComputedMeanThroughput(0),
_bytesAckedSinceLastThroughputEstimation(0),
_lastComputedMeanLatency(0.0),
_lastComputedPacketDelayVariance(0.0),
_lastComputedPacketErrorRatio(0.0),
_lastComputedPacketLossRatio(0),
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0)
{
memset(_ifname, 0, 16);
memset(_addrString, 0, sizeof(_addrString));
if (_localSocket != -1) {
_phy->getIfName((PhySocket *) ((uintptr_t) _localSocket), _ifname, 16);
}
}
_packetsIn(0),
_packetsOut(0)
{}
/**
* Called when a packet is received from this remote path, regardless of content
*
* @param t Time of receive
*/
inline void received(const uint64_t t) { _lastIn = t; }
inline void received(const uint64_t t) {
if (!alive(t,_bonded)) {
_lastAliveToggle = _lastIn;
}
_lastIn = t;
}
/**
* Set time last trusted packet was received (done in Peer::received())
@ -195,7 +229,6 @@ public:
else {
_latency = l;
}
_latencySamples.push(l);
}
/**
@ -284,341 +317,32 @@ public:
}
/**
* Record statistics on outgoing packets. Used later to estimate QoS metrics.
*
* @param now Current time
* @param packetId ID of packet
* @param payloadLength Length of payload
* @param verb Packet verb
* @param bonded Whether this path is part of a bond.
*/
inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb)
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_unackedBytes += payloadLength;
// Take note that we're expecting a VERB_ACK on this path as of a specific time
_expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now;
if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) {
_outQoSRecords[packetId] = now;
}
}
}
}
inline void setBonded(bool bonded) { _bonded = bonded; }
/**
* Record statistics on incoming packets. Used later to estimate QoS metrics.
*
* @param now Current time
* @param packetId ID of packet
* @param payloadLength Length of payload
* @param verb Packet verb
* @return True if this path is currently part of a bond.
*/
inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb)
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_inACKRecords[packetId] = payloadLength;
_packetsReceivedSinceLastAck++;
_inQoSRecords[packetId] = now;
_packetsReceivedSinceLastQoS++;
}
_packetValiditySamples.push(true);
}
}
/**
* Record that we've received a VERB_ACK on this path, also compute throughput if required.
*
* @param now Current time
* @param ackedBytes Number of bytes acknowledged by other peer
*/
inline void receivedAck(int64_t now, int32_t ackedBytes)
{
_expectingAckAsOf = 0;
_unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes;
int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation);
if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) {
uint64_t throughput = (uint64_t)((float)(_bytesAckedSinceLastThroughputEstimation * 8) / ((float)timeSinceThroughputEstimate / (float)1000));
_throughputSamples.push(throughput);
_maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput;
_lastThroughputEstimation = now;
_bytesAckedSinceLastThroughputEstimation = 0;
} else {
_bytesAckedSinceLastThroughputEstimation += ackedBytes;
}
}
/**
* @return Number of bytes this peer is responsible for ACKing since last ACK
*/
inline int32_t bytesToAck()
{
Mutex::Lock _l(_statistics_m);
int32_t bytesToAck = 0;
std::map<uint64_t,uint16_t>::iterator it = _inACKRecords.begin();
while (it != _inACKRecords.end()) {
bytesToAck += it->second;
it++;
}
return bytesToAck;
}
/**
* @return Number of bytes thus far sent that have not been acknowledged by the remote peer
*/
inline int64_t unackedSentBytes()
{
return _unackedBytes;
}
/**
* Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers
*
* @param Current time
*/
inline void sentAck(int64_t now)
{
Mutex::Lock _l(_statistics_m);
_inACKRecords.clear();
_packetsReceivedSinceLastAck = 0;
_lastAck = now;
}
/**
* Receive QoS data, match with recorded egress times from this peer, compute latency
* estimates.
*
* @param now Current time
* @param count Number of records
* @param rx_id table of packet IDs
* @param rx_ts table of holding times
*/
inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts)
{
Mutex::Lock _l(_statistics_m);
// Look up egress times and compute latency values for each record
std::map<uint64_t,uint64_t>::iterator it;
for (int j=0; j<count; j++) {
it = _outQoSRecords.find(rx_id[j]);
if (it != _outQoSRecords.end()) {
uint16_t rtt = (uint16_t)(now - it->second);
uint16_t rtt_compensated = rtt - rx_ts[j];
uint16_t latency = rtt_compensated / 2;
updateLatency(latency, now);
_outQoSRecords.erase(it);
}
}
}
/**
* Generate the contents of a VERB_QOS_MEASUREMENT packet.
*
* @param now Current time
* @param qosBuffer destination buffer
* @return Size of payload
*/
inline int32_t generateQoSPacket(int64_t now, char *qosBuffer)
{
Mutex::Lock _l(_statistics_m);
int32_t len = 0;
std::map<uint64_t,uint64_t>::iterator it = _inQoSRecords.begin();
int i=0;
while (i<_packetsReceivedSinceLastQoS && it != _inQoSRecords.end()) {
uint64_t id = it->first;
memcpy(qosBuffer, &id, sizeof(uint64_t));
qosBuffer+=sizeof(uint64_t);
uint16_t holdingTime = (uint16_t)(now - it->second);
memcpy(qosBuffer, &holdingTime, sizeof(uint16_t));
qosBuffer+=sizeof(uint16_t);
len+=sizeof(uint64_t)+sizeof(uint16_t);
_inQoSRecords.erase(it++);
i++;
}
return len;
}
/**
* Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers.
*
* @param Current time
*/
inline void sentQoS(int64_t now) {
_packetsReceivedSinceLastQoS = 0;
_lastQoSMeasurement = now;
}
/**
* @param now Current time
* @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time
*/
inline bool needsToSendAck(int64_t now) {
return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL ||
(_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck;
}
/**
* @param now Current time
* @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time
*/
inline bool needsToSendQoS(int64_t now) {
return ((_packetsReceivedSinceLastQoS >= ZT_PATH_QOS_TABLE_SIZE) ||
((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastQoS;
}
/**
* How much time has elapsed since we've been expecting a VERB_ACK on this path. This value
* is used to determine a more relevant path "age". This lets us penalize paths which are no
* longer ACKing, but not those that simple aren't being used to carry traffic at the
* current time.
*/
inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? now - _expectingAckAsOf : 0; }
/**
* The maximum observed throughput (in bits/s) for this path
*/
inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; }
/**
* @return The mean throughput (in bits/s) of this link
*/
inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; }
/**
* Assign a new relative quality value for this path in the aggregate link
*
* @param rq Quality of this path in comparison to other paths available to this peer
*/
inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; }
/**
* @return Quality of this path compared to others in the aggregate link
*/
inline float relativeQuality() { return _lastComputedRelativeQuality; }
/**
* Assign a new allocation value for this path in the aggregate link
*
* @param allocation Percentage of traffic to be sent over this path to a peer
*/
inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; }
/**
* @return Percentage of traffic allocated to this path in the aggregate link
*/
inline unsigned char allocation() { return _lastAllocation; }
/**
* @return Stability estimates can become expensive to compute, we cache the most recent result.
*/
inline float lastComputedStability() { return _lastComputedStability; }
/**
* @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to
*/
inline char *getName() { return _ifname; }
/**
* @return Packet delay variance
*/
inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; }
/**
* @return Previously-computed mean latency
*/
inline float meanLatency() { return _lastComputedMeanLatency; }
/**
* @return Packet loss rate (PLR)
*/
inline float packetLossRatio() { return _lastComputedPacketLossRatio; }
/**
* @return Packet error ratio (PER)
*/
inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; }
/**
* Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now
* contribute to a Packet Error Ratio (PER).
*/
inline void recordInvalidPacket() { _packetValiditySamples.push(false); }
/**
* @return A pointer to a cached copy of the address string for this Path (For debugging only)
*/
inline char *getAddressString() { return _addrString; }
/**
* @return The current throughput disturbance coefficient
*/
inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; }
/**
* Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved"
* this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality".
*
* @param now Current time
*/
inline void processBackgroundPathMeasurements(const int64_t now)
{
if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
Mutex::Lock _l(_statistics_m);
_lastPathQualityComputeTime = now;
address().toString(_addrString);
_lastComputedMeanLatency = _latencySamples.mean();
_lastComputedPacketDelayVariance = _latencySamples.stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689)
_lastComputedMeanThroughput = (uint64_t)_throughputSamples.mean();
// If no packet validity samples, assume PER==0
_lastComputedPacketErrorRatio = 1 - (_packetValiditySamples.count() ? _packetValiditySamples.mean() : 1);
// Compute path stability
// Normalize measurements with wildly different ranges into a reasonable range
float normalized_pdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10);
float normalized_la = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10);
float throughput_cv = _throughputSamples.mean() > 0 ? _throughputSamples.stddev() / _throughputSamples.mean() : 1;
// Form an exponential cutoff and apply contribution weights
float pdv_contrib = expf((-1.0f)*normalized_pdv) * (float)ZT_PATH_CONTRIB_PDV;
float latency_contrib = expf((-1.0f)*normalized_la) * (float)ZT_PATH_CONTRIB_LATENCY;
// Throughput Disturbance Coefficient
float throughput_disturbance_contrib = expf((-1.0f)*throughput_cv) * (float)ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE;
_throughputDisturbanceSamples.push(throughput_cv);
_lastComputedThroughputDistCoeff = _throughputDisturbanceSamples.mean();
// Obey user-defined ignored contributions
pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1;
latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1;
throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1;
// Stability
_lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib;
_lastComputedStability *= 1 - _lastComputedPacketErrorRatio;
// Prevent QoS records from sticking around for too long
std::map<uint64_t,uint64_t>::iterator it = _outQoSRecords.begin();
while (it != _outQoSRecords.end()) {
// Time since egress of tracked packet
if ((now - it->second) >= ZT_PATH_QOS_TIMEOUT) {
_outQoSRecords.erase(it++);
} else { it++; }
}
}
}
inline bool bonded() { return _bonded; }
/**
* @return True if this path is alive (receiving heartbeats)
*/
inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); }
inline bool alive(const int64_t now, bool bondingEnabled = false) const {
return (bondingEnabled && _monitorInterval) ? ((now - _lastIn) < (_monitorInterval * 3)) : ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000));
}
/**
* @return True if this path needs a heartbeat
*/
inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); }
/**
* @return True if this path needs a heartbeat in accordance to the user-specified path monitor frequency
*/
inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); }
/**
* @return Last time we sent something
*/
@ -629,62 +353,335 @@ public:
*/
inline int64_t lastIn() const { return _lastIn; }
/**
* @return the age of the path in terms of receiving packets
*/
inline int64_t age(int64_t now) { return (now - _lastIn); }
/**
* @return Time last trust-established packet was received
*/
inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; }
/**
* @return Time since last VERB_ACK was received
*/
inline int64_t ackAge(int64_t now) { return _lastAckReceived ? now - _lastAckReceived : 0; }
/**
* Set or update a refractory period for the path.
*
* @param punishment How much a path should be punished
* @param pathFailure Whether this call is the result of a recent path failure
*/
inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) {
if (pathFailure) {
unsigned int suggestedRefractoryPeriod = _refractoryPeriod ? punishment + (_refractoryPeriod * 2) : punishment;
_refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD);
_lastRefractoryUpdate = 0;
} else {
uint32_t drainRefractory = 0;
if (_lastRefractoryUpdate) {
drainRefractory = (now - _lastRefractoryUpdate);
} else {
drainRefractory = (now - _lastAliveToggle);
}
_lastRefractoryUpdate = now;
if (_refractoryPeriod > drainRefractory) {
_refractoryPeriod -= drainRefractory;
} else {
_refractoryPeriod = 0;
_lastRefractoryUpdate = 0;
}
}
}
/**
* Determine the current state of eligibility of the path.
*
* @param includeRefractoryPeriod Whether current punishment should be taken into consideration
* @return True if this path can be used in a bond at the current time
*/
inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) {
if (includeRefractoryPeriod && _refractoryPeriod) {
return false;
}
bool acceptableAge = age(now) < ((_monitorInterval * 4) + _downDelay); // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs)
bool acceptableAckAge = ackAge(now) < (ackSendInterval); // Whether the remote peer is actually responding to our outgoing traffic or simply sending stuff to us
bool notTooEarly = (now - _lastAliveToggle) >= _upDelay; // Whether we've waited long enough since the link last came online
bool inTrial = (now - _lastTrialBegin) < _upDelay; // Whether this path is still in its trial period
bool currEligibility = allowed() && (((acceptableAge || acceptableAckAge) && notTooEarly) || inTrial);
return currEligibility;
}
/**
* Record when this path first entered the bond. Each path is given a trial period where it is admitted
* to the bond without requiring observations to prove its performance or reliability.
*/
inline void startTrial(uint64_t now) { _lastTrialBegin = now; }
/**
* @return True if a path is permitted to be used in a bond (according to user pref.)
*/
inline bool allowed() {
return _enabled
&& (!_ipvPref
|| ((_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64))
|| ((_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64)))));
}
/**
* @return True if a path is preferred over another on the same physical link (according to user pref.)
*/
inline bool preferred() {
return _onlyPathOnLink
|| (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46))
|| (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64));
}
/**
* @param now Current time
* @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time
*/
inline bool needsToSendAck(int64_t now, int ackSendInterval) {
return ((now - _lastAckSent) >= ackSendInterval ||
(_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck;
}
/**
* @param now Current time
* @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time
*/
inline bool needsToSendQoS(int64_t now, int qosSendInterval) {
return ((_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) ||
((now - _lastQoSMeasurement) > qosSendInterval)) && _packetsReceivedSinceLastQoS;
}
/**
* Reset packet counters
*/
inline void resetPacketCounts()
{
_packetsIn = 0;
_packetsOut = 0;
}
private:
Mutex _statistics_m;
volatile int64_t _lastOut;
volatile int64_t _lastIn;
volatile int64_t _lastTrustEstablishedPacketReceived;
volatile int64_t _lastPathQualityComputeTime;
int64_t _localSocket;
volatile unsigned int _latency;
InetAddress _addr;
InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often
AtomicCounter __refCount;
std::map<uint64_t,uint64_t> _outQoSRecords; // id:egress_time
std::map<uint64_t,uint64_t> _inQoSRecords; // id:now
std::map<uint64_t,uint16_t> _inACKRecords; // id:len
std::map<uint64_t,uint64_t> qosStatsOut; // id:egress_time
std::map<uint64_t,uint64_t> qosStatsIn; // id:now
std::map<uint64_t,uint16_t> ackStatsIn; // id:len
int64_t _lastAck;
int64_t _lastThroughputEstimation;
int64_t _lastQoSMeasurement;
int64_t _lastQoSRecordPurge;
RingBuffer<int,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> qosRecordSize;
RingBuffer<float,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> qosRecordLossSamples;
RingBuffer<uint64_t,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> throughputSamples;
RingBuffer<bool,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> packetValiditySamples;
RingBuffer<float,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> _throughputVarianceSamples;
RingBuffer<uint16_t,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> latencySamples;
/**
* Last time that a VERB_ACK was received on this path.
*/
uint64_t _lastAckReceived;
/**
* Last time that a VERB_ACK was sent out on this path.
*/
uint64_t _lastAckSent;
/**
* Last time that a VERB_QOS_MEASUREMENT was sent out on this path.
*/
uint64_t _lastQoSMeasurement;
/**
* Last time that the path's throughput was estimated.
*/
uint64_t _lastThroughputEstimation;
/**
* The last time that the refractory period was updated.
*/
uint64_t _lastRefractoryUpdate;
/**
* The last time that the path was marked as "alive".
*/
uint64_t _lastAliveToggle;
/**
* State of eligibility at last check. Used for determining state changes.
*/
bool _lastEligibilityState;
/**
* Timestamp indicating when this path's trial period began.
*/
uint64_t _lastTrialBegin;
/**
* Amount of time that this path will be prevented from becoming a member of a bond.
*/
uint32_t _refractoryPeriod;
/**
* Monitor interval specific to this path or that was inherited from the bond controller.
*/
int32_t _monitorInterval;
/**
* Up delay interval specific to this path or that was inherited from the bond controller.
*/
uint32_t _upDelay;
/**
* Down delay interval specific to this path or that was inherited from the bond controller.
*/
uint32_t _downDelay;
/**
* IP version preference inherited from the physical link.
*/
uint8_t _ipvPref;
/**
* Mode inherited from the physical link.
*/
uint8_t _mode;
/**
* IP version preference inherited from the physical link.
*/
bool _onlyPathOnLink;
/**
* Enabled state inherited from the physical link.
*/
bool _enabled;
/**
* Whether this path is currently part of a bond.
*/
bool _bonded;
/**
* Whether this path was intentionally negotiated by either peer.
*/
bool _negotiated;
/**
* Whether this path has been deprecated due to performance issues. Current traffic flows
* will be re-allocated to other paths in the most non-disruptive manner (if possible),
* and new traffic will not be allocated to this path.
*/
bool _deprecated;
/**
* Whether flows should be moved from this path. Current traffic flows will be re-allocated
* immediately.
*/
bool _shouldReallocateFlows;
/**
* The number of flows currently assigned to this path.
*/
uint16_t _assignedFlowCount;
/**
* The mean latency (computed from a sliding window.)
*/
float _latencyMean;
/**
* Packet delay variance (computed from a sliding window.)
*/
float _latencyVariance;
/**
* The ratio of lost packets to received packets.
*/
float _packetLossRatio;
/**
* The ratio of packets that failed their MAC/CRC checks to those that did not.
*/
float _packetErrorRatio;
/**
* The estimated mean throughput of this path.
*/
uint64_t _throughputMean;
/**
* The maximum observed throughput of this path.
*/
uint64_t _throughputMax;
/**
* The variance in the estimated throughput of this path.
*/
float _throughputVariance;
/**
* The relative quality of this path to all others in the bond, [0-255].
*/
uint8_t _allocation;
/**
* How much load this path is under.
*/
uint64_t _byteLoad;
/**
* How much load this path is under (relative to other paths in the bond.)
*/
uint8_t _relativeByteLoad;
/**
* Relative value expressing how "deserving" this path is of new traffic.
*/
uint8_t _affinity;
/**
* Score that indicates to what degree this path is preferred over others that
* are available to the bonding policy. (specifically for active-backup)
*/
uint32_t _failoverScore;
/**
* Number of bytes thus far sent that have not been acknowledged by the remote peer.
*/
int64_t _unackedBytes;
int64_t _expectingAckAsOf;
int16_t _packetsReceivedSinceLastAck;
int16_t _packetsReceivedSinceLastQoS;
uint64_t _maxLifetimeThroughput;
uint64_t _lastComputedMeanThroughput;
/**
* Number of packets received since the last VERB_ACK was sent to the remote peer.
*/
int32_t _packetsReceivedSinceLastAck;
/**
* Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer.
*/
int32_t _packetsReceivedSinceLastQoS;
/**
* Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput.
*/
uint64_t _bytesAckedSinceLastThroughputEstimation;
float _lastComputedMeanLatency;
float _lastComputedPacketDelayVariance;
float _lastComputedPacketErrorRatio;
float _lastComputedPacketLossRatio;
// cached estimates
float _lastComputedStability;
float _lastComputedRelativeQuality;
float _lastComputedThroughputDistCoeff;
unsigned char _lastAllocation;
// cached human-readable strings for tracing purposes
char _ifname[16];
char _addrString[256];
RingBuffer<uint64_t,ZT_PATH_QUALITY_METRIC_WIN_SZ> _throughputSamples;
RingBuffer<uint32_t,ZT_PATH_QUALITY_METRIC_WIN_SZ> _latencySamples;
RingBuffer<bool,ZT_PATH_QUALITY_METRIC_WIN_SZ> _packetValiditySamples;
RingBuffer<float,ZT_PATH_QUALITY_METRIC_WIN_SZ> _throughputDisturbanceSamples;
/**
* Counters used for tracking path load.
*/
int _packetsIn;
int _packetsOut;
};
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -14,7 +14,6 @@
#include "../version.h"
#include "Constants.hpp"
#include "Peer.hpp"
#include "Node.hpp"
#include "Switch.hpp"
#include "Network.hpp"
#include "SelfAwareness.hpp"
@ -35,20 +34,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_lastTriedMemorizedPath(0),
_lastDirectPathPushSent(0),
_lastDirectPathPushReceive(0),
_lastEchoRequestReceived(0),
_lastCredentialRequestSent(0),
_lastWhoisRequestReceived(0),
_lastEchoRequestReceived(0),
_lastCredentialsReceived(0),
_lastTrustEstablishedPacketReceived(0),
_lastSentFullHello(0),
_lastACKWindowReset(0),
_lastQoSWindowReset(0),
_lastMultipathCompatibilityCheck(0),
_lastEchoCheck(0),
_freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_vProto(0),
_vMajor(0),
_vMinor(0),
@ -56,14 +49,18 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_id(peerIdentity),
_directPathPushCutoffCount(0),
_credentialsCutoffCount(0),
_linkIsBalanced(false),
_linkIsRedundant(false),
_remotePeerMultipathEnabled(false),
_lastAggregateStatsReport(0),
_lastAggregateAllocation(0)
_echoRequestCutoffCount(0),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_shouldCollectPathStatistics(0),
_bondingPolicy(0),
_lastComputedAggregateMeanLatency(0)
{
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH))
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) {
throw ZT_EXCEPTION_INVALID_ARGUMENT;
}
}
void Peer::received(
@ -76,7 +73,8 @@ void Peer::received(
const uint64_t inRePacketId,
const Packet::Verb inReVerb,
const bool trustEstablished,
const uint64_t networkId)
const uint64_t networkId,
const int32_t flowId)
{
const int64_t now = RR->node->now();
@ -93,7 +91,9 @@ void Peer::received(
break;
}
if (trustEstablished) {
recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, flowId, now);
if (trustEstablished) {
_lastTrustEstablishedPacketReceived = now;
path->trustedPacketReceived(now);
}
@ -139,6 +139,9 @@ void Peer::received(
if (q > replacePathQuality) {
replacePathQuality = q;
replacePath = i;
if (!_paths[i].p->alive(now)) {
break; // Stop searching, we found an identical dead path, replace the object
}
}
} else {
replacePath = i;
@ -216,136 +219,15 @@ void Peer::received(
}
}
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId)
{
_freeRandomByte += (unsigned char)(packetId >> 8); // grab entropy to use in path selection logic for multipath
if (_canUseMultipath) {
path->recordOutgoingPacket(now, packetId, payloadLength, verb);
}
}
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
{
if (_canUseMultipath) {
if (path->needsToSendAck(now)) {
sendACK(tPtr, path, path->localSocket(), path->address(), now);
}
path->recordIncomingPacket(now, packetId, payloadLength, verb);
}
}
void Peer::computeAggregateProportionalAllocation(int64_t now)
{
float maxStability = 0;
float totalRelativeQuality = 0;
float maxThroughput = 1;
float maxScope = 0;
float relStability[ZT_MAX_PEER_NETWORK_PATHS];
float relThroughput[ZT_MAX_PEER_NETWORK_PATHS];
memset(&relStability, 0, sizeof(relStability));
memset(&relThroughput, 0, sizeof(relThroughput));
// Survey all paths
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
relStability[i] = _paths[i].p->lastComputedStability();
relThroughput[i] = (float)_paths[i].p->maxLifetimeThroughput();
maxStability = relStability[i] > maxStability ? relStability[i] : maxStability;
maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput;
maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope;
}
}
// Convert to relative values
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
relStability[i] /= maxStability ? maxStability : 1;
relThroughput[i] /= maxThroughput ? maxThroughput : 1;
float normalized_ma = Utils::normalize((float)_paths[i].p->ackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10);
float age_contrib = exp((-1)*normalized_ma);
float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1));
float relQuality =
(relStability[i] * (float)ZT_PATH_CONTRIB_STABILITY)
+ (fmaxf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT)
+ relScope * (float)ZT_PATH_CONTRIB_SCOPE;
relQuality *= age_contrib;
// Arbitrary cutoffs
relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f;
relQuality = relQuality < (99.0f / 100.0f) ? relQuality : 1.0f;
totalRelativeQuality += relQuality;
_paths[i].p->updateRelativeQuality(relQuality);
}
}
// Convert set of relative performances into an allocation set
for(uint16_t i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_paths[i].p->updateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255));
}
}
}
int Peer::computeAggregateLinkPacketDelayVariance()
{
float pdv = 0.0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
pdv += _paths[i].p->relativeQuality() * _paths[i].p->packetDelayVariance();
}
}
return (int)pdv;
}
int Peer::computeAggregateLinkMeanLatency()
{
int ml = 0;
int pathCount = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
pathCount++;
ml += (int)(_paths[i].p->relativeQuality() * _paths[i].p->meanLatency());
}
}
return ml / pathCount;
}
int Peer::aggregateLinkPhysicalPathCount()
{
std::map<std::string, bool> ifnamemap;
int pathCount = 0;
int64_t now = RR->node->now();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
if (!ifnamemap[_paths[i].p->getName()]) {
ifnamemap[_paths[i].p->getName()] = true;
pathCount++;
}
}
}
return pathCount;
}
int Peer::aggregateLinkLogicalPathCount()
{
int pathCount = 0;
int64_t now = RR->node->now();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
pathCount++;
}
}
return pathCount;
}
SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
{
Mutex::Lock _l(_paths_m);
unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
/**
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
if (!_canUseMultipath) {
if (!_bondToPeer) {
Mutex::Lock _l(_paths_m);
unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
/**
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
long bestPathQuality = 2147483647;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@ -363,115 +245,7 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
}
return SharedPtr<Path>();
}
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_paths[i].p->processBackgroundPathMeasurements(now);
}
}
/**
* Randomly distribute traffic across all paths
*/
int numAlivePaths = 0;
int numStalePaths = 0;
if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) {
int alivePaths[ZT_MAX_PEER_NETWORK_PATHS];
int stalePaths[ZT_MAX_PEER_NETWORK_PATHS];
memset(&alivePaths, -1, sizeof(alivePaths));
memset(&stalePaths, -1, sizeof(stalePaths));
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (_paths[i].p->alive(now)) {
alivePaths[numAlivePaths] = i;
numAlivePaths++;
}
else {
stalePaths[numStalePaths] = i;
numStalePaths++;
}
}
}
unsigned int r = _freeRandomByte;
if (numAlivePaths > 0) {
int rf = r % numAlivePaths;
return _paths[alivePaths[rf]].p;
}
else if(numStalePaths > 0) {
// Resort to trying any non-expired path
int rf = r % numStalePaths;
return _paths[stalePaths[rf]].p;
}
}
/**
* Proportionally allocate traffic according to dynamic path quality measurements
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) {
if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
_lastAggregateAllocation = now;
computeAggregateProportionalAllocation(now);
}
// Randomly choose path according to their allocations
float rf = _freeRandomByte;
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (rf < _paths[i].p->allocation()) {
bestPath = i;
_pathChoiceHist.push(bestPath); // Record which path we chose
break;
}
rf -= _paths[i].p->allocation();
}
}
if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) {
return _paths[bestPath].p;
}
}
return SharedPtr<Path>();
}
char *Peer::interfaceListStr()
{
std::map<std::string, int> ifnamemap;
char tmp[32];
const int64_t now = RR->node->now();
char *ptr = _interfaceListStr;
bool imbalanced = false;
memset(_interfaceListStr, 0, sizeof(_interfaceListStr));
int alivePathCount = aggregateLinkLogicalPathCount();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
int ipv = _paths[i].p->address().isV4();
// If this is acting as an aggregate link, check allocations
float targetAllocation = 1.0f / (float)alivePathCount;
float currentAllocation = 1.0f;
if (alivePathCount > 1) {
currentAllocation = (float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count();
if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) {
imbalanced = true;
}
}
char *ipvStr = ipv ? (char*)"ipv4" : (char*)"ipv6";
sprintf(tmp, "(%s, %s, %.3f)", _paths[i].p->getName(), ipvStr, currentAllocation);
// Prevent duplicates
if(ifnamemap[_paths[i].p->getName()] != ipv) {
memcpy(ptr, tmp, strlen(tmp));
ptr += strlen(tmp);
*ptr = ' ';
ptr++;
ifnamemap[_paths[i].p->getName()] = ipv;
}
}
}
ptr--; // Overwrite trailing space
if (imbalanced) {
sprintf(tmp, ", is asymmetrical");
memcpy(ptr, tmp, sizeof(tmp));
} else {
*ptr = '\0';
}
return _interfaceListStr;
return _bondToPeer->getAppropriatePath(now, flowId);
}
void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &other) const
@ -595,73 +369,6 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &o
}
}
inline void Peer::processBackgroundPeerTasks(const int64_t now)
{
// Determine current multipath compatibility with other peer
if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
//
// Cache number of available paths so that we can short-circuit multipath logic elsewhere
//
// We also take notice of duplicate paths (same IP only) because we may have
// recently received a direct path push from a peer and our list might contain
// a dead path which hasn't been fully recognized as such. In this case we
// don't want the duplicate to trigger execution of multipath code prematurely.
//
// This is done to support the behavior of auto multipath enable/disable
// without user intervention.
//
int currAlivePathCount = 0;
int duplicatePathsFound = 0;
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
currAlivePathCount++;
for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[i].p && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address()) && i != j) {
duplicatePathsFound+=1;
break;
}
}
}
}
_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
_lastMultipathCompatibilityCheck = now;
_localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9));
_remoteMultipathSupported = _vProto > 9;
// If both peers support multipath and more than one path exist, we can use multipath logic
_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
}
}
void Peer::sendACK(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK);
uint32_t bytesToAck = path->bytesToAck();
outp.append<uint32_t>(bytesToAck);
if (atAddress) {
outp.armor(_key,false);
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size());
} else {
RR->sw->send(tPtr,outp,false);
}
path->sentAck(now);
}
void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
const int64_t _now = RR->node->now();
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT);
char qosData[ZT_PATH_MAX_QOS_PACKET_SZ];
int16_t len = path->generateQoSPacket(_now,qosData);
outp.append(qosData,len);
if (atAddress) {
outp.armor(_key,false);
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size());
} else {
RR->sw->send(tPtr,outp,false);
}
path->sentQoS(now);
}
void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO);
@ -727,33 +434,58 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now)
}
}
void Peer::performMultipathStateCheck(int64_t now)
{
/**
* Check for conditions required for multipath bonding and create a bond
* if allowed.
*/
_localMultipathSupported = ((RR->bc->inUse()) && (ZT_PROTO_VERSION > 9));
if (_localMultipathSupported) {
int currAlivePathCount = 0;
int duplicatePathsFound = 0;
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
currAlivePathCount++;
for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[i].p && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address()) && i != j) {
duplicatePathsFound+=1;
break;
}
}
}
}
_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
_remoteMultipathSupported = _vProto > 9;
_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
}
if (_canUseMultipath && !_bondToPeer) {
if (RR->bc) {
_bondToPeer = RR->bc->createTransportTriggeredBond(RR, this);
/**
* Allow new bond to retroactively learn all paths known to this peer
*/
if (_bondToPeer) {
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_bondToPeer->nominatePath(_paths[i].p, now);
}
}
}
}
}
}
unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
{
unsigned int sent = 0;
Mutex::Lock _l(_paths_m);
performMultipathStateCheck(now);
const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD);
_lastSentFullHello = now;
processBackgroundPeerTasks(now);
// Emit traces regarding aggregate link status
if (_canUseMultipath) {
int alivePathCount = aggregateLinkPhysicalPathCount();
if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) {
_lastAggregateStatsReport = now;
if (alivePathCount) {
RR->t->peerLinkAggregateStatistics(NULL,*this);
}
} if (alivePathCount < 2 && _linkIsRedundant) {
_linkIsRedundant = !_linkIsRedundant;
RR->t->peerLinkNoLongerRedundant(NULL,*this);
} if (alivePathCount > 1 && !_linkIsRedundant) {
_linkIsRedundant = !_linkIsRedundant;
RR->t->peerLinkNowRedundant(NULL,*this);
}
}
// Right now we only keep pinging links that have the maximum priority. The
// priority is used to track cluster redirections, meaning that when a cluster
// redirects us its redirect target links override all other links and we
@ -770,7 +502,8 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
if (_paths[i].p) {
// Clean expired and reduced priority paths
if ( ((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION) && (_paths[i].priority == maxPriority) ) {
if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))) {
if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))
|| (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) {
attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello);
_paths[i].p->sent(now);
sent |= (_paths[i].p->address().ss_family == AF_INET) ? 0x1 : 0x2;
@ -781,14 +514,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
}
} else break;
}
if (canUseMultipath()) {
while(j < ZT_MAX_PEER_NETWORK_PATHS) {
_paths[j].lr = 0;
_paths[j].p.zero();
_paths[j].priority = 1;
++j;
}
}
return sent;
}
@ -855,4 +580,30 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres
}
}
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
if (!_shouldCollectPathStatistics || !_bondToPeer) {
return;
}
_bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now);
}
void Peer::recordIncomingInvalidPacket(const SharedPtr<Path>& path)
{
if (!_shouldCollectPathStatistics || !_bondToPeer) {
return;
}
_bondToPeer->recordIncomingInvalidPacket(path);
}
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
if (!_shouldCollectPathStatistics || !_bondToPeer) {
return;
}
_bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now);
}
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -31,6 +31,8 @@
#include "AtomicCounter.hpp"
#include "Hashtable.hpp"
#include "Mutex.hpp"
#include "Bond.hpp"
#include "BondController.hpp"
#define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2))
@ -42,6 +44,9 @@ namespace ZeroTier {
class Peer
{
friend class SharedPtr<Peer>;
friend class SharedPtr<Bond>;
friend class Switch;
friend class Bond;
private:
Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized
@ -95,7 +100,8 @@ public:
const uint64_t inRePacketId,
const Packet::Verb inReVerb,
const bool trustEstablished,
const uint64_t networkId);
const uint64_t networkId,
const int32_t flowId);
/**
* Check whether we have an active path to this peer via the given address
@ -135,73 +141,39 @@ public:
}
/**
* Record statistics on outgoing packets
*
* @param path Path over which packet was sent
* @param id Packet ID
* @param len Length of packet payload
* @param verb Packet verb
* @param now Current time
*/
void recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now);
/**
* Record statistics on incoming packets
*
* @param path Path over which packet was sent
* @param id Packet ID
* @param len Length of packet payload
* @param verb Packet verb
* @param now Current time
*/
void recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now);
/**
* Send an ACK to peer for the most recent packets received
* Record incoming packets to
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param localSocket Raw socket the ACK packet will be sent over
* @param atAddress Destination for the ACK packet
* @param path Path over which packet was received
* @param packetId Packet ID
* @param payloadLength Length of packet data payload
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void sendACK(void *tPtr, const SharedPtr<Path> &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now);
void recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now);
/**
* Send a QoS packet to peer so that it can evaluate the quality of this link
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param localSocket Raw socket the QoS packet will be sent over
* @param atAddress Destination for the QoS packet
* @param path Path over which packet is being sent
* @param packetId Packet ID
* @param payloadLength Length of packet data payload
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr<Path> &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now);
void recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now);
/**
* Compute relative quality values and allocations for the components of the aggregate link
* Record an invalid incoming packet. This packet failed
* MAC/compression/cipher checks and will now contribute to a
* Packet Error Ratio (PER).
*
* @param now Current time
* @param path Path over which packet was received
*/
void computeAggregateProportionalAllocation(int64_t now);
/**
* @return The aggregate link Packet Delay Variance (PDV)
*/
int computeAggregateLinkPacketDelayVariance();
/**
* @return The aggregate link mean latency
*/
int computeAggregateLinkMeanLatency();
/**
* @return The number of currently alive "physical" paths in the aggregate link
*/
int aggregateLinkPhysicalPathCount();
/**
* @return The number of currently alive "logical" paths in the aggregate link
*/
int aggregateLinkLogicalPathCount();
void recordIncomingInvalidPacket(const SharedPtr<Path>& path);
/**
* Get the most appropriate direct path based on current multipath and QoS configuration
@ -210,13 +182,7 @@ public:
* @param includeExpired If true, include even expired paths
* @return Best current path or NULL if none
*/
SharedPtr<Path> getAppropriatePath(int64_t now, bool includeExpired);
/**
* Generate a human-readable string of interface names making up the aggregate link, also include
* moving allocation and IP version number for each (for tracing)
*/
char *interfaceListStr();
SharedPtr<Path> getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId = -1);
/**
* Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path
@ -258,6 +224,13 @@ public:
*/
void tryMemorizedPath(void *tPtr,int64_t now);
/**
* A check to be performed periodically which determines whether multipath communication is
* possible with this peer. This check should be performed early in the life-cycle of the peer
* as well as during the process of learning new paths.
*/
void performMultipathStateCheck(int64_t now);
/**
* Send pings or keepalives depending on configured timeouts
*
@ -270,16 +243,6 @@ public:
*/
unsigned int doPingAndKeepalive(void *tPtr,int64_t now);
/**
* Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state.
* This should be called frequently so that we can detect and remove unproductive or invalid paths.
*
* Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL.
*
* @return Number of paths that were pruned this round
*/
unsigned int prunePaths();
/**
* Process a cluster redirect sent by this peer
*
@ -341,7 +304,7 @@ public:
inline unsigned int latency(const int64_t now)
{
if (_canUseMultipath) {
return (int)computeAggregateLinkMeanLatency();
return (int)_lastComputedAggregateMeanLatency;
} else {
SharedPtr<Path> bp(getAppropriatePath(now,false));
if (bp)
@ -400,37 +363,6 @@ public:
inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); }
/**
* Periodically update known multipath activation constraints. This is done so that we know when and when
* not to use multipath logic. Doing this once every few seconds is sufficient.
*
* @param now Current time
*/
inline void processBackgroundPeerTasks(const int64_t now);
/**
* Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK
* or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client
* shall assume that multipath is not enabled and should only use classical Protocol 9 logic.
*/
inline void inferRemoteMultipathEnabled() { _remotePeerMultipathEnabled = true; }
/**
* @return Whether the local client supports and is configured to use multipath
*/
inline bool localMultipathSupport() { return _localMultipathSupported; }
/**
* @return Whether the remote peer supports and is configured to use multipath
*/
inline bool remoteMultipathSupport() { return _remoteMultipathSupported; }
/**
* @return Whether this client can use multipath to communicate with this peer. True if both peers are using
* the correct protocol and if both peers have multipath enabled. False if otherwise.
*/
inline bool canUseMultipath() { return _canUseMultipath; }
/**
* @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms
*/
@ -485,50 +417,35 @@ public:
}
/**
* Rate limit gate for inbound ECHO requests
* Rate limit gate for inbound ECHO requests. This rate limiter works
* by draining a certain number of requests per unit time. Each peer may
* theoretically receive up to ZT_ECHO_CUTOFF_LIMIT requests per second.
*/
inline bool rateGateEchoRequest(const int64_t now)
{
if ((now - _lastEchoRequestReceived) >= ZT_PEER_GENERAL_RATE_LIMIT) {
_lastEchoRequestReceived = now;
return true;
}
return false;
}
/**
* Rate limit gate for VERB_ACK
*/
inline bool rateGateACK(const int64_t now)
{
if ((now - _lastACKWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) {
_lastACKWindowReset = now;
_ACKCutoffCount = 0;
/*
// TODO: Rethink this
if (_canUseMultipath) {
_echoRequestCutoffCount++;
int numToDrain = (now - _lastEchoCheck) / ZT_ECHO_DRAINAGE_DIVISOR;
_lastEchoCheck = now;
fprintf(stderr, "ZT_ECHO_CUTOFF_LIMIT=%d, (now - _lastEchoCheck)=%d, numToDrain=%d, ZT_ECHO_DRAINAGE_DIVISOR=%d\n", ZT_ECHO_CUTOFF_LIMIT, (now - _lastEchoCheck), numToDrain, ZT_ECHO_DRAINAGE_DIVISOR);
if (_echoRequestCutoffCount > numToDrain) {
_echoRequestCutoffCount-=numToDrain;
}
else {
_echoRequestCutoffCount = 0;
}
return (_echoRequestCutoffCount < ZT_ECHO_CUTOFF_LIMIT);
} else {
++_ACKCutoffCount;
if ((now - _lastEchoRequestReceived) >= (ZT_PEER_GENERAL_RATE_LIMIT)) {
_lastEchoRequestReceived = now;
return true;
}
return false;
}
return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_QOS_MEASUREMENT
*/
inline bool rateGateQoS(const int64_t now)
{
if ((now - _lastQoSWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) {
_lastQoSWindowReset = now;
_QoSCutoffCount = 0;
} else {
++_QoSCutoffCount;
}
return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* @return Whether this peer is reachable via an aggregate link
*/
inline bool hasAggregateLink() {
return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled;
*/
return true;
}
/**
@ -603,6 +520,18 @@ public:
}
}
/**
*
* @return
*/
SharedPtr<Bond> bond() { return _bondToPeer; }
/**
*
* @return
*/
inline int8_t bondingPolicy() { return _bondingPolicy; }
private:
struct _PeerPath
{
@ -621,25 +550,16 @@ private:
int64_t _lastTriedMemorizedPath;
int64_t _lastDirectPathPushSent;
int64_t _lastDirectPathPushReceive;
int64_t _lastEchoRequestReceived;
int64_t _lastCredentialRequestSent;
int64_t _lastWhoisRequestReceived;
int64_t _lastEchoRequestReceived;
int64_t _lastCredentialsReceived;
int64_t _lastTrustEstablishedPacketReceived;
int64_t _lastSentFullHello;
int64_t _lastPathPrune;
int64_t _lastACKWindowReset;
int64_t _lastQoSWindowReset;
int64_t _lastMultipathCompatibilityCheck;
int64_t _lastEchoCheck;
unsigned char _freeRandomByte;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
uint16_t _vProto;
uint16_t _vMajor;
uint16_t _vMinor;
@ -652,21 +572,22 @@ private:
unsigned int _directPathPushCutoffCount;
unsigned int _credentialsCutoffCount;
unsigned int _QoSCutoffCount;
unsigned int _ACKCutoffCount;
unsigned int _echoRequestCutoffCount;
AtomicCounter __refCount;
RingBuffer<int,ZT_MULTIPATH_PROPORTION_WIN_SZ> _pathChoiceHist;
bool _linkIsBalanced;
bool _linkIsRedundant;
bool _remotePeerMultipathEnabled;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
int64_t _lastAggregateStatsReport;
int64_t _lastAggregateAllocation;
volatile bool _shouldCollectPathStatistics;
volatile int8_t _bondingPolicy;
char _interfaceListStr[256]; // 16 characters * 16 paths in a link
int32_t _lastComputedAggregateMeanLatency;
SharedPtr<Bond> _bondToPeer;
};
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -238,6 +238,21 @@ public:
return curr_cnt ? subtotal / (float)curr_cnt : 0;
}
/**
* @return The sum of the contents of the buffer
*/
inline float sum()
{
size_t iterator = begin;
float total = 0;
size_t curr_cnt = count();
for (size_t i=0; i<curr_cnt; i++) {
iterator = (iterator + S - 1) % curr_cnt;
total += (float)*(buf + iterator);
}
return total;
}
/**
* @return The sample standard deviation of element values
*/
@ -306,10 +321,10 @@ public:
for (size_t i=0; i<S; i++) {
iterator = (iterator + S - 1) % S;
if (typeid(T) == typeid(int)) {
//DEBUG_INFO("buf[%2zu]=%2d", iterator, (int)*(buf + iterator));
fprintf(stderr, "buf[%2zu]=%2d\n", iterator, (int)*(buf + iterator));
}
else {
//DEBUG_INFO("buf[%2zu]=%2f", iterator, (float)*(buf + iterator));
fprintf(stderr, "buf[%2zu]=%2f\n", iterator, (float)*(buf + iterator));
}
}
}

View File

@ -30,6 +30,7 @@ class Multicaster;
class NetworkController;
class SelfAwareness;
class Trace;
class BondController;
/**
* Holds global state for an instance of ZeroTier::Node
@ -75,6 +76,7 @@ public:
Multicaster *mc;
Topology *topology;
SelfAwareness *sa;
BondController *bc;
// This node's identity and string representations thereof
Identity identity;

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -42,8 +42,38 @@ Switch::Switch(const RuntimeEnvironment *renv) :
{
}
// Returns true if packet appears valid; pos and proto will be set
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
if (frameLen < 40)
return false;
pos = 40;
proto = frameData[6];
while (pos <= frameLen) {
switch(proto) {
case 0: // hop-by-hop options
case 43: // routing
case 60: // destination options
case 135: // mobility options
if ((pos + 8) > frameLen)
return false; // invalid!
proto = frameData[pos];
pos += ((unsigned int)frameData[pos + 1] * 8) + 8;
break;
//case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway
//case 50:
//case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff
default:
return true;
}
}
return false; // overflow == invalid
}
void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len)
{
int32_t flowId = ZT_QOS_NO_FLOW;
try {
const int64_t now = RR->node->now();
@ -112,6 +142,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
if (rq->packetId != fragmentPacketId) {
// No packet found, so we received a fragment without its head.
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = fragmentPacketId;
rq->frags[fragmentNumber - 1] = fragment;
@ -130,7 +161,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
for(unsigned int f=1;f<totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,tPtr)) {
if (rq->frag0.tryDecode(RR,tPtr,flowId)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
@ -195,6 +226,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
if (rq->packetId != packetId) {
// If we have no other fragments yet, create an entry and save the head
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = packetId;
rq->frag0.init(data,len,path,now);
@ -211,7 +243,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
for(unsigned int f=1;f<rq->totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,tPtr)) {
if (rq->frag0.tryDecode(RR,tPtr,flowId)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
@ -224,9 +256,10 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
} else {
// Packet is unfragmented, so just process it
IncomingPacket packet(data,len,path,now);
if (!packet.tryDecode(RR,tPtr)) {
if (!packet.tryDecode(RR,tPtr,flowId)) {
RXQueueEntry *const rq = _nextRXQueueEntry();
Mutex::Lock rql(rq->lock);
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = packet.packetId();
rq->frag0 = packet;
@ -256,7 +289,75 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
}
}
uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET;
uint8_t qosBucket = ZT_AQM_DEFAULT_BUCKET;
/**
* A pseudo-unique identifier used by balancing and bonding policies to
* categorize individual flows/conversations for assignment to a specific
* physical path. This identifier consists of the source port and
* destination port of the encapsulated frame.
*
* A flowId of -1 will indicate that there is no preference for how this
* packet shall be sent. An example of this would be an ICMP packet.
*/
int32_t flowId = ZT_QOS_NO_FLOW;
if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
uint8_t proto = (reinterpret_cast<const uint8_t *>(data)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(data)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
//flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = dstPort ^ srcPort ^ proto;
}
break;
}
}
if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)data, len, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
//flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = dstPort ^ srcPort ^ proto;
}
break;
default:
break;
}
}
if (to.isMulticast()) {
MulticastGroup multicastGroup(to,0);
@ -267,7 +368,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
* otherwise a straightforward Ethernet switch emulation. Vanilla ARP
* is dumb old broadcast and simply doesn't scale. ZeroTier multicast
* groups have an additional field called ADI (additional distinguishing
* information) which was added specifically for ARP though it could
* information) which was added specifically for ARP though it could
* be used for other things too. We then take ARP broadcasts and turn
* them into multicasts by stuffing the IP address being queried into
* the 32-bit ADI field. In practice this uses our multicast pub/sub
@ -416,7 +517,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
outp.append(data,len);
if (!network->config().disableCompression())
outp.compress();
aqm_enqueue(tPtr,network,outp,true,qosBucket);
aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId);
} else {
Packet outp(toZT,RR->identity.address(),Packet::VERB_FRAME);
outp.append(network->id());
@ -424,7 +525,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
outp.append(data,len);
if (!network->config().disableCompression())
outp.compress();
aqm_enqueue(tPtr,network,outp,true,qosBucket);
aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId);
}
} else {
// Destination is bridged behind a remote peer
@ -480,7 +581,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
outp.append(data,len);
if (!network->config().disableCompression())
outp.compress();
aqm_enqueue(tPtr,network,outp,true,qosBucket);
aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId);
} else {
RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked (bridge replication)");
}
@ -488,28 +589,25 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
}
}
void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket)
void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId)
{
if(!network->qosEnabled()) {
send(tPtr, packet, encrypt);
send(tPtr, packet, encrypt, flowId);
return;
}
NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()];
if (!nqcb) {
// DEBUG_INFO("creating network QoS control block (NQCB) for network %llx", network->id());
nqcb = new NetworkQoSControlBlock();
_netQueueControlBlock[network->id()] = nqcb;
// Initialize ZT_QOS_NUM_BUCKETS queues and place them in the INACTIVE list
// These queues will be shuffled between the new/old/inactive lists by the enqueue/dequeue algorithm
for (int i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (int i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
nqcb->inactiveQueues.push_back(new ManagedQueue(i));
}
}
// Don't apply QoS scheduling to ZT protocol traffic
if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) {
// DEBUG_INFO("skipping, no QoS for this packet, verb=%x", packet.verb());
// just send packet normally, no QoS for ZT protocol traffic
send(tPtr, packet, encrypt);
send(tPtr, packet, encrypt, flowId);
}
_aqm_m.lock();
@ -517,10 +615,10 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
// Enqueue packet and move queue to appropriate list
const Address dest(packet.destination());
TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt);
TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId);
ManagedQueue *selectedQueue = nullptr;
for (size_t i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (size_t i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
if (i < nqcb->oldQueues.size()) { // search old queues first (I think this is best since old would imply most recent usage of the queue)
if (nqcb->oldQueues[i]->id == qosBucket) {
selectedQueue = nqcb->oldQueues[i];
@ -533,7 +631,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
if (nqcb->inactiveQueues[i]->id == qosBucket) {
selectedQueue = nqcb->inactiveQueues[i];
// move queue to end of NEW queue list
selectedQueue->byteCredit = ZT_QOS_QUANTUM;
selectedQueue->byteCredit = ZT_AQM_QUANTUM;
// DEBUG_INFO("moving q=%p from INACTIVE to NEW list", selectedQueue);
nqcb->newQueues.push_back(selectedQueue);
nqcb->inactiveQueues.erase(nqcb->inactiveQueues.begin() + i);
@ -552,11 +650,11 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
// Drop a packet if necessary
ManagedQueue *selectedQueueToDropFrom = nullptr;
if (nqcb->_currEnqueuedPackets > ZT_QOS_MAX_ENQUEUED_PACKETS)
if (nqcb->_currEnqueuedPackets > ZT_AQM_MAX_ENQUEUED_PACKETS)
{
// DEBUG_INFO("too many enqueued packets (%d), finding packet to drop", nqcb->_currEnqueuedPackets);
int maxQueueLength = 0;
for (size_t i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (size_t i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
if (i < nqcb->oldQueues.size()) {
if (nqcb->oldQueues[i]->byteLength > maxQueueLength) {
maxQueueLength = nqcb->oldQueues[i]->byteLength;
@ -589,7 +687,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
uint64_t Switch::control_law(uint64_t t, int count)
{
return (uint64_t)(t + ZT_QOS_INTERVAL / sqrt(count));
return (uint64_t)(t + ZT_AQM_INTERVAL / sqrt(count));
}
Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now)
@ -603,14 +701,14 @@ Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now)
return r;
}
uint64_t sojourn_time = now - r.p->creationTime;
if (sojourn_time < ZT_QOS_TARGET || q->byteLength <= ZT_DEFAULT_MTU) {
if (sojourn_time < ZT_AQM_TARGET || q->byteLength <= ZT_DEFAULT_MTU) {
// went below - stay below for at least interval
q->first_above_time = 0;
} else {
if (q->first_above_time == 0) {
// just went above from below. if still above at
// first_above_time, will say it's ok to drop.
q->first_above_time = now + ZT_QOS_INTERVAL;
q->first_above_time = now + ZT_AQM_INTERVAL;
} else if (now >= q->first_above_time) {
r.ok_to_drop = true;
}
@ -642,7 +740,7 @@ Switch::TXQueueEntry * Switch::CoDelDequeue(ManagedQueue *q, bool isNew, uint64_
q->q.pop_front(); // drop
r = dodequeue(q, now);
q->dropping = true;
q->count = (q->count > 2 && now - q->drop_next < 8*ZT_QOS_INTERVAL)?
q->count = (q->count > 2 && now - q->drop_next < 8*ZT_AQM_INTERVAL)?
q->count - 2 : 1;
q->drop_next = control_law(now, q->count);
}
@ -670,7 +768,7 @@ void Switch::aqm_dequeue(void *tPtr)
while (currQueues->size()) {
ManagedQueue *queueAtFrontOfList = currQueues->front();
if (queueAtFrontOfList->byteCredit < 0) {
queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM;
queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM;
// Move to list of OLD queues
// DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList);
oldQueues->push_back(queueAtFrontOfList);
@ -689,7 +787,7 @@ void Switch::aqm_dequeue(void *tPtr)
queueAtFrontOfList->byteCredit -= len;
// Send the packet!
queueAtFrontOfList->q.pop_front();
send(tPtr, entryToEmit->packet, entryToEmit->encrypt);
send(tPtr, entryToEmit->packet, entryToEmit->encrypt, entryToEmit->flowId);
(*nqcb).second->_currEnqueuedPackets--;
}
if (queueAtFrontOfList) {
@ -705,7 +803,7 @@ void Switch::aqm_dequeue(void *tPtr)
while (currQueues->size()) {
ManagedQueue *queueAtFrontOfList = currQueues->front();
if (queueAtFrontOfList->byteCredit < 0) {
queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM;
queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM;
oldQueues->push_back(queueAtFrontOfList);
currQueues->erase(currQueues->begin());
} else {
@ -721,7 +819,7 @@ void Switch::aqm_dequeue(void *tPtr)
queueAtFrontOfList->byteLength -= len;
queueAtFrontOfList->byteCredit -= len;
queueAtFrontOfList->q.pop_front();
send(tPtr, entryToEmit->packet, entryToEmit->encrypt);
send(tPtr, entryToEmit->packet, entryToEmit->encrypt, entryToEmit->flowId);
(*nqcb).second->_currEnqueuedPackets--;
}
if (queueAtFrontOfList) {
@ -745,18 +843,18 @@ void Switch::removeNetworkQoSControlBlock(uint64_t nwid)
}
}
void Switch::send(void *tPtr,Packet &packet,bool encrypt)
void Switch::send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId)
{
const Address dest(packet.destination());
if (dest == RR->identity.address())
return;
if (!_trySend(tPtr,packet,encrypt)) {
if (!_trySend(tPtr,packet,encrypt,flowId)) {
{
Mutex::Lock _l(_txQueue_m);
if (_txQueue.size() >= ZT_TX_QUEUE_SIZE) {
_txQueue.pop_front();
}
_txQueue.push_back(TXQueueEntry(dest,RR->node->now(),packet,encrypt));
_txQueue.push_back(TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId));
}
if (!RR->topology->getPeer(tPtr,dest))
requestWhois(tPtr,RR->node->now(),dest);
@ -778,10 +876,11 @@ void Switch::requestWhois(void *tPtr,const int64_t now,const Address &addr)
const SharedPtr<Peer> upstream(RR->topology->getUpstreamPeer());
if (upstream) {
int32_t flowId = ZT_QOS_NO_FLOW;
Packet outp(upstream->address(),RR->identity.address(),Packet::VERB_WHOIS);
addr.appendTo(outp);
RR->node->expectReplyTo(outp.packetId());
send(tPtr,outp,true);
send(tPtr,outp,true,flowId);
}
}
@ -797,7 +896,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer)
RXQueueEntry *const rq = &(_rxQueue[ptr]);
Mutex::Lock rql(rq->lock);
if ((rq->timestamp)&&(rq->complete)) {
if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT))
if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT))
rq->timestamp = 0;
}
}
@ -806,7 +905,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer)
Mutex::Lock _l(_txQueue_m);
for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) {
if (txi->dest == peer->address()) {
if (_trySend(tPtr,txi->packet,txi->encrypt)) {
if (_trySend(tPtr,txi->packet,txi->encrypt,txi->flowId)) {
_txQueue.erase(txi++);
} else {
++txi;
@ -830,7 +929,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now)
Mutex::Lock _l(_txQueue_m);
for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) {
if (_trySend(tPtr,txi->packet,txi->encrypt)) {
if (_trySend(tPtr,txi->packet,txi->encrypt,txi->flowId)) {
_txQueue.erase(txi++);
} else if ((now - txi->creationTime) > ZT_TRANSMIT_QUEUE_TIMEOUT) {
_txQueue.erase(txi++);
@ -848,7 +947,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now)
RXQueueEntry *const rq = &(_rxQueue[ptr]);
Mutex::Lock rql(rq->lock);
if ((rq->timestamp)&&(rq->complete)) {
if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
rq->timestamp = 0;
} else {
const Address src(rq->frag0.source());
@ -894,7 +993,7 @@ bool Switch::_shouldUnite(const int64_t now,const Address &source,const Address
return false;
}
bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId)
{
SharedPtr<Path> viaPath;
const int64_t now = RR->node->now();
@ -902,19 +1001,40 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
const SharedPtr<Peer> peer(RR->topology->getPeer(tPtr,destination));
if (peer) {
viaPath = peer->getAppropriatePath(now,false);
if (!viaPath) {
peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known
if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST)
&& (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) {
const SharedPtr<Peer> relay(RR->topology->getUpstreamPeer());
if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false))) ) {
if (!(viaPath = peer->getAppropriatePath(now,true)))
return false;
Mutex::Lock _l(peer->_paths_m);
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (peer->_paths[i].p && peer->_paths[i].p->alive(now)) {
char pathStr[128];
peer->_paths[i].p->address().toString(pathStr);
_sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId);
}
}
return true;
}
else {
viaPath = peer->getAppropriatePath(now,false,flowId);
if (!viaPath) {
peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known
const SharedPtr<Peer> relay(RR->topology->getUpstreamPeer());
if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false,flowId))) ) {
if (!(viaPath = peer->getAppropriatePath(now,true,flowId)))
return false;
}
}
if (viaPath) {
_sendViaSpecificPath(tPtr,peer,viaPath,now,packet,encrypt,flowId);
return true;
}
}
} else {
return false;
}
return false;
}
void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId)
{
unsigned int mtu = ZT_DEFAULT_PHYSMTU;
uint64_t trustedPathId = 0;
RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId);
@ -922,7 +1042,7 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
unsigned int chunkSize = std::min(packet.size(),mtu);
packet.setFragmented(chunkSize < packet.size());
peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now);
peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), flowId, now);
if (trustedPathId) {
packet.setTrusted(trustedPathId);
@ -949,8 +1069,6 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
}
}
}
return true;
}
} // namespace ZeroTier

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -59,6 +59,8 @@ class Switch
struct ManagedQueue;
struct TXQueueEntry;
friend class SharedPtr<Peer>;
typedef struct {
TXQueueEntry *p;
bool ok_to_drop;
@ -78,6 +80,11 @@ public:
*/
void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len);
/**
* Returns whether our bonding or balancing policy is aware of flows.
*/
bool isFlowAware();
/**
* Called when a packet comes from a local Ethernet tap
*
@ -118,7 +125,7 @@ public:
* @param encrypt Encrypt packet payload? (always true except for HELLO)
* @param qosBucket Which bucket the rule-system determined this packet should fall into
*/
void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket);
void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW);
/**
* Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks
@ -164,7 +171,7 @@ public:
* @param packet Packet to send (buffer may be modified)
* @param encrypt Encrypt packet payload? (always true except for HELLO)
*/
void send(void *tPtr,Packet &packet,bool encrypt);
void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW);
/**
* Request WHOIS on a given address
@ -199,7 +206,8 @@ public:
private:
bool _shouldUnite(const int64_t now,const Address &source,const Address &destination);
bool _trySend(void *tPtr,Packet &packet,bool encrypt); // packet is modified if return is true
bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true
void _sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId);
const RuntimeEnvironment *const RR;
int64_t _lastBeaconResponse;
@ -220,6 +228,7 @@ private:
unsigned int totalFragments; // 0 if only frag0 received, waiting for frags
uint32_t haveFragments; // bit mask, LSB to MSB
volatile bool complete; // if true, packet is complete
volatile int32_t flowId;
Mutex lock;
};
RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];
@ -248,16 +257,18 @@ private:
struct TXQueueEntry
{
TXQueueEntry() {}
TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc) :
TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) :
dest(d),
creationTime(ct),
packet(p),
encrypt(enc) {}
encrypt(enc),
flowId(fid) {}
Address dest;
uint64_t creationTime;
Packet packet; // unencrypted/unMAC'd packet -- this is done at send time
bool encrypt;
int32_t flowId;
};
std::list< TXQueueEntry > _txQueue;
Mutex _txQueue_m;
@ -289,7 +300,7 @@ private:
{
ManagedQueue(int id) :
id(id),
byteCredit(ZT_QOS_QUANTUM),
byteCredit(ZT_AQM_QUANTUM),
byteLength(0),
dropping(false)
{}

View File

@ -96,22 +96,24 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,
void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer)
{
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt());
//ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt());
}
void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer)
{
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt());
//ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt());
}
void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer)
{
/*
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)",
peer.address().toInt(),
peer.aggregateLinkPhysicalPathCount(),
peer.interfaceListStr(),
peer.computeAggregateLinkPacketDelayVariance(),
peer.computeAggregateLinkMeanLatency());
*/
}
void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr<Path> &newPath,const uint64_t packetId)

View File

@ -214,12 +214,12 @@ public:
return l;
}
static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax)
static inline float normalize(float value, float bigMin, float bigMax, float targetMin, float targetMax)
{
int64_t bigSpan = bigMax - bigMin;
int64_t smallSpan = targetMax - targetMin;
float valueScaled = (value - (float)bigMin) / (float)bigSpan;
return (float)targetMin + valueScaled * (float)smallSpan;
float bigSpan = bigMax - bigMin;
float smallSpan = targetMax - targetMin;
float valueScaled = (value - bigMin) / bigSpan;
return targetMin + valueScaled * smallSpan;
}
/**
@ -253,6 +253,7 @@ public:
static inline int strToInt(const char *s) { return (int)strtol(s,(char **)0,10); }
static inline unsigned long strToULong(const char *s) { return strtoul(s,(char **)0,10); }
static inline long strToLong(const char *s) { return strtol(s,(char **)0,10); }
static inline double strToDouble(const char *s) { return strtod(s,NULL); }
static inline unsigned long long strToU64(const char *s)
{
#ifdef __WINDOWS__

View File

@ -24,7 +24,9 @@ CORE_OBJS=\
node/Tag.o \
node/Topology.o \
node/Trace.o \
node/Utils.o
node/Utils.o \
node/Bond.o \
node/BondController.o
ONE_OBJS=\
controller/EmbeddedNetworkController.o \

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -347,6 +347,27 @@ public:
}
}
// Generate set of unique interface names (used for formation of logical link set in multipath code)
// TODO: Could be gated not to run if multipath is not enabled.
for(std::map<InetAddress,std::string>::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) {
linkIfNames.insert(ii->second);
}
for (std::set<std::string>::iterator si(linkIfNames.begin());si!=linkIfNames.end();) {
bool bFoundMatch = false;
for(std::map<InetAddress,std::string>::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) {
if (ii->second == *si) {
bFoundMatch = true;
break;
}
}
if (!bFoundMatch) {
linkIfNames.erase(si++);
}
else {
++si;
}
}
// Create new bindings for those not already bound
for(std::map<InetAddress,std::string>::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) {
unsigned int bi = 0;
@ -444,7 +465,15 @@ public:
return false;
}
inline std::set<std::string> getLinkInterfaceNames()
{
Mutex::Lock _l(_lock);
return linkIfNames;
}
private:
std::set<std::string> linkIfNames;
_Binding _bindings[ZT_BINDER_MAX_BINDINGS];
std::atomic<unsigned int> _bindingCount;
Mutex _lock;

237
osdep/Link.hpp Normal file
View File

@ -0,0 +1,237 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_LINK_HPP
#define ZT_LINK_HPP
#include <string>
#include "../node/AtomicCounter.hpp"
namespace ZeroTier {
class Link
{
friend class SharedPtr<Link>;
public:
Link() {}
/**
*
* @param ifnameStr
* @param ipvPref
* @param speed
* @param enabled
* @param mode
* @param failoverToLinkStr
* @param userSpecifiedAlloc
*/
Link(std::string& ifnameStr,
uint8_t ipvPref,
uint32_t speed,
uint32_t linkMonitorInterval,
uint32_t upDelay,
uint32_t downDelay,
bool enabled,
uint8_t mode,
std::string failoverToLinkStr,
float userSpecifiedAlloc) :
_ifnameStr(ifnameStr),
_ipvPref(ipvPref),
_speed(speed),
_relativeSpeed(0),
_linkMonitorInterval(linkMonitorInterval),
_upDelay(upDelay),
_downDelay(downDelay),
_enabled(enabled),
_mode(mode),
_failoverToLinkStr(failoverToLinkStr),
_userSpecifiedAlloc(userSpecifiedAlloc),
_isUserSpecified(false)
{}
/**
* @return The string representation of this link's underlying interface's system name.
*/
inline std::string ifname() { return _ifnameStr; }
/**
* @return Whether this link is designated as a primary.
*/
inline bool primary() { return _mode == ZT_MULTIPATH_SLAVE_MODE_PRIMARY; }
/**
* @return Whether this link is designated as a spare.
*/
inline bool spare() { return _mode == ZT_MULTIPATH_SLAVE_MODE_SPARE; }
/**
* @return The name of the link interface that should be used in the event of a failure.
*/
inline std::string failoverToLink() { return _failoverToLinkStr; }
/**
* @return Whether this link interface was specified by the user or auto-detected.
*/
inline bool isUserSpecified() { return _isUserSpecified; }
/**
* Signify that this link was specified by the user and not the result of auto-detection.
*
* @param isUserSpecified
*/
inline void setAsUserSpecified(bool isUserSpecified) { _isUserSpecified = isUserSpecified; }
/**
* @return Whether or not the user has specified failover instructions.
*/
inline bool userHasSpecifiedFailoverInstructions() { return _failoverToLinkStr.length(); }
/**
* @return The speed of the link relative to others in the bond.
*/
inline uint8_t relativeSpeed() { return _relativeSpeed; }
/**
* Sets the speed of the link relative to others in the bond.
*
* @param relativeSpeed The speed relative to the rest of the link.
*/
inline void setRelativeSpeed(uint8_t relativeSpeed) { _relativeSpeed = relativeSpeed; }
/**
* Sets the speed of the link relative to others in the bond.
*
* @param relativeSpeed
*/
inline void setMonitorInterval(uint32_t interval) { _linkMonitorInterval = interval; }
/**
* @return The absolute speed of the link (as specified by the user.)
*/
inline uint32_t monitorInterval() { return _linkMonitorInterval; }
/**
* @return The absolute speed of the link (as specified by the user.)
*/
inline uint32_t speed() { return _speed; }
/**
* @return The address preference for this link (as specified by the user.)
*/
inline uint8_t ipvPref() { return _ipvPref; }
/**
* @return The mode (e.g. primary/spare) for this link (as specified by the user.)
*/
inline uint8_t mode() { return _mode; }
/**
* @return The upDelay parameter for all paths on this link.
*/
inline uint32_t upDelay() { return _upDelay; }
/**
* @return The downDelay parameter for all paths on this link.
*/
inline uint32_t downDelay() { return _downDelay; }
/**
* @return Whether this link is enabled or disabled
*/
inline uint8_t enabled() { return _enabled; }
private:
/**
* String representation of underlying interface's system name
*/
std::string _ifnameStr;
/**
* What preference (if any) a user has for IP protocol version used in
* path aggregations. Preference is expressed in the order of the digits:
*
* 0: no preference
* 4: IPv4 only
* 6: IPv6 only
* 46: IPv4 over IPv6
* 64: IPv6 over IPv4
*/
uint8_t _ipvPref;
/**
* User-specified speed of this link
*/
uint32_t _speed;
/**
* Speed relative to other specified links (computed by Bond)
*/
uint8_t _relativeSpeed;
/**
* User-specified interval for monitoring paths on this specific link
* instead of using the more generic interval specified for the entire
* bond.
*/
uint32_t _linkMonitorInterval;
/**
* How long before a path is considered to be usable after coming online. (when using policies that
* support fail-over events).
*/
uint32_t _upDelay;
/**
* How long before a path is considered to be dead (when using policies that
* support fail-over events).
*/
uint32_t _downDelay;
/**
* Whether this link is enabled, or (disabled (possibly bad config))
*/
uint8_t _enabled;
/**
* Whether this link is designated as a primary, a spare, or no preference.
*/
uint8_t _mode;
/**
* The specific name of the link to be used in the event that this
* link fails.
*/
std::string _failoverToLinkStr;
/**
* User-specified allocation
*/
float _userSpecifiedAlloc;
/**
* Whether or not this link was created as a result of manual user specification. This is
* important to know because certain policy decisions are dependent on whether the user
* intents to use a specific set of interfaces.
*/
bool _isUserSpecified;
AtomicCounter __refCount;
};
} // namespace ZeroTier
#endif

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -459,6 +459,22 @@ uint64_t OSUtils::jsonInt(const nlohmann::json &jv,const uint64_t dfl)
return dfl;
}
double OSUtils::jsonDouble(const nlohmann::json &jv,const double dfl)
{
try {
if (jv.is_number()) {
return (double)jv;
}
else if (jv.is_string()) {
std::string s = jv;
return Utils::strToDouble(s.c_str());
} else if (jv.is_boolean()) {
return (double)jv;
}
} catch ( ... ) {}
return dfl;
}
uint64_t OSUtils::jsonIntHex(const nlohmann::json &jv,const uint64_t dfl)
{
try {

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -277,6 +277,7 @@ public:
static nlohmann::json jsonParse(const std::string &buf);
static std::string jsonDump(const nlohmann::json &j,int indentation = 1);
static uint64_t jsonInt(const nlohmann::json &jv,const uint64_t dfl);
static double jsonDouble(const nlohmann::json &jv,const double dfl);
static uint64_t jsonIntHex(const nlohmann::json &jv,const uint64_t dfl);
static bool jsonBool(const nlohmann::json &jv,const bool dfl);
static std::string jsonString(const nlohmann::json &jv,const char *dfl);

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -261,46 +261,6 @@ public:
}
}
/**
* Whether or not the socket object is in a closed state
*
* @param s Socket object
* @return true if socket is closed, false if otherwise
*/
inline bool isClosed(PhySocket *s)
{
PhySocketImpl *sws = (reinterpret_cast<PhySocketImpl *>(s));
return sws->type == ZT_PHY_SOCKET_CLOSED;
}
/**
* Get state of socket object
*
* @param s Socket object
* @return State of socket
*/
inline int getState(PhySocket *s)
{
PhySocketImpl *sws = (reinterpret_cast<PhySocketImpl *>(s));
return sws->type;
}
/**
* In the event that this socket is erased, we need a way to convey to the multipath logic
* that this path is no longer valid.
*
* @param s Socket object
* @return Whether the state of this socket is within an acceptable range of values
*/
inline bool isValidState(PhySocket *s)
{
if (s) {
PhySocketImpl *sws = (reinterpret_cast<PhySocketImpl *>(s));
return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN;
}
return false;
}
/**
* Cause poll() to stop waiting immediately
*

247
service/MULTIPATH.md Normal file
View File

@ -0,0 +1,247 @@
### Bonding (link aggregation)
Link aggregation allows the simultaneous (or conditional) use of multiple physical links to enable increased throughput, load balancing, redundancy, and fault tolerance. There are a variety of standard policies available that can be used right out of the box with little to no configuration. These policies are directly inspired by [the policies offered by the Linux kernel](https://www.kernel.org/doc/Documentation/networking/bonding.txt) but are now offered in user-space and hence available on all platforms that ZeroTier supports.
#### Standard policies
| Policy name | Fault tolerance | Min. failover (sec.) | Default Failover (sec.)| Balancing | Aggregation efficiency | Redundancy | Sequence Reordering |
|--------------------|:---------------------:|---------------------:|-----------------------:|----------------------:|-----------------------:|-----------:|--------------------:|
| `none` | None | `60+` | `60+` | none | `none` |1 | No
| `active-backup` | Brief interruption | `0.25` | `10` | none | `low` |1 | Only during failover
| `broadcast` | Fully tolerant | `N/A` | `N/A` | none | `very low` |N | Often
| `balance-rr` | Self-healing | `0.25` | `10` | packet-based | `high` |1 | Often
| `balance-xor` | Self-healing | `0.25` | `10` | flow-based | `very high` |1 | Only during failover
| `balance-aware` | Self-healing | `0.25` | `10` | *adaptive* flow-based | `very high` |1 | Only during failover and re-balance
A policy can be used easily without specifying any additional parameters:
```
{
"settings": {
"defaultBondingPolicy": "active-backup"
}
}
```
#### Custom policies
To customize a bonding policy for your use-case simply specify a `basePolicy` and override chosen parameters. For example, to create a more aggressive `active-backup` policy with low monitoring overhead that will failover `0.250` seconds after it detects a link failure, one could do the following:
```
{
"settings":
{
"defaultBondingPolicy": "aggressive-active-backup",
"policies":
{
"aggressive-active-backup":
{
"failoverInterval": 250,
"pathMonitorStrategy": "dynamic",
"basePolicy": "active-backup"
}
}
}
}
```
#### Specifying links
Bonds are composed of multiple `links`. Different sets of links can be constructed for different bonding policies and used simultaneously. One can specify the links that ZeroTier should use in any given bonding policy simply by providing an array of links with names corresponding to interface names. If a user doesn't specify a set of interfaces to use, ZeroTier will assume every system interface is available for use. However, if the user **does** specify a set of interfaces, ZeroTier will only use what is specified. The same applies to failover rules, if none are specified, ZeroTier will failover to any operational link. On the other hand, if the user does specify failover rules and there is ever a situation where a link is available for usage but does not fit within the rules specified by the user, it will go unused.
To specify that ZeroTier should only use `eth0` and `eth1` as primary links, and `eth2` as a backup spare and that it should prefer IPv4 over IPv6 except on `eth2` where only IPv6 is allowed:
```
{
"settings": {
"defaultBondingPolicy": "aggressive-active-backup",
"policies": {
"aggressive-active-backup": {
"links": {
"eth0": {
"ipvPref": 46,
"failoverTo": "eth2",
"mode": "primary"
},
"eth1": {
"ipvPref": 46,
"failoverTo": "eth2",
"mode": "primary"
},
"eth2": {
"ipvPref": 6,
"mode": "spare"
}
}
}
}
}
}
```
Additional link-specific parameters:
```
"links":
{
"interfaceName": /* System-name of the network interface. */
{
"failoverInterval": 0-65535, /* (optional) How quickly a path on this link should failover after a detected failure. */
"ipvPref": [0,4,6,46,64], /* (optional) IP version preference for detected paths on a link. */
"speed": 0-1000000, /* (optional) How fast this link is (in arbitrary units). This is a useful way to manually allocate a bond. */
"alloc": 0-255, /* (optional) A relative value representing a desired allocation. */
"upDelay": 0-65535, /* (optional) How long after a path becomes alive before it is added to the bond. */
"downDelay": 0-65535, /* (optional) How long after a path fails before it is removed from the bond. */
"failoverTo": "spareInterfaceName", /* (optional) Which link should be used next after a failure of this link. */
"enabled": true|false, /* (optional) Whether any paths on this link are allowed to be used this bond. */
"mode": "primary"|"spare" /* (optional) Whether this link is used by default or only after failover events. */
}
}
```
#### Peer-specific bonds
It is possible to direct ZeroTier to form a certain type of bond with specific peers of your choice. For instance, if one were to want `active-backup` by default but for certain peers to be bonded with a custom load-balanced bond such as `my-custom-balance-aware` one could do the following:
```
{
"settings":
{
"defaultBondingPolicy": "active-backup",
"policies":
{
"my-custom-balance-aware":
{
"failoverInterval": 2000,
"monitorStrategy": "dynamic",
"basePolicy": "balance-aware"
}
},
"peerSpecificBonds":
{
"f6203a2db3":"my-custom-balance-aware",
"45b0301da2":"my-custom-balance-aware",
"a92cb526fa":"my-custom-balance-aware"
}
}
}
```
#### Active backup (`active-backup`)
Traffic is sent only on (one) path at any given time. A different path becomes active if the current path fails. This mode provides fault tolerance with a nearly immediate fail-over. This mode **does not** increase total throughput.
- `mode`: `primary, spare` Link option which specifies which link is the primary device. The specified device is intended to always be the active link while it is available. There are exceptions to this behavior when using different `linkSelectMethod` modes. There can only be one `primary` link in this bonding policy.
- `linkSelectMethod`: Specifies the selection policy for the active link during failure and/or recovery events. This is similar to the Linux Kernel's `primary_reselect` option but with a minor extension:
- `optimize`: **(default if user provides no failover guidance)** The primary link can change periodically if a superior path is detected.
- `always`: **(default when links are explicitly specified)**: Primary link regains status as active link whenever it comes back up.
- `better`: Primary link regains status as active link when it comes back up and (if) it is better than the currently-active link.
- `failure`: Primary link regains status as active link only if the currently-active link fails.
```
{
"settings":
{
"defaultBondingPolicy": "active-backup",
"active-backup":
{
"linkSelectMethod": "always",
"links":
{
"eth0": { "failoverTo": "eth1", "mode": "primary" },
"eth1": { "mode": "spare" },
"eth2": { "mode": "spare" },
"eth3": { "mode": "spare" }
}
}
}
}
```
#### Broadcast (`broadcast`)
Traffic is sent on (all) available paths simultaneously. This mode provides fault tolerance and effectively immediate failover due to transmission redundancy. This mode is a poor utilization of throughput resources and will **not** increase throughput but can prevent packet loss during a link failure. The only option available is `dedup` which will de-duplicate all packets on the receiving end if set to `true`.
#### Balance round robin (`balance-rr`)
Traffic is striped across multiple paths. Offers partial fault tolerance immediately, full fault tolerance eventually. This policy is unaware of protocols and is primarily intended for use with protocols that are not sensitive to reordering delays. The only option available for this policy is `packetsPerLink` which specifies the number of packets to transmit via a path before moving to the next in the RR sequence. When set to `0` a path is chosen at random for each outgoing packet. The default value is `8`, low values can begin to add overhead to packet processing.
#### Balance XOR (`balance-xor`, similar to the Linux kernel's [balance-xor](https://www.kernel.org/doc/Documentation/networking/bonding.txt) with `xmit_hash_policy=layer3+4`)
Traffic is categorized into *flows* based on *source port*, *destination port*, and *protocol type* these flows are then hashed onto available links. Each flow will persist on its assigned link interface for its entire life-cycle. Traffic that does not have an assigned port (such as ICMP pings) will be randomly distributed across links. The hash function is simply: `src_port ^ dst_port ^ proto`.
#### Balance aware (`balance-aware`, similar to Linux kernel's [`balance-*lb`](https://www.kernel.org/doc/Documentation/networking/bonding.txt) modes)
Traffic is dynamically allocated and balanced across multiple links simultaneously according to the target allocation. Options allow for *packet* or *flow-based* processing, and active-flow reassignment. Flows mediated over a recently failed links will be reassigned in a manner that respects the target allocation of the bond. An optional `balancePolicy` can be specified with the following effects: `flow-dynamic` (default) will hash flows onto links according to target allocation and may perform periodic re-assignments in order to preserve balance. `flow-static`, will hash flows onto links according to target allocation but will not re-assign flows unless a failure occurs or the link is no longer operating within acceptable parameters. And lastly `packet` which simply load balances packets across links according to target allocation but with no concern for sequence reordering.
```
{
"settings":
{
"defaultBondingPolicy": "balance-aware",
"balance-aware": {
"balancePolicy": "flow-dynamic"|"flow-static"|"packet"
}
}
}
```
#### Link quality
ZeroTier measures various properties of a link (such as latency, throughput, jitter, packet loss ratio, etc) in order to arrive at a quality estimate. This estimate is used by bonding policies to make allocation and failover decisions:
| Policy name | Role |
|:---------------|:-----|
|`active-backup` | Determines the order of the failover queue. And if `activeReselect=optimize` whether a new active link is selected. |
|`broadcast` | Does not use quality measurements. |
|`balance-rr` | May trigger removal of link from bond. |
|`balance-xor` | May trigger removal of link from bond. |
|`balance-aware` | Informs flow assignments and (re-)assignments. May trigger removal of link from bond. |
A link's eligibility for being included in a bond is dependent on more than perceived quality. If a path on a link begins to exhibit disruptive behavior such as extremely high packet loss, corruption, or periodic inability to process traffic it will be removed from the bond, its traffic will be appropriately reallocated and it will be punished. Punishments gradually fade and a link can be readmitted to the bond over time. However, punishments increase exponentially if applied more than once within a given window of time.
#### Asymmetric links
In cases where it is necessary to bond physical links that vary radically in terms of cost, throughput, latency, and or reliability, there are a couple of ways to automatically (or manually) allocate traffic among them. Traffic distribution and balancing can be either `packet` or `flow` based. Where packet-based is suitable for protocols not susceptible to reordering penalties and flow-based is suitable for protocols such as TCP where it is desirable to keep a conversation on a single link unless we can't avoid having to re-assign it. Additionally, a *target allocation* of traffic used by the bonding policy can be derived/specified in the following ways:
- **Automatically**: This is the easiest and requires no user configuration. The bonding layer measures and senses the link properties and determines a target allocation based on perceived quality and capacity. Weaker, less reliable links will have less traffic allocated to them and stronger, more reliable links will have more traffic allocated to them. Optionally, the user can specify a set of weights (totaling `1.0`) to inform the bonding layer how important certain link properties are. For instance, one may primarily be concerned with latency and jitter but not total throughput:
```
"balance-aware": {
"quality": {
"lat": 0.3, /* Moving average of latency in milliseconds */
"ltm": 0.2, /* Maximum observed latency in milliseconds */
"pdv": 0.3, /* Packet delay variance in milliseconds. Similar to jitter */
"plr": 0.1, /* Packet loss ratio */
"per": 0.1, /* Packet error ratio */
"avl": 0.0, /* Availability */
}
}
```
In the absence of user guidance ZeroTier will attempt to form an understanding of each link's speed and capacity but this value can be inaccurate if the links are not routinely saturated. Therefore we provide a way to explicitly signal the capacity of each link in terms of arbitrary but relative values:
```
"links": {
"eth0": { "speed": 10000 },
"eth1": { "speed": 1000 },
"eth2": { "speed": 100 }
}
```
The user specifies allocation percentages (totaling `1.0`). In this case quality measurements will only be used to determine a link's eligibility to be a member of a bond, now how much traffic it will carry:
```
"links": {
"eth0": { "alloc": 0.50 },
"eth1": { "alloc": 0.25 },
"eth2": { "alloc": 0.25 }
}
```
#### Performance and overhead considerations
- Only packets with internal IDs divisible by `16` are included in measurements, this amounts to about `6.25%` of all traffic.
- `failoverInterval` specifies how quickly failover should occur during a link failure. In order to accomplish this a combination of active and passive measurement techniques are employed which may result in `VERB_HELLO` probes being sent every `failoverInterval / 4` time units. As a mitigation `monitorStrategy` may be set to `dynamic` so that probe frequency directly correlates with native application traffic.

View File

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -39,6 +39,8 @@
#include "../node/Salsa20.hpp"
#include "../node/Poly1305.hpp"
#include "../node/SHA512.hpp"
#include "../node/Bond.hpp"
#include "../node/Peer.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Thread.hpp"
@ -48,6 +50,7 @@
#include "../osdep/Binder.hpp"
#include "../osdep/ManagedRoute.hpp"
#include "../osdep/BlockingQueue.hpp"
#include "../osdep/Link.hpp"
#include "OneService.hpp"
#include "SoftwareUpdater.hpp"
@ -268,37 +271,43 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer)
pj["paths"] = pa;
}
static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer)
static void _peerBondToJson(nlohmann::json &pj,const ZT_Peer *peer)
{
char tmp[256];
OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address);
pj["aggregateLinkLatency"] = peer->latency;
//pj["aggregateLinkLatency"] = peer->latency;
std::string policyStr = BondController::getPolicyStrByCode(peer->bondingPolicy);
pj["policy"] = policyStr;
nlohmann::json pa = nlohmann::json::array();
for(unsigned int i=0;i<peer->pathCount;++i) {
int64_t lastSend = peer->paths[i].lastSend;
int64_t lastReceive = peer->paths[i].lastReceive;
nlohmann::json j;
j["address"] = reinterpret_cast<const InetAddress *>(&(peer->paths[i].address))->toString(tmp);
j["lastSend"] = (lastSend < 0) ? 0 : lastSend;
j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive;
j["ifname"] = std::string(peer->paths[i].ifname);
j["path"] = reinterpret_cast<const InetAddress *>(&(peer->paths[i].address))->toString(tmp);
j["lastTX"] = (lastSend < 0) ? 0 : lastSend;
j["lastRX"] = (lastReceive < 0) ? 0 : lastReceive;
j["lat"] = peer->paths[i].latencyMean;
j["pdv"] = peer->paths[i].latencyVariance;
//j["trustedPathId"] = peer->paths[i].trustedPathId;
//j["active"] = (bool)(peer->paths[i].expired == 0);
//j["expired"] = (bool)(peer->paths[i].expired != 0);
//j["preferred"] = (bool)(peer->paths[i].preferred != 0);
j["latency"] = peer->paths[i].latency;
j["pdv"] = peer->paths[i].packetDelayVariance;
//j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff;
//j["packetErrorRatio"] = peer->paths[i].packetErrorRatio;
//j["packetLossRatio"] = peer->paths[i].packetLossRatio;
j["stability"] = peer->paths[i].stability;
j["throughput"] = peer->paths[i].throughput;
//j["maxThroughput"] = peer->paths[i].maxThroughput;
j["allocation"] = peer->paths[i].allocation;
j["ifname"] = peer->paths[i].ifname;
//j["ltm"] = peer->paths[i].latencyMax;
//j["plr"] = peer->paths[i].packetLossRatio;
//j["per"] = peer->paths[i].packetErrorRatio;
//j["thr"] = peer->paths[i].throughputMean;
//j["thm"] = peer->paths[i].throughputMax;
//j["thv"] = peer->paths[i].throughputVariance;
//j["avl"] = peer->paths[i].availability;
//j["age"] = peer->paths[i].age;
//j["alloc"] = peer->paths[i].allocation;
//j["ifname"] = peer->paths[i].ifname;
pa.push_back(j);
}
pj["paths"] = pa;
pj["links"] = pa;
}
static void _moonToJson(nlohmann::json &mj,const World &world)
@ -431,7 +440,7 @@ public:
bool _updateAutoApply;
bool _allowTcpFallbackRelay;
bool _allowSecondaryPort;
unsigned int _multipathMode;
unsigned int _primaryPort;
unsigned int _secondaryPort;
unsigned int _tertiaryPort;
@ -724,6 +733,7 @@ public:
}
}
#endif
// Delete legacy iddb.d if present (cleanup)
OSUtils::rmDashRf((_homePath + ZT_PATH_SEPARATOR_S "iddb.d").c_str());
@ -758,7 +768,6 @@ public:
int64_t lastTapMulticastGroupCheck = 0;
int64_t lastBindRefresh = 0;
int64_t lastUpdateCheck = clockShouldBe;
int64_t lastMultipathModeUpdate = 0;
int64_t lastCleanedPeersDb = 0;
int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle
int64_t lastLocalConfFileCheck = OSUtils::now();
@ -804,7 +813,7 @@ public:
}
// Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default)
if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) {
if (((now - lastBindRefresh) >= (_node->bondController()->inUse() ? ZT_BINDER_REFRESH_PERIOD / 4 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) {
lastBindRefresh = now;
unsigned int p[3];
unsigned int pc = 0;
@ -821,11 +830,6 @@ public:
}
}
}
// Update multipath mode (if needed)
if (((now - lastMultipathModeUpdate) >= ZT_BINDER_REFRESH_PERIOD / 8)||(restarted)) {
lastMultipathModeUpdate = now;
_node->setMultipathMode(_multipathMode);
}
// Run background task processor in core if it's time to do so
int64_t dl = _nextBackgroundTaskDeadline;
@ -861,7 +865,7 @@ public:
}
// Sync information about physical network interfaces
if ((now - lastLocalInterfaceAddressCheck) >= (_multipathMode ? ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) {
if ((now - lastLocalInterfaceAddressCheck) >= (_node->bondController()->inUse() ? ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) {
lastLocalInterfaceAddressCheck = now;
_node->clearLocalInterfaceAddresses();
@ -875,8 +879,9 @@ public:
#endif
std::vector<InetAddress> boundAddrs(_binder.allBoundLocalInterfaceAddresses());
for(std::vector<InetAddress>::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i)
for(std::vector<InetAddress>::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i) {
_node->addLocalInterfaceAddress(reinterpret_cast<const struct sockaddr_storage *>(&(*i)));
}
}
// Clean peers.d periodically
@ -1225,15 +1230,15 @@ public:
settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff;
settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay);
if (_multipathMode) {
json &multipathConfig = res["multipath"];
if (_node->bondController()->inUse()) {
json &multipathConfig = res["bonds"];
ZT_PeerList *pl = _node->peers();
char peerAddrStr[256];
if (pl) {
for(unsigned long i=0;i<pl->peerCount;++i) {
if (pl->peers[i].hadAggregateLink) {
if (pl->peers[i].isBonded) {
nlohmann::json pj;
_peerAggregateLinkToJson(pj,&(pl->peers[i]));
_peerBondToJson(pj,&(pl->peers[i]));
OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address);
multipathConfig[peerAddrStr] = (pj);
}
@ -1362,8 +1367,8 @@ public:
if (j.is_object()) {
seed = Utils::hexStrToU64(OSUtils::jsonString(j["seed"],"0").c_str());
}
} catch (std::exception &exc) {
} catch ( ... ) {
// discard invalid JSON
}
std::vector<World> moons(_node->moons());
@ -1412,8 +1417,8 @@ public:
json &allowDefault = j["allowDefault"];
if (allowDefault.is_boolean()) localSettings.allowDefault = (bool)allowDefault;
}
} catch (std::exception &exc) {
} catch ( ... ) {
// discard invalid JSON
}
setNetworkSettings(nws->networks[i].nwid,localSettings);
@ -1567,19 +1572,143 @@ public:
json &settings = lc["settings"];
if (!_node->bondController()->inUse()) {
// defaultBondingPolicy
std::string defaultBondingPolicyStr(OSUtils::jsonString(settings["defaultBondingPolicy"],""));
int defaultBondingPolicy = _node->bondController()->getPolicyCodeByStr(defaultBondingPolicyStr);
_node->bondController()->setBondingLayerDefaultPolicy(defaultBondingPolicy);
_node->bondController()->setBondingLayerDefaultPolicyStr(defaultBondingPolicyStr); // Used if custom policy
// Custom Policies
json &customBondingPolicies = settings["policies"];
for (json::iterator policyItr = customBondingPolicies.begin(); policyItr != customBondingPolicies.end();++policyItr) {
//fprintf(stderr, "\n\n--- (%s)\n", policyItr.key().c_str());
// Custom Policy
std::string customPolicyStr(policyItr.key());
json &customPolicy = policyItr.value();
std::string basePolicyStr(OSUtils::jsonString(customPolicy["basePolicy"],""));
if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BONDING_POLICY_NONE) {
fprintf(stderr, "error: custom policy (%s) is invalid, unknown base policy (%s).\n",
customPolicyStr.c_str(), basePolicyStr.c_str());
continue;
} if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BONDING_POLICY_NONE) {
fprintf(stderr, "error: custom policy (%s) will be ignored, cannot use standard policy names for custom policies.\n",
customPolicyStr.c_str());
continue;
}
// New bond, used as a copy template for new instances
SharedPtr<Bond> newTemplateBond = new Bond(NULL, basePolicyStr, customPolicyStr, SharedPtr<Peer>());
// Acceptable ranges
newTemplateBond->setMaxAcceptableLatency(OSUtils::jsonInt(customPolicy["maxAcceptableLatency"],-1));
newTemplateBond->setMaxAcceptableMeanLatency(OSUtils::jsonInt(customPolicy["maxAcceptableMeanLatency"],-1));
newTemplateBond->setMaxAcceptablePacketDelayVariance(OSUtils::jsonInt(customPolicy["maxAcceptablePacketDelayVariance"],-1));
newTemplateBond->setMaxAcceptablePacketLossRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketLossRatio"],-1));
newTemplateBond->setMaxAcceptablePacketErrorRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketErrorRatio"],-1));
newTemplateBond->setMinAcceptableAllocation((float)OSUtils::jsonDouble(customPolicy["minAcceptableAllocation"],0));
// Quality weights
json &qualityWeights = customPolicy["qualityWeights"];
if (qualityWeights.size() == ZT_QOS_WEIGHT_SIZE) { // TODO: Generalize this
float weights[ZT_QOS_WEIGHT_SIZE];
weights[ZT_QOS_LAT_IDX] = (float)OSUtils::jsonDouble(qualityWeights["lat"],0.0);
weights[ZT_QOS_LTM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["ltm"],0.0);
weights[ZT_QOS_PDV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["pdv"],0.0);
weights[ZT_QOS_PLR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["plr"],0.0);
weights[ZT_QOS_PER_IDX] = (float)OSUtils::jsonDouble(qualityWeights["per"],0.0);
weights[ZT_QOS_THR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thr"],0.0);
weights[ZT_QOS_THM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thm"],0.0);
weights[ZT_QOS_THV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thv"],0.0);
newTemplateBond->setUserQualityWeights(weights,ZT_QOS_WEIGHT_SIZE);
}
// Bond-specific properties
newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1));
newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1));
newTemplateBond->setFlowRebalanceStrategy(OSUtils::jsonInt(customPolicy["flowRebalanceStrategy"],(uint64_t)0));
newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0));
newTemplateBond->setPacketsPerLink(OSUtils::jsonInt(customPolicy["packetsPerLink"],-1));
std::string linkMonitorStrategyStr(OSUtils::jsonString(customPolicy["linkMonitorStrategy"],""));
uint8_t linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT;
if (linkMonitorStrategyStr == "passive") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE; }
if (linkMonitorStrategyStr == "active") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE; }
if (linkMonitorStrategyStr == "dynamic") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; }
newTemplateBond->setLinkMonitorStrategy(linkMonitorStrategy);
// Policy-Specific link set
json &links = customPolicy["links"];
for (json::iterator linkItr = links.begin(); linkItr != links.end();++linkItr) {
//fprintf(stderr, "\t--- link (%s)\n", linkItr.key().c_str());
std::string linkNameStr(linkItr.key());
json &link = linkItr.value();
bool enabled = OSUtils::jsonInt(link["enabled"],true);
uint32_t speed = OSUtils::jsonInt(link["speed"],0);
float alloc = (float)OSUtils::jsonDouble(link["alloc"],0);
if (speed && alloc) {
fprintf(stderr, "error: cannot specify both speed (%d) and alloc (%f) for link (%s), pick one, link disabled.\n",
speed, alloc, linkNameStr.c_str());
enabled = false;
}
uint32_t upDelay = OSUtils::jsonInt(link["upDelay"],-1);
uint32_t downDelay = OSUtils::jsonInt(link["downDelay"],-1);
uint8_t ipvPref = OSUtils::jsonInt(link["ipvPref"],0);
uint32_t linkMonitorInterval = OSUtils::jsonInt(link["monitorInterval"],(uint64_t)0);
std::string failoverToStr(OSUtils::jsonString(link["failoverTo"],""));
// Mode
std::string linkModeStr(OSUtils::jsonString(link["mode"],"spare"));
uint8_t linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE;
if (linkModeStr == "primary") { linkMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; }
if (linkModeStr == "spare") { linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; }
// ipvPref
if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) {
fprintf(stderr, "error: invalid ipvPref value (%d), link disabled.\n", ipvPref);
enabled = false;
}
if (linkMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && failoverToStr.length()) {
fprintf(stderr, "error: cannot specify failover links for spares, link disabled.\n");
failoverToStr = "";
enabled = false;
}
_node->bondController()->addCustomLink(customPolicyStr, new Link(linkNameStr,ipvPref,speed,linkMonitorInterval,upDelay,downDelay,enabled,linkMode,failoverToStr,alloc));
}
// TODO: This is dumb
std::string linkSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize"));
if (linkSelectMethodStr == "always") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); }
if (linkSelectMethodStr == "better") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); }
if (linkSelectMethodStr == "failure") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); }
if (linkSelectMethodStr == "optimize") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); }
if (newTemplateBond->getLinkSelectMethod() < 0 || newTemplateBond->getLinkSelectMethod() > 3) {
fprintf(stderr, "warning: invalid value (%s) for linkSelectMethod, assuming mode: always\n", linkSelectMethodStr.c_str());
}
/*
newBond->setPolicy(_node->bondController()->getPolicyCodeByStr(basePolicyStr));
newBond->setFlowHashing((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowFlowHashing"],(bool)allowFlowHashing));
newBond->setBondMonitorInterval((unsigned int)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["monitorInterval"],(uint64_t)0));
newBond->setAllowPathNegotiation((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowPathNegotiation"],(bool)false));
*/
if (!_node->bondController()->addCustomPolicy(newTemplateBond)) {
fprintf(stderr, "error: a custom policy of this name (%s) already exists.\n", customPolicyStr.c_str());
}
}
// Peer-specific bonding
json &peerSpecificBonds = settings["peerSpecificBonds"];
for (json::iterator peerItr = peerSpecificBonds.begin(); peerItr != peerSpecificBonds.end();++peerItr) {
_node->bondController()->assignBondingPolicyToPeer(std::stoull(peerItr.key(),0,16), peerItr.value());
}
// Check settings
if (defaultBondingPolicyStr.length() && !defaultBondingPolicy && !_node->bondController()->inUse()) {
fprintf(stderr, "error: unknown policy (%s) specified by defaultBondingPolicy, link disabled.\n", defaultBondingPolicyStr.c_str());
}
}
// bondingPolicy cannot be used with allowTcpFallbackRelay
_allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !(_node->bondController()->inUse());
_primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff;
_allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true);
_allowSecondaryPort = OSUtils::jsonBool(settings["allowSecondaryPort"],true);
_secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0);
_tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0);
if (_secondaryPort != 0 || _tertiaryPort != 0) {
fprintf(stderr,"WARNING: using manually-specified ports. This can cause NAT issues." ZT_EOL_S);
}
_multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0);
if (_multipathMode != 0 && _allowTcpFallbackRelay) {
fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay" ZT_EOL_S);
_allowTcpFallbackRelay = false;
}
_portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true);
#ifndef ZT_SDK
@ -1724,9 +1853,8 @@ public:
}
}
#ifdef __SYNOLOGY__
if (!n.tap->addIps(newManagedIps)) {
if (!n.tap->addIpSyn(newManagedIps))
fprintf(stderr,"ERROR: unable to add ip addresses to ifcfg" ZT_EOL_S);
}
#else
for(std::vector<InetAddress>::iterator ip(newManagedIps.begin());ip!=newManagedIps.end();++ip) {
if (std::find(n.managedIps.begin(),n.managedIps.end(),*ip) == n.managedIps.end()) {
@ -2044,8 +2172,6 @@ public:
return;
}
} catch (std::exception &exc) {
_phy.close(sock);
} catch ( ... ) {
_phy.close(sock);
}
@ -2154,8 +2280,6 @@ public:
#endif
_nets.erase(nwid);
return -999;
} catch (int exc) {
return -999;
} catch ( ... ) {
return -999; // tap init failed
}
@ -2762,6 +2886,7 @@ public:
if (!strncmp(p->c_str(),ifname,p->length()))
return false;
}
return _node->bondController()->allowedToBind(std::string(ifname));
}
{
// Check global blacklists