diff --git a/node/Constants.hpp b/node/Constants.hpp index 20da46c1a..2b9580889 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -179,7 +179,7 @@ error_no_byte_order_defined; #define ZT_MAC_FIRST_OCTET 0x32 /** - * Length of secret key in bytes + * Length of secret key in bytes -- 256-bit for Salsa20 */ #define ZT_PEER_SECRET_KEY_LENGTH 32 @@ -189,17 +189,17 @@ error_no_byte_order_defined; #define ZT_DB_CLEAN_PERIOD 300000 /** - * How long to remember peers in RAM if they haven't been used + * How long to remember peer records in RAM if they haven't been used */ #define ZT_PEER_IN_MEMORY_EXPIRATION 600000 /** * Delay between WHOIS retries in ms */ -#define ZT_WHOIS_RETRY_DELAY 350 +#define ZT_WHOIS_RETRY_DELAY 500 /** - * Maximum identity WHOIS retries + * Maximum identity WHOIS retries (each attempt tries consulting a different peer) */ #define ZT_MAX_WHOIS_RETRIES 3 @@ -214,7 +214,7 @@ error_no_byte_order_defined; #define ZT_RECEIVE_QUEUE_TIMEOUT (ZT_WHOIS_RETRY_DELAY * (ZT_MAX_WHOIS_RETRIES + 1)) /** - * Maximum number of ZT hops allowed + * Maximum number of ZT hops allowed (this is not IP hops/TTL) * * The protocol allows up to 7, but we limit it to something smaller. */ @@ -248,7 +248,7 @@ error_no_byte_order_defined; #define ZT_MULTICAST_LIKE_EXPIRE 120000 /** - * Time between polls of local taps for multicast membership changes + * Time between polls of local tap devices for multicast membership changes */ #define ZT_MULTICAST_LOCAL_POLL_PERIOD 10000 @@ -275,6 +275,17 @@ error_no_byte_order_defined; */ #define ZT_FIREWALL_OPENER_DELAY 30000 +/** + * Number of hops to open via firewall opener packets + * + * The firewall opener code iterates from 1 to this value (inclusive), sending + * a tiny packet with each TTL value. + * + * 2 should permit traversal of double-NAT configurations, such as from inside + * a VM running behind local NAT on a host that is itself behind NAT. + */ +#define ZT_FIREWALL_OPENER_HOPS 2 + /** * Delay between requests for updated network autoconf information */ @@ -291,14 +302,12 @@ error_no_byte_order_defined; #define ZT_PING_UNANSWERED_AFTER 1500 /** - * Try to ping supernodes this often until we get something from somewhere + * Try to ping supernodes this often until we get something from them */ #define ZT_STARTUP_AGGRO (ZT_PING_UNANSWERED_AFTER * 2) /** - * Maximum delay between runs of the main service loop - * - * This is the shortest of the check delays/periods. + * Maximum delay between runs of the main loop in Node.cpp */ #define ZT_MAX_SERVICE_LOOP_INTERVAL ZT_STARTUP_AGGRO @@ -308,28 +317,26 @@ error_no_byte_order_defined; #define ZT_TCP_TUNNEL_FAILOVER_TIMEOUT (ZT_STARTUP_AGGRO * 5) /** - * Path activity timeout + * Path activity timeout (for non-fixed paths) */ #define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + ZT_PING_CHECK_DELAY) /** - * Close TCP tunnels if unused for this long (used in SocketManager) + * Close TCP sockets if unused for this long (SocketManager) */ #define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT /** - * Stop relaying via peers that have not responded to direct sends in this long + * Stop relaying via peers that have not responded to direct sends + * + * When we send something (including frames), we generally expect a response. + * Switching relays if no response in a short period of time causes more + * rapid failover if a supernode goes down or becomes unreachable. In the + * mistaken case, little harm is done as it'll pick the next-fastest + * supernode and will switch back eventually. */ #define ZT_PEER_RELAY_CONVERSATION_LATENCY_THRESHOLD 10000 -/** - * IP hops (a.k.a. TTL) to set for firewall opener packets - * - * 2 should permit traversal of double-NAT configurations, such as from inside - * a VM running behind local NAT on a host that is itself behind NAT. - */ -#define ZT_FIREWALL_OPENER_HOPS 2 - /** * Delay sleep overshoot for detection of a probable sleep/wake event */ @@ -342,6 +349,10 @@ error_no_byte_order_defined; /** * Minimum interval between attempts by relays to unite peers + * + * When a relay gets a packet destined for another peer, it sends both peers + * a RENDEZVOUS message no more than this often. This instructs the peers + * to attempt NAT-t and gives each the other's corresponding IP:port pair. */ #define ZT_MIN_UNITE_INTERVAL 30000 @@ -361,7 +372,7 @@ error_no_byte_order_defined; #define ZT_UPDATE_MAX_INTERVAL 7200000 /** - * Update HTTP timeout in seconds + * Software update HTTP timeout in seconds */ #define ZT_UPDATE_HTTP_TIMEOUT 30 diff --git a/node/Peer.cpp b/node/Peer.cpp index 24e3c869f..86a0da0fd 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -121,18 +121,23 @@ bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,u { Mutex::Lock _l(_lock); + /* For sending ordinary packets, paths are divided into two categories: + * "normal" and "TCP out." Normal includes UDP and incoming TCP. We want + * to treat outbound TCP differently since if we use it it may end up + * overriding UDP and UDP performs much better. We only want to initiate + * TCP if it looks like UDP isn't available. */ Path *bestNormalPath = (Path *)0; Path *bestTcpOutPath = (Path *)0; uint64_t bestNormalPathLastReceived = 0; uint64_t bestTcpOutPathLastReceived = 0; for(std::vector::iterator p(_paths.begin());p!=_paths.end();++p) { uint64_t lr = p->lastReceived(); - if (p->type() == Path::PATH_TYPE_TCP_OUT) { // TCP_OUT paths initiate TCP connections + if (p->type() == Path::PATH_TYPE_TCP_OUT) { if (lr >= bestTcpOutPathLastReceived) { bestTcpOutPathLastReceived = lr; bestTcpOutPath = &(*p); } - } else { // paths other than TCP_OUT are considered "normal" + } else { if (lr >= bestNormalPathLastReceived) { bestNormalPathLastReceived = lr; bestNormalPath = &(*p); @@ -141,20 +146,23 @@ bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,u } Path *bestPath = (Path *)0; - if (!_r->tcpTunnelingEnabled) { // TCP tunneling master switch is off, use normal path + if (bestTcpOutPath) { // we have a TCP out path + if (bestNormalPath) { // we have both paths, decide which to use + if (_r->tcpTunnelingEnabled) { // TCP tunneling is enabled, so use normal path only if it looks alive + if ((bestNormalPathLastReceived > _r->timeOfLastResynchronize)&&((now - bestNormalPathLastReceived) < ZT_PEER_PATH_ACTIVITY_TIMEOUT)) + bestPath = bestNormalPath; + else bestPath = bestTcpOutPath; + } else { // TCP tunneling is disabled, use normal path + bestPath = bestNormalPath; + } + } else { // we only have a TCP_OUT path, so use it regardless + bestPath = bestTcpOutPath; + } + } else { // we only have a normal path (or none at all, that case is caught below) bestPath = bestNormalPath; - } else if (bestNormalPath) { // we have a normal path, so use if it looks active - if ((bestNormalPathLastReceived > _r->timeOfLastResynchronize)&&((now - bestNormalPathLastReceived) < ZT_PEER_PATH_ACTIVITY_TIMEOUT)) - bestPath = bestNormalPath; - else bestPath = bestTcpOutPath; - } else { // no normal path available - bestPath = bestTcpOutPath; } - if (!bestPath) - return false; - - if (_r->sm->send(bestPath->address(),bestPath->tcp(),bestPath->type() == Path::PATH_TYPE_TCP_OUT,data,len)) { + if ((bestPath)&&(_r->sm->send(bestPath->address(),bestPath->tcp(),bestPath->type() == Path::PATH_TYPE_TCP_OUT,data,len))) { bestPath->sent(now); return true; } @@ -167,8 +175,10 @@ bool Peer::sendFirewallOpener(const RuntimeEnvironment *_r,uint64_t now) Mutex::Lock _l(_lock); for(std::vector::iterator p(_paths.begin());p!=_paths.end();++p) { - if (!p->tcp()) - sent |= _r->sm->sendFirewallOpener(p->address(),ZT_FIREWALL_OPENER_HOPS); + if (p->type() == Path::PATH_TYPE_UDP) { + for(unsigned int h=1;h<=ZT_FIREWALL_OPENER_HOPS;++h) + sent |= _r->sm->sendFirewallOpener(p->address(),h); + } } return sent; @@ -180,23 +190,26 @@ bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now) SharedPtr self(this); Mutex::Lock _l(_lock); - uint64_t lastUdpPingSent = 0; - uint64_t lastUdpReceive = 0; - bool haveUdp = false; + /* Ping (and thus open) outbound TCP connections if we have no other options + * or if the TCP tunneling master switch is enabled and pings have been + * unanswered for ZT_TCP_TUNNEL_FAILOVER_TIMEOUT ms over normal channels. */ + uint64_t lastNormalPingSent = 0; + uint64_t lastNormalReceive = 0; + bool haveNormal = false; for(std::vector::const_iterator p(_paths.begin());p!=_paths.end();++p) { - if (p->type() == Path::PATH_TYPE_UDP) { - lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing()); - lastUdpReceive = std::max(lastUdpReceive,p->lastReceived()); - haveUdp = true; + if (p->type() != Path::PATH_TYPE_TCP_OUT) { + lastNormalPingSent = std::max(lastNormalPingSent,p->lastPing()); + lastNormalReceive = std::max(lastNormalReceive,p->lastReceived()); + haveNormal = true; } } - bool useTcpOut = ( (!haveUdp) || ( (_r->tcpTunnelingEnabled) && (lastUdpPingSent > lastUdpReceive) && ((now - lastUdpReceive) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) ) ); + const bool useTcpOut = ( (!haveNormal) || ( (_r->tcpTunnelingEnabled) && (lastNormalPingSent > _r->timeOfLastResynchronize) && (lastNormalPingSent > lastNormalReceive) && ((lastNormalPingSent - lastNormalReceive) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) ) ); TRACE("PING %s (useTcpOut==%d)",_id.address().toString().c_str(),(int)useTcpOut); for(std::vector::iterator p(_paths.begin());p!=_paths.end();++p) { if ((useTcpOut)||(p->type() != Path::PATH_TYPE_TCP_OUT)) { - p->pinged(now); // we log pings sent even if the send "fails", since what we want to track is when we last tried to ping + p->pinged(now); // attempts to ping are logged whether they look successful or not if (_r->sw->sendHELLO(self,*p)) { p->sent(now); sent = true; @@ -212,7 +225,7 @@ void Peer::clean(uint64_t now) Mutex::Lock _l(_lock); unsigned long i = 0,o = 0,l = (unsigned long)_paths.size(); while (i != l) { - if (_paths[i].active(now)) + if (_paths[i].active(now)) // active includes fixed _paths[o++] = _paths[i]; ++i; } diff --git a/node/Peer.hpp b/node/Peer.hpp index 9baafb62f..3a5316d45 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -80,20 +80,14 @@ public: /** * @return Time peer record was last used in any way */ - inline uint64_t lastUsed() const - throw() - { - return _lastUsed; - } + inline uint64_t lastUsed() const throw() { return _lastUsed; } /** + * Log a use of this peer record (done by Topology when peers are looked up) + * * @param now New time of last use */ - inline void use(uint64_t now) - throw() - { - _lastUsed = now; - } + inline void use(uint64_t now) throw() { _lastUsed = now; } /** * @return This peer's ZT address (short for identity().address()) @@ -106,7 +100,10 @@ public: inline const Identity &identity() const throw() { return _id; } /** - * Must be called on authenticated packet receive from this peer + * Log receipt of an authenticated packet + * + * This is called by the decode pipe when a packet is proven to be authentic + * and appears to be valid. * * @param _r Runtime environment * @param fromSock Socket from which packet was received @@ -130,10 +127,10 @@ public: uint64_t now); /** - * Send a packet to this peer using the most recently active direct path + * Send a packet directly to this peer * - * This does not relay. It returns false if there are no available active - * paths. + * This sends only via direct paths if available and does not handle + * finding of relays. That is done in the send logic in Switch. * * @param _r Runtime environment * @param data Data to send @@ -370,11 +367,6 @@ public: return std::string("?.?.?"); } - /** - * @return True if this Peer is initialized with something - */ - inline operator bool() const throw() { return (_id); } - /** * Get most recently active UDP path addresses for IPv4 and/or IPv6 *