diff --git a/CMakeLists.txt b/CMakeLists.txt index ffdf598b9..307a48458 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,6 +121,7 @@ if ( message("Adding SSE and AES-NI flags for processor ${CMAKE_SYSTEM_PROCESSOR}") add_compile_options( -maes + -mmmx -mrdrnd -mpclmul -msse diff --git a/node/AES-aesni.c b/node/AES-aesni.c index a5fc81e6d..d89765773 100644 --- a/node/AES-aesni.c +++ b/node/AES-aesni.c @@ -19,20 +19,14 @@ #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) #include -#include -#include -#include #include #include #include -#include -/* #define register */ +#define ZT_AES_CTR_AESNI_ROUND(kk) c0 = _mm_aesenc_si128(c0,kk); c1 = _mm_aesenc_si128(c1,kk); c2 = _mm_aesenc_si128(c2,kk); c3 = _mm_aesenc_si128(c3,kk); void zt_crypt_ctr_aesni(const __m128i key[14],const uint8_t iv[16],const uint8_t *in,unsigned int len,uint8_t *out) { - _mm_prefetch(in,_MM_HINT_NTA); - /* Because our CTR supports full 128-bit nonces, we must do a full 128-bit (big-endian) * increment to be compatible with canonical NIST-certified CTR implementations. That's * because it's possible to have a lot of bit saturation in the least significant 64 @@ -55,82 +49,61 @@ void zt_crypt_ctr_aesni(const __m128i key[14],const uint8_t iv[16],const uint8_t register __m128i ctr3 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 3ULL),3LL)),swap128); ctr0 = _mm_shuffle_epi8(ctr0,swap128); + register __m128i k0 = key[0]; + register __m128i k1 = key[1]; + while (len >= 64) { - _mm_prefetch(in + 64,_MM_HINT_NTA); - register __m128i ka = key[0]; - register __m128i c0 = _mm_xor_si128(ctr0,ka); + register __m128i ka = key[2]; + register __m128i c0 = _mm_xor_si128(ctr0,k0); + register __m128i c1 = _mm_xor_si128(ctr1,k0); + register __m128i c2 = _mm_xor_si128(ctr2,k0); + register __m128i c3 = _mm_xor_si128(ctr3,k0); ctr0 = _mm_shuffle_epi8(ctr0,swap128); notctr0msq = ~((uint64_t)_mm_extract_epi64(ctr0,0)); - register __m128i c1 = _mm_xor_si128(ctr1,ka); - register __m128i c2 = _mm_xor_si128(ctr2,ka); - register __m128i c3 = _mm_xor_si128(ctr3,ka); - register __m128i kb = key[1]; ctr1 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 5ULL),5LL)),swap128); ctr2 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 6ULL),6LL)),swap128); - register __m128i kc = key[2]; ctr3 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 7ULL),7LL)),swap128); ctr0 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 4ULL),4LL)),swap128); - register __m128i kd = key[3]; -#define ZT_AES_CTR_AESNI_ROUND(kk) \ - c0 = _mm_aesenc_si128(c0,kk); \ - c1 = _mm_aesenc_si128(c1,kk); \ - c2 = _mm_aesenc_si128(c2,kk); \ - c3 = _mm_aesenc_si128(c3,kk); - ka = key[4]; - ZT_AES_CTR_AESNI_ROUND(kb); - kb = key[5]; - ZT_AES_CTR_AESNI_ROUND(kc); - kc = key[6]; - ZT_AES_CTR_AESNI_ROUND(kd); - kd = key[7]; + register __m128i kb = key[3]; + ZT_AES_CTR_AESNI_ROUND(k1); + register __m128i kc = key[4]; ZT_AES_CTR_AESNI_ROUND(ka); - ka = key[8]; + register __m128i kd = key[5]; ZT_AES_CTR_AESNI_ROUND(kb); - kb = key[9]; + ka = key[6]; ZT_AES_CTR_AESNI_ROUND(kc); - kc = key[10]; + kb = key[7]; ZT_AES_CTR_AESNI_ROUND(kd); - kd = key[11]; + kc = key[8]; ZT_AES_CTR_AESNI_ROUND(ka); - ka = key[12]; + kd = key[9]; ZT_AES_CTR_AESNI_ROUND(kb); - kb = key[13]; + ka = key[10]; ZT_AES_CTR_AESNI_ROUND(kc); - kc = key[14]; + kb = key[11]; ZT_AES_CTR_AESNI_ROUND(kd); + kc = key[12]; ZT_AES_CTR_AESNI_ROUND(ka); + kd = key[13]; ZT_AES_CTR_AESNI_ROUND(kb); -#undef ZT_AES_CTR_AESNI_ROUND - register __m128i d0 = _mm_loadu_si128((const __m128i *)in); - register __m128i d1 = _mm_loadu_si128((const __m128i *)(in + 16)); - register __m128i d2 = _mm_loadu_si128((const __m128i *)(in + 32)); - register __m128i d3 = _mm_loadu_si128((const __m128i *)(in + 48)); - c0 = _mm_aesenclast_si128(c0,kc); - c1 = _mm_aesenclast_si128(c1,kc); - c2 = _mm_aesenclast_si128(c2,kc); - c3 = _mm_aesenclast_si128(c3,kc); - d0 = _mm_xor_si128(d0,c0); - d1 = _mm_xor_si128(d1,c1); - d2 = _mm_xor_si128(d2,c2); - d3 = _mm_xor_si128(d3,c3); - _mm_storeu_si128((__m128i *)out,d0); - _mm_storeu_si128((__m128i *)(out + 16),d1); - _mm_storeu_si128((__m128i *)(out + 32),d2); - _mm_storeu_si128((__m128i *)(out + 48),d3); + ka = key[14]; + ZT_AES_CTR_AESNI_ROUND(kc); + ZT_AES_CTR_AESNI_ROUND(kd); + _mm_storeu_si128((__m128i *)out,_mm_xor_si128(_mm_loadu_si128((const __m128i *)in),_mm_aesenclast_si128(c0,ka))); + _mm_storeu_si128((__m128i *)(out + 16),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 16)),_mm_aesenclast_si128(c1,ka))); + _mm_storeu_si128((__m128i *)(out + 32),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 32)),_mm_aesenclast_si128(c2,ka))); + _mm_storeu_si128((__m128i *)(out + 48),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 48)),_mm_aesenclast_si128(c3,ka))); in += 64; out += 64; len -= 64; } - register __m128i k0 = key[0]; - register __m128i k1 = key[1]; register __m128i k2 = key[2]; register __m128i k3 = key[3]; register __m128i k4 = key[4]; register __m128i k5 = key[5]; register __m128i k6 = key[6]; register __m128i k7 = key[7]; - /* not enough XMM registers for all of them, but it helps slightly... */ while (len >= 16) { register __m128i c0 = _mm_xor_si128(ctr0,k0); diff --git a/node/AES.hpp b/node/AES.hpp index a4a35ea03..15f8077c9 100644 --- a/node/AES.hpp +++ b/node/AES.hpp @@ -23,7 +23,6 @@ #include #include #include -#include #define ZT_AES_AESNI 1 diff --git a/node/Constants.hpp b/node/Constants.hpp index 29deb8ff5..a13b58856 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -25,13 +25,6 @@ #define ZEROTIER_ONE_VERSION_BUILD 255 #endif -#ifndef ZT_BUILD_ARCHITECTURE -#define ZT_BUILD_ARCHITECTURE 0 -#endif -#ifndef ZT_BUILD_PLATFORM -#define ZT_BUILD_PLATFORM 0 -#endif - // // This include file also auto-detects and canonicalizes some environment // information defines: @@ -106,7 +99,7 @@ #ifdef __NetBSD__ #ifndef RTF_MULTICAST -#define RTF_MULTICAST 0x20000000 +#define RTF_MULTICAST 0x20000000 #endif #endif @@ -126,6 +119,9 @@ #define __LITTLE_ENDIAN 1234 #define __BYTE_ORDER 1234 #endif +#ifndef __BYTE_ORDER +#include +#endif #ifdef __WINDOWS__ #define ZT_PATH_SEPARATOR '\\' @@ -137,10 +133,6 @@ #define ZT_EOL_S "\n" #endif -#ifndef __BYTE_ORDER -#include -#endif - #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) #define ZT_ALWAYS_INLINE inline __attribute__((always_inline)) #ifndef likely @@ -516,7 +508,7 @@ #define ZT_MAX_BRIDGE_SPAM 32 /** - * Interval between direct path pushes in milliseconds + * Interval between direct path pushes in milliseconds if we don't have a path */ #define ZT_DIRECT_PATH_PUSH_INTERVAL 15000 @@ -562,7 +554,7 @@ /** * General rate limit for other kinds of rate-limited packets (HELLO, credential request, etc.) both inbound and outbound */ -#define ZT_PEER_GENERAL_RATE_LIMIT 1000 +#define ZT_PEER_GENERAL_RATE_LIMIT 500 /** * Don't do expensive identity validation more often than this @@ -584,11 +576,6 @@ #endif #endif -/** - * How long is a path or peer considered to have a trust relationship with us (for e.g. relay policy) since last trusted established packet? - */ -#define ZT_TRUST_EXPIRATION 600000 - /** * Size of a buffer to store either a C25519 or an ECC P-384 signature * diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index cf411ec69..4490af56a 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -11,12 +11,6 @@ */ /****/ -#include -#include -#include - -#include - #include "../include/ZeroTierOne.h" #include "Constants.hpp" @@ -35,6 +29,12 @@ #include "Revocation.hpp" #include "Trace.hpp" +#include +#include +#include + +#include + namespace ZeroTier { bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) diff --git a/node/Root.hpp b/node/Root.hpp index 8173a3905..c526007da 100644 --- a/node/Root.hpp +++ b/node/Root.hpp @@ -42,7 +42,7 @@ namespace ZeroTier { class Root { public: - inline Root() : _dnsPublicKeySize(0) {} + ZT_ALWAYS_INLINE Root() : _dnsPublicKeySize(0) {} /** * Create a new root entry @@ -54,7 +54,7 @@ public: * @param dflAddrs Default IP addresses if DNS is not available */ template - inline Root(S dn,const uint8_t *const dnspk,const unsigned int dnspksize,const Identity &dflId,const std::vector &dflAddrs) : + ZT_ALWAYS_INLINE Root(S dn,const uint8_t *const dnspk,const unsigned int dnspksize,const Identity &dflId,const std::vector &dflAddrs) : _defaultIdentity(dflId), _defaultAddresses(dflAddrs), _dnsName(dn), @@ -70,7 +70,7 @@ public: /** * @return Current identity (either default or latest locator) */ - inline const Identity id() const + ZT_ALWAYS_INLINE const Identity id() const { if (_lastFetchedLocator.id()) return _lastFetchedLocator.id(); @@ -81,7 +81,7 @@ public: * @param id Identity to check * @return True if identity equals this root's current identity */ - inline bool is(const Identity &id) const + ZT_ALWAYS_INLINE bool is(const Identity &id) const { return ((_lastFetchedLocator.id()) ? (id == _lastFetchedLocator.id()) : (id == _defaultIdentity)); } @@ -89,7 +89,7 @@ public: /** * @return Current ZeroTier address (either default or latest locator) */ - inline const Address address() const + ZT_ALWAYS_INLINE const Address address() const { if (_lastFetchedLocator.id()) return _lastFetchedLocator.id().address(); @@ -99,31 +99,22 @@ public: /** * @return DNS name for this root or empty string if static entry with no DNS */ - inline const Str dnsName() const - { - return _dnsName; - } + ZT_ALWAYS_INLINE const Str dnsName() const { return _dnsName; } /** * @return Latest locator or NIL locator object if none */ - inline Locator locator() const - { - return _lastFetchedLocator; - } + ZT_ALWAYS_INLINE Locator locator() const { return _lastFetchedLocator; } /** * @return Timestamp of latest retrieved locator or 0 if none */ - inline int64_t locatorTimestamp() const - { - return _lastFetchedLocator.timestamp(); - } + ZT_ALWAYS_INLINE int64_t locatorTimestamp() const { return _lastFetchedLocator.timestamp(); } /** * Update locator, returning true if new locator is valid and newer than existing */ - inline bool updateLocator(const Locator &loc) + ZT_ALWAYS_INLINE bool updateLocator(const Locator &loc) { if (!loc.verify()) return false; @@ -138,7 +129,7 @@ public: * Update this root's locator from a series of TXT records */ template - inline bool updateLocatorFromTxt(I start,I end) + ZT_ALWAYS_INLINE bool updateLocatorFromTxt(I start,I end) { try { if (_dnsPublicKeySize != ZT_ECC384_PUBLIC_KEY_SIZE) @@ -161,7 +152,7 @@ public: * @param addressFamily AF_INET or AF_INET6 * @return Address or InetAddress::NIL if no addresses exist for the given family */ - inline const InetAddress &pickPhysical(const int addressFamily) const + ZT_ALWAYS_INLINE const InetAddress &pickPhysical(const int addressFamily) const { std::vector pickList; const std::vector *const av = (_lastFetchedLocator) ? &(_lastFetchedLocator.phy()) : &_defaultAddresses; diff --git a/node/RuntimeEnvironment.hpp b/node/RuntimeEnvironment.hpp index 07ab43331..3829e4620 100644 --- a/node/RuntimeEnvironment.hpp +++ b/node/RuntimeEnvironment.hpp @@ -37,7 +37,7 @@ class Trace; class RuntimeEnvironment { public: - RuntimeEnvironment(Node *n) : + ZT_ALWAYS_INLINE RuntimeEnvironment(Node *n) : node(n) ,localNetworkController((NetworkController *)0) ,rtmem((void *)0) @@ -50,10 +50,7 @@ public: secretIdentityStr[0] = (char)0; } - ~RuntimeEnvironment() - { - Utils::burn(secretIdentityStr,sizeof(secretIdentityStr)); - } + ZT_ALWAYS_INLINE ~RuntimeEnvironment() { Utils::burn(secretIdentityStr,sizeof(secretIdentityStr)); } // Node instance that owns this RuntimeEnvironment Node *const node; diff --git a/node/Switch.cpp b/node/Switch.cpp index c34fc8e0b..1b76bf082 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -35,7 +35,6 @@ namespace ZeroTier { Switch::Switch(const RuntimeEnvironment *renv) : RR(renv), - _lastBeaconResponse(0), _lastCheckedQueues(0), _lastUniteAttempt(8) // only really used on root servers and upstreams, and it'll grow there just fine { diff --git a/node/Switch.hpp b/node/Switch.hpp index b3a4b0b41..66ea340d8 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -202,7 +202,6 @@ private: bool _trySend(void *tPtr,Packet &packet,bool encrypt); // packet is modified if return is true const RuntimeEnvironment *const RR; - int64_t _lastBeaconResponse; volatile int64_t _lastCheckedQueues; // Time we last sent a WHOIS request for each address diff --git a/node/Utils.cpp b/node/Utils.cpp index 34690decc..e5c506c9d 100644 --- a/node/Utils.cpp +++ b/node/Utils.cpp @@ -43,6 +43,7 @@ namespace ZeroTier { #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) +#include static bool _zt_rdrand_supported() { #ifdef __WINDOWS__