Optimization stuff

This commit is contained in:
Adam Ierymenko 2019-08-26 18:15:32 -07:00
parent 990333e7ec
commit b9ef09dd58
No known key found for this signature in database
GPG Key ID: C8877CF2D7A5D7F3
3 changed files with 97 additions and 63 deletions

View File

@ -69,7 +69,7 @@ static bool _zt_aesni_supported()
return ((ecx & (1 << 25)) != 0);
#endif
}
const bool AES::HW_ACCEL = _zt_aesni_supported();
// NOTE(review): the flag is hard-coded to false here and the call to
// _zt_aesni_supported() is commented out, so the CPU feature probe's result is
// never consulted in this branch. Presumably a temporary override to exercise
// the software code path -- confirm before shipping.
const bool AES::HW_ACCEL = false; //_zt_aesni_supported();
#else
const bool AES::HW_ACCEL = false;
#endif
@ -116,20 +116,17 @@ void AES::_initSW(const uint8_t key[32])
void AES::_encryptSW(const uint8_t in[16],uint8_t out[16]) const
{
const uint32_t *rk = _k.sw.ek;
uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
s0 = readuint32_t(in) ^ rk[0];
s1 = readuint32_t(in + 4) ^ rk[1];
s2 = readuint32_t(in + 8) ^ rk[2];
s3 = readuint32_t(in + 12) ^ rk[3];
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
const uint32_t *const rk = _k.sw.ek;
uint32_t s0 = readuint32_t(in) ^ rk[0];
uint32_t s1 = readuint32_t(in + 4) ^ rk[1];
uint32_t s2 = readuint32_t(in + 8) ^ rk[2];
uint32_t s3 = readuint32_t(in + 12) ^ rk[3];
uint32_t t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[4];
uint32_t t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[5];
uint32_t t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[6];
uint32_t t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[7];
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[8];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[9];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
@ -176,16 +173,10 @@ void AES::_encryptSW(const uint8_t in[16],uint8_t out[16]) const
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
rk += 56;
s0 = (Te2[(t0 >> 24)] & 0xff000000) ^ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t3) & 0xff] & 0x000000ff) ^ rk[0];
writeuint32_t(out, s0);
s1 = (Te2[(t1 >> 24)] & 0xff000000) ^ (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t0) & 0xff] & 0x000000ff) ^ rk[1];
writeuint32_t(out + 4, s1);
s2 = (Te2[(t2 >> 24)] & 0xff000000) ^ (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t1) & 0xff] & 0x000000ff) ^ rk[2];
writeuint32_t(out + 8, s2);
s3 = (Te2[(t3 >> 24)] & 0xff000000) ^ (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t2) & 0xff] & 0x000000ff) ^ rk[3];
writeuint32_t(out + 12, s3);
writeuint32_t(out,(Te2[(t0 >> 24)] & 0xff000000) ^ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t3) & 0xff] & 0x000000ff) ^ rk[56]);
writeuint32_t(out + 4,(Te2[(t1 >> 24)] & 0xff000000) ^ (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t0) & 0xff] & 0x000000ff) ^ rk[57]);
writeuint32_t(out + 8,(Te2[(t2 >> 24)] & 0xff000000) ^ (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t1) & 0xff] & 0x000000ff) ^ rk[58]);
writeuint32_t(out + 12,(Te2[(t3 >> 24)] & 0xff000000) ^ (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t2) & 0xff] & 0x000000ff) ^ rk[59]);
}
#if (defined(__GNUC__) || defined(__clang)) && (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64) || defined(__aarch64__))
@ -334,6 +325,7 @@ static ZT_ALWAYS_INLINE void s_gfmul(const uint64_t h_high,const uint64_t h_low,
y0 = Utils::hton(z_high_h);
y1 = Utils::hton(z_high_l);
}
#endif
void AES::_gmacSW(const uint8_t iv[12],const uint8_t *in,unsigned int len,uint8_t out[16]) const
@ -344,10 +336,15 @@ void AES::_gmacSW(const uint8_t iv[12],const uint8_t *in,unsigned int len,uint8_
uint64_t y0 = 0,y1 = 0;
while (len >= 16) {
#ifdef ZT_NO_TYPE_PUNNING
for(unsigned int i=0;i<8;++i) ((uint8_t *)&y0)[i] ^= *(in++);
for(unsigned int i=0;i<8;++i) ((uint8_t *)&y1)[i] ^= *(in++);
#else
y0 ^= *((const uint64_t *)in);
in += 8;
y1 ^= *((const uint64_t *)in);
in += 8;
#endif
s_gfmul(h0,h1,y0,y1);
len -= 16;
}
@ -372,8 +369,13 @@ void AES::_gmacSW(const uint8_t iv[12],const uint8_t *in,unsigned int len,uint8_
((uint8_t *)iv2)[14] = 0;
((uint8_t *)iv2)[15] = 1;
_encryptSW((const uint8_t *)iv2,(uint8_t *)iv2);
#ifdef ZT_NO_TYPE_PUNNING
for(unsigned int i=0;i<8;++i) out[i] = ((const uint8_t *)&y0)[i] ^ ((const uint8_t *)iv2)[i];
for(unsigned int i=8;i<16;++i) out[i] = ((const uint8_t *)&y1)[i-8] ^ ((const uint8_t *)iv2)[i];
#else
((uint64_t *)out)[0] = y0 ^ iv2[0];
((uint64_t *)out)[1] = y1 ^ iv2[1];
#endif
}
} // namespace ZeroTier

View File

@ -135,11 +135,21 @@ public:
const uint8_t *i = (const uint8_t *)in;
uint8_t *o = (uint8_t *)out;
while (len >= 16) {
_encryptSW((const uint8_t *)ctr,(uint8_t *)cenc);
ctr[1] = Utils::hton(++bctr);
#ifdef ZT_NO_TYPE_PUNNING
for(unsigned int k=0;k<16;++k)
*(o++) = *(i++) ^ ((uint8_t *)cenc)[k];
#else
*((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[0];
o += 8;
i += 8;
*((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[1];
o += 8;
i += 8;
#endif
len -= 16;
}
@ -280,7 +290,7 @@ private:
#endif
struct {
uint64_t h[2];
uint32_t ek[30];
uint32_t ek[60];
} sw;
} _k;
/**************************************************************************/

View File

@ -48,7 +48,7 @@ public:
* @param len Length of strings
* @return True if strings are equal
*/
static inline bool secureEq(const void *a,const void *b,unsigned int len)
static ZT_ALWAYS_INLINE bool secureEq(const void *a,const void *b,unsigned int len)
{
uint8_t diff = 0;
for(unsigned int i=0;i<len;++i)
@ -167,7 +167,7 @@ public:
/**
* Get a 64-bit unsigned secure random number
*/
static inline uint64_t getSecureRandom64()
static ZT_ALWAYS_INLINE uint64_t getSecureRandom64()
{
uint64_t x;
getSecureRandom(&x,sizeof(x));
@ -177,7 +177,7 @@ public:
static int b32e(const uint8_t *data,int length,char *result,int bufSize);
static int b32d(const char *encoded, uint8_t *result, int bufSize);
static inline unsigned int b64MaxEncodedSize(const unsigned int s) { return ((((s + 2) / 3) * 4) + 1); }
/**
 * Compute an upper bound on the size of a base64 encoding
 *
 * Every started group of three input bytes contributes four to the result,
 * and one more is added on top (presumably room for a terminating NUL --
 * confirm against b64e()).
 *
 * @param s Number of input bytes
 * @return ((s + 2) / 3) * 4 + 1
 */
static ZT_ALWAYS_INLINE unsigned int b64MaxEncodedSize(const unsigned int s)
{
	const unsigned int groupsOfThree = (s + 2) / 3; // rounds up
	return (groupsOfThree * 4) + 1;
}
static unsigned int b64e(const uint8_t *in,unsigned int inlen,char *out,unsigned int outlen);
static unsigned int b64d(const char *in,uint8_t *out,unsigned int outlen);
@ -186,7 +186,7 @@ public:
*/
static uint64_t random();
static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax)
static ZT_ALWAYS_INLINE float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax)
{
int64_t bigSpan = bigMax - bigMin;
int64_t smallSpan = targetMax - targetMin;
@ -201,7 +201,7 @@ public:
* @param delim Delimiters
* @param saveptr Pointer to a char * for temporary reentrant storage
*/
static inline char *stok(char *str,const char *delim,char **saveptr)
static ZT_ALWAYS_INLINE char *stok(char *str,const char *delim,char **saveptr)
{
#ifdef __WINDOWS__
return strtok_s(str,delim,saveptr);
@ -210,11 +210,11 @@ public:
#endif
}
static inline unsigned int strToUInt(const char *s) { return (unsigned int)strtoul(s,(char **)0,10); }
static inline int strToInt(const char *s) { return (int)strtol(s,(char **)0,10); }
static inline unsigned long strToULong(const char *s) { return strtoul(s,(char **)0,10); }
static inline long strToLong(const char *s) { return strtol(s,(char **)0,10); }
static inline unsigned long long strToU64(const char *s)
/**
 * Parse a base-10 string with strtoul() and narrow the result to unsigned int
 *
 * @param s String passed to strtoul(); no end pointer is requested
 * @return Parsed value cast to unsigned int
 */
static ZT_ALWAYS_INLINE unsigned int strToUInt(const char *s)
{
	const unsigned long parsed = strtoul(s,(char **)0,10);
	return (unsigned int)parsed;
}
/**
 * Parse a base-10 string with strtol() and narrow the result to int
 *
 * @param s String passed to strtol(); no end pointer is requested
 * @return Parsed value cast to int
 */
static ZT_ALWAYS_INLINE int strToInt(const char *s)
{
	const long parsed = strtol(s,(char **)0,10);
	return (int)parsed;
}
/**
 * Parse a base-10 string with strtoul()
 *
 * @param s String passed to strtoul(); no end pointer is requested
 * @return Value returned by strtoul()
 */
static ZT_ALWAYS_INLINE unsigned long strToULong(const char *s)
{
	const unsigned long parsed = strtoul(s,(char **)0,10);
	return parsed;
}
/**
 * Parse a base-10 string with strtol()
 *
 * @param s String passed to strtol(); no end pointer is requested
 * @return Value returned by strtol()
 */
static ZT_ALWAYS_INLINE long strToLong(const char *s)
{
	const long parsed = strtol(s,(char **)0,10);
	return parsed;
}
static ZT_ALWAYS_INLINE unsigned long long strToU64(const char *s)
{
#ifdef __WINDOWS__
return (unsigned long long)_strtoui64(s,(char **)0,10);
@ -222,7 +222,7 @@ public:
return strtoull(s,(char **)0,10);
#endif
}
static inline long long strTo64(const char *s)
static ZT_ALWAYS_INLINE long long strTo64(const char *s)
{
#ifdef __WINDOWS__
return (long long)_strtoi64(s,(char **)0,10);
@ -230,11 +230,11 @@ public:
return strtoll(s,(char **)0,10);
#endif
}
static inline unsigned int hexStrToUInt(const char *s) { return (unsigned int)strtoul(s,(char **)0,16); }
static inline int hexStrToInt(const char *s) { return (int)strtol(s,(char **)0,16); }
static inline unsigned long hexStrToULong(const char *s) { return strtoul(s,(char **)0,16); }
static inline long hexStrToLong(const char *s) { return strtol(s,(char **)0,16); }
static inline unsigned long long hexStrToU64(const char *s)
/**
 * Parse a base-16 string with strtoul() and narrow the result to unsigned int
 *
 * @param s String passed to strtoul(); no end pointer is requested
 * @return Parsed value cast to unsigned int
 */
static ZT_ALWAYS_INLINE unsigned int hexStrToUInt(const char *s)
{
	const unsigned long parsed = strtoul(s,(char **)0,16);
	return (unsigned int)parsed;
}
/**
 * Parse a base-16 string with strtol() and narrow the result to int
 *
 * @param s String passed to strtol(); no end pointer is requested
 * @return Parsed value cast to int
 */
static ZT_ALWAYS_INLINE int hexStrToInt(const char *s)
{
	const long parsed = strtol(s,(char **)0,16);
	return (int)parsed;
}
/**
 * Parse a base-16 string with strtoul()
 *
 * @param s String passed to strtoul(); no end pointer is requested
 * @return Value returned by strtoul()
 */
static ZT_ALWAYS_INLINE unsigned long hexStrToULong(const char *s)
{
	const unsigned long parsed = strtoul(s,(char **)0,16);
	return parsed;
}
/**
 * Parse a base-16 string with strtol()
 *
 * @param s String passed to strtol(); no end pointer is requested
 * @return Value returned by strtol()
 */
static ZT_ALWAYS_INLINE long hexStrToLong(const char *s)
{
	const long parsed = strtol(s,(char **)0,16);
	return parsed;
}
static ZT_ALWAYS_INLINE unsigned long long hexStrToU64(const char *s)
{
#ifdef __WINDOWS__
return (unsigned long long)_strtoui64(s,(char **)0,16);
@ -242,7 +242,7 @@ public:
return strtoull(s,(char **)0,16);
#endif
}
static inline long long hexStrTo64(const char *s)
static ZT_ALWAYS_INLINE long long hexStrTo64(const char *s)
{
#ifdef __WINDOWS__
return (long long)_strtoi64(s,(char **)0,16);
@ -262,7 +262,7 @@ public:
* @param src Source string (if NULL, dest will receive a zero-length string and true is returned)
* @return True on success, false on overflow (buffer will still be 0-terminated)
*/
static inline bool scopy(char *dest,unsigned int len,const char *src)
static ZT_ALWAYS_INLINE bool scopy(char *dest,unsigned int len,const char *src)
{
if (!len)
return false; // sanity check
@ -287,7 +287,7 @@ public:
* @return Number of bits set in this integer (0-bits in integer)
*/
template<typename T>
static inline uint64_t countBits(T v)
static ZT_ALWAYS_INLINE uint64_t countBits(T v)
{
v = v - ((v >> 1) & (T)~(T)0/3);
v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
@ -297,13 +297,24 @@ public:
// Byte swappers for big/little endian conversion
#if __BYTE_ORDER == __LITTLE_ENDIAN
static inline uint8_t hton(uint8_t n) { return n; }
static inline int8_t hton(int8_t n) { return n; }
static inline uint16_t hton(uint16_t n) { return htons(n); }
static inline int16_t hton(int16_t n) { return (int16_t)htons((uint16_t)n); }
static inline uint32_t hton(uint32_t n) { return htonl(n); }
static inline int32_t hton(int32_t n) { return (int32_t)htonl((uint32_t)n); }
static inline uint64_t hton(uint64_t n)
// Single-byte overload: the value is returned unchanged.
static ZT_ALWAYS_INLINE uint8_t hton(uint8_t n)
{
	return n;
}
// Signed single-byte overload: the value is returned unchanged.
static ZT_ALWAYS_INLINE int8_t hton(int8_t n)
{
	return n;
}
// 16-bit unsigned overload: delegates to htons().
static ZT_ALWAYS_INLINE uint16_t hton(uint16_t n)
{
	const uint16_t networkOrder = htons(n);
	return networkOrder;
}
// 16-bit signed overload: converts via the uint16_t overload and casts back.
static ZT_ALWAYS_INLINE int16_t hton(int16_t n)
{
	const uint16_t asUnsigned = (uint16_t)n;
	return (int16_t)Utils::hton(asUnsigned);
}
/**
 * Convert a 32-bit unsigned integer from host to network byte order
 *
 * GCC-compatible compilers other than those targeting FreeBSD or OpenBSD use
 * the compiler's byte-swap builtin; every other configuration uses htonl().
 * The earlier nested conditional compiled no return statement at all when
 * both __GNUC__ and __OpenBSD__ were defined, so the function could flow off
 * its end without a value; that configuration now takes the htonl() path.
 *
 * @param n Value to convert
 * @return Byte-swapped value
 */
static ZT_ALWAYS_INLINE uint32_t hton(uint32_t n)
{
#if defined(__GNUC__) && !defined(__FreeBSD__) && !defined(__OpenBSD__)
	return __builtin_bswap32(n);
#else
	return htonl(n);
#endif
}
// 32-bit signed overload: converts via the uint32_t overload and casts back.
static ZT_ALWAYS_INLINE int32_t hton(int32_t n)
{
	const uint32_t asUnsigned = (uint32_t)n;
	return (int32_t)Utils::hton(asUnsigned);
}
static ZT_ALWAYS_INLINE uint64_t hton(uint64_t n)
{
#if defined(__GNUC__)
#if defined(__FreeBSD__)
@ -324,20 +335,31 @@ public:
);
#endif
}
static inline int64_t hton(int64_t n) { return (int64_t)hton((uint64_t)n); }
// 64-bit signed overload: converts via the uint64_t overload and casts back.
static ZT_ALWAYS_INLINE int64_t hton(int64_t n)
{
	const uint64_t asUnsigned = (uint64_t)n;
	return (int64_t)hton(asUnsigned);
}
#else
template<typename T>
static inline T hton(T n) { return n; }
static ZT_ALWAYS_INLINE T hton(T n) { return n; }
#endif
#if __BYTE_ORDER == __LITTLE_ENDIAN
static inline uint8_t ntoh(uint8_t n) { return n; }
static inline int8_t ntoh(int8_t n) { return n; }
static inline uint16_t ntoh(uint16_t n) { return ntohs(n); }
static inline int16_t ntoh(int16_t n) { return (int16_t)ntohs((uint16_t)n); }
static inline uint32_t ntoh(uint32_t n) { return ntohl(n); }
static inline int32_t ntoh(int32_t n) { return (int32_t)ntohl((uint32_t)n); }
static inline uint64_t ntoh(uint64_t n)
// Single-byte overload: the value is returned unchanged.
static ZT_ALWAYS_INLINE uint8_t ntoh(uint8_t n)
{
	return n;
}
// Signed single-byte overload: the value is returned unchanged.
static ZT_ALWAYS_INLINE int8_t ntoh(int8_t n)
{
	return n;
}
// 16-bit unsigned overload: delegates to ntohs().
static ZT_ALWAYS_INLINE uint16_t ntoh(uint16_t n)
{
	const uint16_t hostOrder = ntohs(n);
	return hostOrder;
}
// 16-bit signed overload: converts via the uint16_t overload and casts back.
static ZT_ALWAYS_INLINE int16_t ntoh(int16_t n)
{
	const uint16_t asUnsigned = (uint16_t)n;
	return (int16_t)Utils::ntoh(asUnsigned);
}
/**
 * Convert a 32-bit unsigned integer from network to host byte order
 *
 * GCC-compatible compilers other than those targeting FreeBSD or OpenBSD use
 * the compiler's byte-swap builtin; every other configuration uses ntohl().
 * The earlier nested conditional compiled no return statement at all when
 * both __GNUC__ and __OpenBSD__ were defined, so the function could flow off
 * its end without a value; that configuration now takes the ntohl() path.
 *
 * @param n Value to convert
 * @return Byte-swapped value
 */
static ZT_ALWAYS_INLINE uint32_t ntoh(uint32_t n)
{
#if defined(__GNUC__) && !defined(__FreeBSD__) && !defined(__OpenBSD__)
	return __builtin_bswap32(n);
#else
	return ntohl(n);
#endif
}
// 32-bit signed overload: converts via the uint32_t overload and casts back.
static ZT_ALWAYS_INLINE int32_t ntoh(int32_t n)
{
	const uint32_t asUnsigned = (uint32_t)n;
	return (int32_t)Utils::ntoh(asUnsigned);
}
static ZT_ALWAYS_INLINE uint64_t ntoh(uint64_t n)
{
#if defined(__GNUC__)
#if defined(__FreeBSD__)
@ -358,10 +380,10 @@ public:
);
#endif
}
static inline int64_t ntoh(int64_t n) { return (int64_t)ntoh((uint64_t)n); }
// 64-bit signed overload: converts via the uint64_t overload and casts back.
static ZT_ALWAYS_INLINE int64_t ntoh(int64_t n)
{
	const uint64_t asUnsigned = (uint64_t)n;
	return (int64_t)ntoh(asUnsigned);
}
#else
template<typename T>
static inline T ntoh(T n) { return n; }
static ZT_ALWAYS_INLINE T ntoh(T n) { return n; }
#endif
};