diff --git a/node/Packet.cpp b/node/Packet.cpp index 85b18cff6..8a57dd559 100644 --- a/node/Packet.cpp +++ b/node/Packet.cpp @@ -1109,38 +1109,26 @@ void Packet::armor(const void *key,bool encryptPayload,unsigned int counter) setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE); _salsa20MangleKey((const unsigned char *)key,mangledKey); - if (ZT_HAS_FAST_CRYPTO()) { - const unsigned int payloadLen = (encryptPayload) ? (size() - ZT_PACKET_IDX_VERB) : 0; + const unsigned int encryptLen = (encryptPayload) ? (size() - ZT_PACKET_IDX_VERB) : 0; uint64_t keyStream[(ZT_PROTO_MAX_PACKET_LENGTH + 64 + 8) / 8]; - ZT_FAST_SINGLE_PASS_SALSA2012(keyStream,payloadLen + 64,(data + ZT_PACKET_IDX_IV),mangledKey); - - uint64_t *ksptr = keyStream + 8; // encryption starts after first Salsa20 block - uint8_t *dptr = data + ZT_PACKET_IDX_VERB; - unsigned int ksrem = payloadLen; - while (ksrem >= 8) { - ksrem -= 8; - *(reinterpret_cast(dptr)) ^= *(ksptr++); - dptr += 8; - } - for(unsigned int i=0;i(ksptr)[i]; - } - + ZT_FAST_SINGLE_PASS_SALSA2012(keyStream,encryptLen + 64,(data + ZT_PACKET_IDX_IV),mangledKey); + Salsa20::memxor(data + ZT_PACKET_IDX_VERB,reinterpret_cast(keyStream + 8),encryptLen); uint64_t mac[2]; Poly1305::compute(mac,data + ZT_PACKET_IDX_VERB,size() - ZT_PACKET_IDX_VERB,keyStream); +#ifdef ZT_NO_TYPE_PUNNING memcpy(data + ZT_PACKET_IDX_MAC,mac,8); +#else + (*reinterpret_cast(data + ZT_PACKET_IDX_MAC)) = mac[0]; +#endif } else { Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV); - uint64_t macKey[4]; s20.crypt12(ZERO_KEY,macKey,sizeof(macKey)); - uint8_t *const payload = data + ZT_PACKET_IDX_VERB; const unsigned int payloadLen = size() - ZT_PACKET_IDX_VERB; if (encryptPayload) s20.crypt12(payload,payload,payloadLen); - uint64_t mac[2]; Poly1305::compute(mac,payload,payloadLen,macKey); memcpy(data + ZT_PACKET_IDX_MAC,mac,8); @@ -1157,39 +1145,33 @@ bool Packet::dearmor(const void *key) if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) { _salsa20MangleKey((const unsigned char *)key,mangledKey); - if (ZT_HAS_FAST_CRYPTO()) { uint64_t keyStream[(ZT_PROTO_MAX_PACKET_LENGTH + 64 + 8) / 8]; ZT_FAST_SINGLE_PASS_SALSA2012(keyStream,((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) ? (payloadLen + 64) : 64),(data + ZT_PACKET_IDX_IV),mangledKey); - uint64_t mac[2]; Poly1305::compute(mac,payload,payloadLen,keyStream); +#ifdef ZT_NO_TYPE_PUNNING if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8)) - return false; // MAC failed, packet is corrupt, modified, or is not from the sender - - if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) { - uint64_t *ksptr = keyStream + 8; // encryption starts after first Salsa20 block - uint8_t *dptr = data + ZT_PACKET_IDX_VERB; - unsigned int ksrem = payloadLen; - while (ksrem >= 8) { - ksrem -= 8; - *(reinterpret_cast(dptr)) ^= *(ksptr++); - dptr += 8; - } - for(unsigned int i=0;i(ksptr)[i]; - } - } + return false; +#else + if ((*reinterpret_cast(data + ZT_PACKET_IDX_MAC)) != mac[0]) // also secure, constant time + return false; +#endif + if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) + Salsa20::memxor(data + ZT_PACKET_IDX_VERB,reinterpret_cast(keyStream + 8),payloadLen); } else { Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV); - uint64_t macKey[4]; s20.crypt12(ZERO_KEY,macKey,sizeof(macKey)); uint64_t mac[2]; Poly1305::compute(mac,payload,payloadLen,macKey); +#ifdef ZT_NO_TYPE_PUNNING if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8)) - return false; // MAC failed, packet is corrupt, modified, or is not from the sender - + return false; +#else + if ((*reinterpret_cast(data + ZT_PACKET_IDX_MAC)) != mac[0]) // also secure, constant time + return false; +#endif if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) s20.crypt12(payload,payload,payloadLen); } diff --git a/node/Salsa20.cpp b/node/Salsa20.cpp index 2a8025557..1d4117e31 100644 --- a/node/Salsa20.cpp +++ b/node/Salsa20.cpp @@ -69,46 +69,42 @@ namespace ZeroTier { void Salsa20::init(const void *key,const void *iv) { #ifdef ZT_SALSA20_SSE - const uint32_t *k = (const uint32_t *)key; - + const uint32_t *const k = (const uint32_t *)key; _state.i[0] = 0x61707865; - _state.i[3] = 0x6b206574; - _state.i[13] = k[0]; - _state.i[10] = k[1]; - _state.i[7] = k[2]; - _state.i[4] = k[3]; - k += 4; _state.i[1] = 0x3320646e; _state.i[2] = 0x79622d32; - _state.i[15] = k[0]; - _state.i[12] = k[1]; - _state.i[9] = k[2]; - _state.i[6] = k[3]; - _state.i[14] = ((const uint32_t *)iv)[0]; - _state.i[11] = ((const uint32_t *)iv)[1]; + _state.i[3] = 0x6b206574; + _state.i[4] = k[3]; _state.i[5] = 0; + _state.i[6] = k[7]; + _state.i[7] = k[2]; _state.i[8] = 0; + _state.i[9] = k[6]; + _state.i[10] = k[1]; + _state.i[11] = ((const uint32_t *)iv)[1]; + _state.i[12] = k[5]; + _state.i[13] = k[0]; + _state.i[14] = ((const uint32_t *)iv)[0]; + _state.i[15] = k[4]; #else const char *const constants = "expand 32-byte k"; - const uint8_t *k = (const uint8_t *)key; - + const uint8_t *const k = (const uint8_t *)key; + _state.i[0] = U8TO32_LITTLE(constants + 0); _state.i[1] = U8TO32_LITTLE(k + 0); _state.i[2] = U8TO32_LITTLE(k + 4); _state.i[3] = U8TO32_LITTLE(k + 8); _state.i[4] = U8TO32_LITTLE(k + 12); - k += 16; _state.i[5] = U8TO32_LITTLE(constants + 4); _state.i[6] = U8TO32_LITTLE(((const uint8_t *)iv) + 0); _state.i[7] = U8TO32_LITTLE(((const uint8_t *)iv) + 4); _state.i[8] = 0; _state.i[9] = 0; _state.i[10] = U8TO32_LITTLE(constants + 8); - _state.i[11] = U8TO32_LITTLE(k + 0); - _state.i[12] = U8TO32_LITTLE(k + 4); - _state.i[13] = U8TO32_LITTLE(k + 8); - _state.i[14] = U8TO32_LITTLE(k + 12); + _state.i[11] = U8TO32_LITTLE(k + 16); + _state.i[12] = U8TO32_LITTLE(k + 20); + _state.i[13] = U8TO32_LITTLE(k + 24); + _state.i[14] = U8TO32_LITTLE(k + 28); _state.i[15] = U8TO32_LITTLE(constants + 12); - _state.i[0] = U8TO32_LITTLE(constants + 0); #endif } diff --git a/node/Salsa20.hpp b/node/Salsa20.hpp index 448c8f1ad..52592602c 100644 --- a/node/Salsa20.hpp +++ b/node/Salsa20.hpp @@ -34,6 +34,43 @@ public: Salsa20() {} ~Salsa20() { Utils::burn(&_state,sizeof(_state)); } + /** + * XOR d with s + * + * This is done efficiently using e.g. SSE if available. It's used when + * alternative Salsa20 implementations are used in Packet and is here + * since this is where all the SSE stuff is already included. + * + * @param d Destination to XOR + * @param s Source bytes to XOR with destination + * @param len Length of s and d + */ + static inline void memxor(uint8_t *d,const uint8_t *s,unsigned int len) + { +#ifdef ZT_SALSA20_SSE + while (len >= 16) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(d),_mm_xor_si128(_mm_loadu_si128(reinterpret_cast<__m128i *>(d)),_mm_loadu_si128(reinterpret_cast(s)))); + s += 16; + d += 16; + len -= 16; + } +#else +#ifndef ZT_NO_TYPE_PUNNING + while (len >= 16) { + (*reinterpret_cast(d)) ^= (*reinterpret_cast(s)); + s += 8; + d += 8; + (*reinterpret_cast(d)) ^= (*reinterpret_cast(s)); + s += 8; + d += 8; + len -= 16; + } +#endif +#endif + while (len--) + *(d++) ^= *(s++); + } + /** * @param key 256-bit (32 byte) key * @param iv 64-bit initialization vector diff --git a/selftest.cpp b/selftest.cpp index 6df2e272d..91d304a62 100644 --- a/selftest.cpp +++ b/selftest.cpp @@ -141,8 +141,6 @@ static const C25519TestVector C25519_TEST_VECTORS[ZT_NUM_C25519_TEST_VECTORS] = ////////////////////////////////////////////////////////////////////////////// -static unsigned char fuzzbuf[1048576]; - static int testCrypto() { unsigned char buf1[16384];