mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-01-29 15:43:52 +00:00
Unroll Salsa20 fully for a little more speed (non-SSE now almost as fast as SSE)
This commit is contained in:
parent
3fa6dd377f
commit
0c498556d5
@ -41,7 +41,6 @@
|
||||
|
||||
#define ZT_IDENTITY_GEN_HASHCASH_FIRST_BYTE_LESS_THAN 17
|
||||
#define ZT_IDENTITY_GEN_MEMORY 2097152
|
||||
#define ZT_IDENTITY_GEN_SALSA20_ROUNDS 20
|
||||
|
||||
namespace ZeroTier {
|
||||
|
||||
@ -55,8 +54,8 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub
|
||||
// ordinary Salsa20 is randomly seekable. This is good for a cipher
|
||||
// but is not what we want for sequential memory-hardness.
|
||||
memset(genmem,0,ZT_IDENTITY_GEN_MEMORY);
|
||||
Salsa20 s20(digest,256,(char *)digest + 32,ZT_IDENTITY_GEN_SALSA20_ROUNDS);
|
||||
s20.encrypt((char *)genmem,(char *)genmem,64);
|
||||
Salsa20 s20(digest,256,(char *)digest + 32);
|
||||
s20.encrypt20((char *)genmem,(char *)genmem,64);
|
||||
for(unsigned long i=64;i<ZT_IDENTITY_GEN_MEMORY;i+=64) {
|
||||
unsigned long k = i - 64;
|
||||
*((uint64_t *)((char *)genmem + i)) = *((uint64_t *)((char *)genmem + k));
|
||||
@ -67,7 +66,7 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub
|
||||
*((uint64_t *)((char *)genmem + i + 40)) = *((uint64_t *)((char *)genmem + k + 40));
|
||||
*((uint64_t *)((char *)genmem + i + 48)) = *((uint64_t *)((char *)genmem + k + 48));
|
||||
*((uint64_t *)((char *)genmem + i + 56)) = *((uint64_t *)((char *)genmem + k + 56));
|
||||
s20.encrypt((char *)genmem + i,(char *)genmem + i,64);
|
||||
s20.encrypt20((char *)genmem + i,(char *)genmem + i,64);
|
||||
}
|
||||
|
||||
// Render final digest using genmem as a lookup table
|
||||
@ -77,7 +76,7 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub
|
||||
uint64_t tmp = ((uint64_t *)genmem)[idx2];
|
||||
((uint64_t *)genmem)[idx2] = ((uint64_t *)digest)[idx1];
|
||||
((uint64_t *)digest)[idx1] = tmp;
|
||||
s20.encrypt(digest,digest,64);
|
||||
s20.encrypt20(digest,digest,64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1149,9 +1149,9 @@ try_salsa2012sha512_again:
|
||||
++*(reinterpret_cast<volatile uint64_t *>(candidate));
|
||||
|
||||
SHA512::hash(shabuf,candidate,16 + challengeLength);
|
||||
s20.init(shabuf,256,&s20iv,12);
|
||||
s20.init(shabuf,256,&s20iv);
|
||||
memset(salsabuf,0,sizeof(salsabuf));
|
||||
s20.encrypt(salsabuf,salsabuf,sizeof(salsabuf));
|
||||
s20.encrypt12(salsabuf,salsabuf,sizeof(salsabuf));
|
||||
SHA512::hash(shabuf,salsabuf,sizeof(salsabuf));
|
||||
|
||||
d = difficulty;
|
||||
@ -1186,9 +1186,9 @@ bool IncomingPacket::testSalsa2012Sha512ProofOfWorkResult(unsigned int difficult
|
||||
memcpy(candidate + 16,challenge,challengeLength);
|
||||
|
||||
SHA512::hash(shabuf,candidate,16 + challengeLength);
|
||||
s20.init(shabuf,256,&s20iv,12);
|
||||
s20.init(shabuf,256,&s20iv);
|
||||
memset(salsabuf,0,sizeof(salsabuf));
|
||||
s20.encrypt(salsabuf,salsabuf,sizeof(salsabuf));
|
||||
s20.encrypt12(salsabuf,salsabuf,sizeof(salsabuf));
|
||||
SHA512::hash(shabuf,salsabuf,sizeof(salsabuf));
|
||||
|
||||
d = difficulty;
|
||||
|
@ -88,9 +88,9 @@ Node::Node(
|
||||
{
|
||||
char foo[32];
|
||||
Utils::getSecureRandom(foo,32);
|
||||
_prng.init(foo,256,foo,8);
|
||||
_prng.init(foo,256,foo);
|
||||
memset(_prngStream,0,sizeof(_prngStream));
|
||||
_prng.encrypt(_prngStream,_prngStream,sizeof(_prngStream));
|
||||
_prng.encrypt12(_prngStream,_prngStream,sizeof(_prngStream));
|
||||
}
|
||||
|
||||
std::string idtmp(dataStoreGet("identity.secret"));
|
||||
@ -574,7 +574,7 @@ uint64_t Node::prng()
|
||||
{
|
||||
unsigned int p = (++_prngStreamPtr % (sizeof(_prngStream) / sizeof(uint64_t)));
|
||||
if (!p)
|
||||
_prng.encrypt(_prngStream,_prngStream,sizeof(_prngStream));
|
||||
_prng.encrypt12(_prngStream,_prngStream,sizeof(_prngStream));
|
||||
return _prngStream[p];
|
||||
}
|
||||
|
||||
|
@ -92,14 +92,14 @@ void Packet::armor(const void *key,bool encryptPayload)
|
||||
setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE);
|
||||
|
||||
_salsa20MangleKey((const unsigned char *)key,mangledKey);
|
||||
Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8),ZT_PROTO_SALSA20_ROUNDS);
|
||||
Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8)/*,ZT_PROTO_SALSA20_ROUNDS*/);
|
||||
|
||||
// MAC key is always the first 32 bytes of the Salsa20 key stream
|
||||
// This is the same construction DJB's NaCl library uses
|
||||
s20.encrypt(ZERO_KEY,macKey,sizeof(macKey));
|
||||
s20.encrypt12(ZERO_KEY,macKey,sizeof(macKey));
|
||||
|
||||
if (encryptPayload)
|
||||
s20.encrypt(payload,payload,payloadLen);
|
||||
s20.encrypt12(payload,payload,payloadLen);
|
||||
|
||||
Poly1305::compute(mac,payload,payloadLen,macKey);
|
||||
memcpy(field(ZT_PACKET_IDX_MAC,8),mac,8);
|
||||
@ -116,15 +116,15 @@ bool Packet::dearmor(const void *key)
|
||||
|
||||
if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) {
|
||||
_salsa20MangleKey((const unsigned char *)key,mangledKey);
|
||||
Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8),ZT_PROTO_SALSA20_ROUNDS);
|
||||
Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8)/*,ZT_PROTO_SALSA20_ROUNDS*/);
|
||||
|
||||
s20.encrypt(ZERO_KEY,macKey,sizeof(macKey));
|
||||
s20.encrypt12(ZERO_KEY,macKey,sizeof(macKey));
|
||||
Poly1305::compute(mac,payload,payloadLen,macKey);
|
||||
if (!Utils::secureEq(mac,field(ZT_PACKET_IDX_MAC,8),8))
|
||||
return false;
|
||||
|
||||
if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)
|
||||
s20.decrypt(payload,payload,payloadLen);
|
||||
s20.decrypt12(payload,payload,payloadLen);
|
||||
|
||||
return true;
|
||||
} else return false; // unrecognized cipher suite
|
||||
|
1192
node/Salsa20.cpp
1192
node/Salsa20.cpp
File diff suppressed because it is too large
Load Diff
@ -35,12 +35,11 @@ public:
|
||||
* @param key Key bits
|
||||
* @param kbits Number of key bits: 128 or 256 (recommended)
|
||||
* @param iv 64-bit initialization vector
|
||||
* @param rounds Number of rounds: 8, 12, or 20
|
||||
*/
|
||||
Salsa20(const void *key,unsigned int kbits,const void *iv,unsigned int rounds)
|
||||
Salsa20(const void *key,unsigned int kbits,const void *iv)
|
||||
throw()
|
||||
{
|
||||
init(key,kbits,iv,rounds);
|
||||
init(key,kbits,iv);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -49,19 +48,28 @@ public:
|
||||
* @param key Key bits
|
||||
* @param kbits Number of key bits: 128 or 256 (recommended)
|
||||
* @param iv 64-bit initialization vector
|
||||
* @param rounds Number of rounds: 8, 12, or 20
|
||||
*/
|
||||
void init(const void *key,unsigned int kbits,const void *iv,unsigned int rounds)
|
||||
void init(const void *key,unsigned int kbits,const void *iv)
|
||||
throw();
|
||||
|
||||
/**
|
||||
* Encrypt data
|
||||
* Encrypt data using Salsa20/12
|
||||
*
|
||||
* @param in Input data
|
||||
* @param out Output buffer
|
||||
* @param bytes Length of data
|
||||
*/
|
||||
void encrypt(const void *in,void *out,unsigned int bytes)
|
||||
void encrypt12(const void *in,void *out,unsigned int bytes)
|
||||
throw();
|
||||
|
||||
/**
|
||||
* Encrypt data using Salsa20/20
|
||||
*
|
||||
* @param in Input data
|
||||
* @param out Output buffer
|
||||
* @param bytes Length of data
|
||||
*/
|
||||
void encrypt20(const void *in,void *out,unsigned int bytes)
|
||||
throw();
|
||||
|
||||
/**
|
||||
@ -71,10 +79,23 @@ public:
|
||||
* @param out Output buffer
|
||||
* @param bytes Length of data
|
||||
*/
|
||||
inline void decrypt(const void *in,void *out,unsigned int bytes)
|
||||
inline void decrypt12(const void *in,void *out,unsigned int bytes)
|
||||
throw()
|
||||
{
|
||||
encrypt(in,out,bytes);
|
||||
encrypt12(in,out,bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decrypt data using Salsa20/20
|
||||
*
|
||||
* @param in Input data
|
||||
* @param out Output buffer
|
||||
* @param bytes Length of data
|
||||
*/
|
||||
inline void decrypt20(const void *in,void *out,unsigned int bytes)
|
||||
throw()
|
||||
{
|
||||
encrypt20(in,out,bytes);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -84,7 +105,6 @@ private:
|
||||
#endif // ZT_SALSA20_SSE
|
||||
uint32_t i[16];
|
||||
} _state;
|
||||
unsigned int _roundsDiv4;
|
||||
};
|
||||
|
||||
} // namespace ZeroTier
|
||||
|
42
selftest.cpp
42
selftest.cpp
@ -162,27 +162,27 @@ static int testCrypto()
|
||||
memset(buf2,0,sizeof(buf2));
|
||||
memset(buf3,0,sizeof(buf3));
|
||||
Salsa20 s20;
|
||||
s20.init("12345678123456781234567812345678",256,"12345678",20);
|
||||
s20.encrypt(buf1,buf2,sizeof(buf1));
|
||||
s20.init("12345678123456781234567812345678",256,"12345678",20);
|
||||
s20.decrypt(buf2,buf3,sizeof(buf2));
|
||||
s20.init("12345678123456781234567812345678",256,"12345678");
|
||||
s20.encrypt20(buf1,buf2,sizeof(buf1));
|
||||
s20.init("12345678123456781234567812345678",256,"12345678");
|
||||
s20.decrypt20(buf2,buf3,sizeof(buf2));
|
||||
if (memcmp(buf1,buf3,sizeof(buf1))) {
|
||||
std::cout << "FAIL (encrypt/decrypt test)" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv,20);
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv);
|
||||
memset(buf1,0,sizeof(buf1));
|
||||
memset(buf2,0,sizeof(buf2));
|
||||
s20.encrypt(buf1,buf2,64);
|
||||
s20.encrypt20(buf1,buf2,64);
|
||||
if (memcmp(buf2,s20TV0Ks,64)) {
|
||||
std::cout << "FAIL (test vector 0)" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
s20.init(s2012TV0Key,256,s2012TV0Iv,12);
|
||||
s20.init(s2012TV0Key,256,s2012TV0Iv);
|
||||
memset(buf1,0,sizeof(buf1));
|
||||
memset(buf2,0,sizeof(buf2));
|
||||
s20.encrypt(buf1,buf2,64);
|
||||
s20.encrypt12(buf1,buf2,64);
|
||||
if (memcmp(buf2,s2012TV0Ks,64)) {
|
||||
std::cout << "FAIL (test vector 1)" << std::endl;
|
||||
return -1;
|
||||
@ -195,34 +195,16 @@ static int testCrypto()
|
||||
std::cout << "[crypto] Salsa20 SSE: DISABLED" << std::endl;
|
||||
#endif
|
||||
|
||||
std::cout << "[crypto] Benchmarking Salsa20/8... "; std::cout.flush();
|
||||
{
|
||||
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||
for(unsigned int i=0;i<1234567;++i)
|
||||
bb[i] = (unsigned char)i;
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv,8);
|
||||
double bytes = 0.0;
|
||||
uint64_t start = OSUtils::now();
|
||||
for(unsigned int i=0;i<200;++i) {
|
||||
s20.encrypt(bb,bb,1234567);
|
||||
bytes += 1234567.0;
|
||||
}
|
||||
uint64_t end = OSUtils::now();
|
||||
SHA512::hash(buf1,bb,1234567);
|
||||
std::cout << ((bytes / 1048576.0) / ((double)(end - start) / 1000.0)) << " MiB/second (" << Utils::hex(buf1,16) << ')' << std::endl;
|
||||
::free((void *)bb);
|
||||
}
|
||||
|
||||
std::cout << "[crypto] Benchmarking Salsa20/12... "; std::cout.flush();
|
||||
{
|
||||
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||
for(unsigned int i=0;i<1234567;++i)
|
||||
bb[i] = (unsigned char)i;
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv,12);
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv);
|
||||
double bytes = 0.0;
|
||||
uint64_t start = OSUtils::now();
|
||||
for(unsigned int i=0;i<200;++i) {
|
||||
s20.encrypt(bb,bb,1234567);
|
||||
s20.encrypt12(bb,bb,1234567);
|
||||
bytes += 1234567.0;
|
||||
}
|
||||
uint64_t end = OSUtils::now();
|
||||
@ -236,11 +218,11 @@ static int testCrypto()
|
||||
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||
for(unsigned int i=0;i<1234567;++i)
|
||||
bb[i] = (unsigned char)i;
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv,20);
|
||||
Salsa20 s20(s20TV0Key,256,s20TV0Iv);
|
||||
double bytes = 0.0;
|
||||
uint64_t start = OSUtils::now();
|
||||
for(unsigned int i=0;i<200;++i) {
|
||||
s20.encrypt(bb,bb,1234567);
|
||||
s20.encrypt20(bb,bb,1234567);
|
||||
bytes += 1234567.0;
|
||||
}
|
||||
uint64_t end = OSUtils::now();
|
||||
|
Loading…
x
Reference in New Issue
Block a user