mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2024-12-18 20:47:53 +00:00
Just incorporate the X64 ASM version of Salsa20/12 for X64 platforms. This gives us (for example) 1.5gb/sec encryption on a Core i5 2.8ghz.
This commit is contained in:
parent
4938e82795
commit
a1e94154be
6
ext/x64-salsa2012-asm/README.md
Normal file
6
ext/x64-salsa2012-asm/README.md
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
Blazingly fast X64 ASM implementation of Salsa20/12
|
||||||
|
======
|
||||||
|
|
||||||
|
This is ripped from the [cnacl](https://github.com/cjdelisle/cnacl) source. The actual code is by Daniel J. Bernstein and is in the public domain.
|
||||||
|
|
||||||
|
This is included on Linux and Mac 64-bit builds and is significantly faster than the SSE intrinsics or C versions. It's used for packet encode/decode only, since its use differs a bit from the regular Salsa20 C++ class. Specifically, it lacks the ability to be called repeatedly on successive blocks of a stream; instead, each call takes a key, a nonce, and a single buffer to encrypt, and that's it.
|
13
ext/x64-salsa2012-asm/salsa2012.h
Normal file
13
ext/x64-salsa2012-asm/salsa2012.h
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Generate raw key stream. Arguments: output, outlen (bytes), nonce, key (256-bit / 32-byte)
|
||||||
|
extern int zt_salsa2012_amd64_xmm6(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
|
||||||
|
|
||||||
|
// XOR a message with the key stream. Arguments: ciphertext (output), message (input), mlen (bytes), nonce, key (256-bit / 32-byte)
|
||||||
|
extern int zt_salsa2012_amd64_xmm6_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
4488
ext/x64-salsa2012-asm/salsa2012.s
Normal file
4488
ext/x64-salsa2012-asm/salsa2012.s
Normal file
File diff suppressed because it is too large
Load Diff
@ -37,6 +37,10 @@ ifeq ($(ZT_ENABLE_CLUSTER),1)
|
|||||||
DEFS+=-DZT_ENABLE_CLUSTER
|
DEFS+=-DZT_ENABLE_CLUSTER
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Use fast ASM Salsa20/12 for x64 processors
|
||||||
|
DEFS+=-DZT_USE_X64_ASM_SALSA2012
|
||||||
|
OBJS+=ext/x64-salsa2012-asm/salsa2012.o
|
||||||
|
|
||||||
# Build miniupnpc and nat-pmp as included libraries -- extra defs are required for these sources
|
# Build miniupnpc and nat-pmp as included libraries -- extra defs are required for these sources
|
||||||
DEFS+=-DMACOSX -DZT_USE_MINIUPNPC -DMINIUPNP_STATICLIB -D_DARWIN_C_SOURCE -DMINIUPNPC_SET_SOCKET_TIMEOUT -DMINIUPNPC_GET_SRC_ADDR -D_BSD_SOURCE -D_DEFAULT_SOURCE -DOS_STRING=\"Darwin/15.0.0\" -DMINIUPNPC_VERSION_STRING=\"2.0\" -DUPNP_VERSION_STRING=\"UPnP/1.1\" -DENABLE_STRNATPMPERR
|
DEFS+=-DMACOSX -DZT_USE_MINIUPNPC -DMINIUPNP_STATICLIB -D_DARWIN_C_SOURCE -DMINIUPNPC_SET_SOCKET_TIMEOUT -DMINIUPNPC_GET_SRC_ADDR -D_BSD_SOURCE -D_DEFAULT_SOURCE -DOS_STRING=\"Darwin/15.0.0\" -DMINIUPNPC_VERSION_STRING=\"2.0\" -DUPNP_VERSION_STRING=\"UPnP/1.1\" -DENABLE_STRNATPMPERR
|
||||||
OBJS+=ext/libnatpmp/natpmp.o ext/libnatpmp/getgateway.o ext/miniupnpc/connecthostport.o ext/miniupnpc/igd_desc_parse.o ext/miniupnpc/minisoap.o ext/miniupnpc/minissdpc.o ext/miniupnpc/miniupnpc.o ext/miniupnpc/miniwget.o ext/miniupnpc/minixml.o ext/miniupnpc/portlistingparse.o ext/miniupnpc/receivedata.o ext/miniupnpc/upnpcommands.o ext/miniupnpc/upnpdev.o ext/miniupnpc/upnperrors.o ext/miniupnpc/upnpreplyparse.o osdep/PortMapper.o
|
OBJS+=ext/libnatpmp/natpmp.o ext/libnatpmp/getgateway.o ext/miniupnpc/connecthostport.o ext/miniupnpc/igd_desc_parse.o ext/miniupnpc/minisoap.o ext/miniupnpc/minissdpc.o ext/miniupnpc/miniupnpc.o ext/miniupnpc/miniwget.o ext/miniupnpc/minixml.o ext/miniupnpc/portlistingparse.o ext/miniupnpc/receivedata.o ext/miniupnpc/upnpcommands.o ext/miniupnpc/upnpdev.o ext/miniupnpc/upnperrors.o ext/miniupnpc/upnpreplyparse.o osdep/PortMapper.o
|
||||||
@ -57,6 +61,9 @@ endif
|
|||||||
|
|
||||||
CXXFLAGS=$(CFLAGS) -std=c++11 -stdlib=libc++
|
CXXFLAGS=$(CFLAGS) -std=c++11 -stdlib=libc++
|
||||||
|
|
||||||
|
ext/x64-salsa2012-asm/salsa2012.o:
|
||||||
|
$(CC) $(CFLAGS) -c ext/x64-salsa2012-asm/salsa2012.s -o ext/x64-salsa2012-asm/salsa2012.o
|
||||||
|
|
||||||
all: one macui
|
all: one macui
|
||||||
|
|
||||||
one: $(OBJS) service/OneService.o one.o
|
one: $(OBJS) service/OneService.o one.o
|
||||||
|
@ -24,6 +24,10 @@
|
|||||||
|
|
||||||
#include "Packet.hpp"
|
#include "Packet.hpp"
|
||||||
|
|
||||||
|
#ifdef ZT_USE_X64_ASM_SALSA2012
|
||||||
|
#include "../ext/x64-salsa2012-asm/salsa2012.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#define FORCE_INLINE static __forceinline
|
#define FORCE_INLINE static __forceinline
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
@ -1064,7 +1068,7 @@ const char *Packet::errorString(ErrorCode e)
|
|||||||
|
|
||||||
void Packet::armor(const void *key,bool encryptPayload,unsigned int counter)
|
void Packet::armor(const void *key,bool encryptPayload,unsigned int counter)
|
||||||
{
|
{
|
||||||
uint8_t mangledKey[32],macKey[32],mac[16];
|
uint8_t mangledKey[32];
|
||||||
uint8_t *const data = reinterpret_cast<uint8_t *>(unsafeData());
|
uint8_t *const data = reinterpret_cast<uint8_t *>(unsafeData());
|
||||||
|
|
||||||
// Mask least significant 3 bits of packet ID with counter to embed packet send counter for QoS use
|
// Mask least significant 3 bits of packet ID with counter to embed packet send counter for QoS use
|
||||||
@ -1074,23 +1078,47 @@ void Packet::armor(const void *key,bool encryptPayload,unsigned int counter)
|
|||||||
setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE);
|
setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE);
|
||||||
|
|
||||||
_salsa20MangleKey((const unsigned char *)key,mangledKey);
|
_salsa20MangleKey((const unsigned char *)key,mangledKey);
|
||||||
|
|
||||||
|
#ifdef ZT_USE_X64_ASM_SALSA2012
|
||||||
|
const unsigned int payloadLen = (encryptPayload) ? (size() - ZT_PACKET_IDX_VERB) : 0;
|
||||||
|
uint64_t keyStream[(ZT_PROTO_MAX_PACKET_LENGTH + 64 + 8) / 8];
|
||||||
|
zt_salsa2012_amd64_xmm6(reinterpret_cast<unsigned char *>(keyStream),payloadLen + 64,reinterpret_cast<const unsigned char *>(data + ZT_PACKET_IDX_IV),reinterpret_cast<const unsigned char *>(mangledKey));
|
||||||
|
|
||||||
|
uint64_t *ksptr = keyStream + 8; // encryption starts after first Salsa20 block
|
||||||
|
uint8_t *dptr = data + ZT_PACKET_IDX_VERB;
|
||||||
|
unsigned int ksrem = payloadLen;
|
||||||
|
while (ksrem >= 8) {
|
||||||
|
ksrem -= 8;
|
||||||
|
*(reinterpret_cast<uint64_t *>(dptr)) ^= *(ksptr++);
|
||||||
|
dptr += 8;
|
||||||
|
}
|
||||||
|
for(unsigned int i=0;i<ksrem;++i) {
|
||||||
|
dptr[i] ^= reinterpret_cast<const uint8_t *>(ksptr)[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t mac[2];
|
||||||
|
Poly1305::compute(mac,data + ZT_PACKET_IDX_VERB,size() - ZT_PACKET_IDX_VERB,keyStream);
|
||||||
|
memcpy(data + ZT_PACKET_IDX_MAC,mac,8);
|
||||||
|
#else
|
||||||
Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
|
Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
|
||||||
|
|
||||||
// MAC key is always the first 32 bytes of the Salsa20 key stream
|
uint64_t macKey[4];
|
||||||
// This is the same construction DJB's NaCl library uses
|
|
||||||
s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
|
s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
|
||||||
|
|
||||||
uint8_t *const payload = data + ZT_PACKET_IDX_VERB;
|
uint8_t *const payload = data + ZT_PACKET_IDX_VERB;
|
||||||
const unsigned int payloadLen = size() - ZT_PACKET_IDX_VERB;
|
const unsigned int payloadLen = size() - ZT_PACKET_IDX_VERB;
|
||||||
if (encryptPayload)
|
if (encryptPayload)
|
||||||
s20.crypt12(payload,payload,payloadLen);
|
s20.crypt12(payload,payload,payloadLen);
|
||||||
|
|
||||||
|
uint64_t mac[2];
|
||||||
Poly1305::compute(mac,payload,payloadLen,macKey);
|
Poly1305::compute(mac,payload,payloadLen,macKey);
|
||||||
memcpy(data + ZT_PACKET_IDX_MAC,mac,8);
|
memcpy(data + ZT_PACKET_IDX_MAC,mac,8);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Packet::dearmor(const void *key)
|
bool Packet::dearmor(const void *key)
|
||||||
{
|
{
|
||||||
uint8_t mangledKey[32],macKey[32],mac[16];
|
uint8_t mangledKey[32];
|
||||||
uint8_t *const data = reinterpret_cast<uint8_t *>(unsafeData());
|
uint8_t *const data = reinterpret_cast<uint8_t *>(unsafeData());
|
||||||
const unsigned int payloadLen = size() - ZT_PACKET_IDX_VERB;
|
const unsigned int payloadLen = size() - ZT_PACKET_IDX_VERB;
|
||||||
unsigned char *const payload = data + ZT_PACKET_IDX_VERB;
|
unsigned char *const payload = data + ZT_PACKET_IDX_VERB;
|
||||||
@ -1098,9 +1126,37 @@ bool Packet::dearmor(const void *key)
|
|||||||
|
|
||||||
if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) {
|
if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) {
|
||||||
_salsa20MangleKey((const unsigned char *)key,mangledKey);
|
_salsa20MangleKey((const unsigned char *)key,mangledKey);
|
||||||
|
|
||||||
|
#ifdef ZT_USE_X64_ASM_SALSA2012
|
||||||
|
uint64_t keyStream[(ZT_PROTO_MAX_PACKET_LENGTH + 64 + 8) / 8];
|
||||||
|
zt_salsa2012_amd64_xmm6(reinterpret_cast<unsigned char *>(keyStream),((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) ? (payloadLen + 64) : 64),reinterpret_cast<const unsigned char *>(data + ZT_PACKET_IDX_IV),reinterpret_cast<const unsigned char *>(mangledKey));
|
||||||
|
|
||||||
|
uint64_t mac[2];
|
||||||
|
Poly1305::compute(mac,payload,payloadLen,keyStream);
|
||||||
|
if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8))
|
||||||
|
return false; // MAC failed, packet is corrupt, modified, or is not from the sender
|
||||||
|
|
||||||
|
if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) {
|
||||||
|
uint64_t *ksptr = keyStream + 8; // encryption starts after first Salsa20 block
|
||||||
|
uint8_t *dptr = data + ZT_PACKET_IDX_VERB;
|
||||||
|
unsigned int ksrem = payloadLen;
|
||||||
|
while (ksrem >= 8) {
|
||||||
|
ksrem -= 8;
|
||||||
|
*(reinterpret_cast<uint64_t *>(dptr)) ^= *(ksptr++);
|
||||||
|
dptr += 8;
|
||||||
|
}
|
||||||
|
for(unsigned int i=0;i<ksrem;++i) {
|
||||||
|
dptr[i] ^= reinterpret_cast<const uint8_t *>(ksptr)[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
|
Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
|
||||||
|
|
||||||
|
uint64_t macKey[4];
|
||||||
s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
|
s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
|
||||||
|
uint64_t mac[2];
|
||||||
Poly1305::compute(mac,payload,payloadLen,macKey);
|
Poly1305::compute(mac,payload,payloadLen,macKey);
|
||||||
if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8))
|
if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8))
|
||||||
return false; // MAC failed, packet is corrupt, modified, or is not from the sender
|
return false; // MAC failed, packet is corrupt, modified, or is not from the sender
|
||||||
@ -1109,6 +1165,7 @@ bool Packet::dearmor(const void *key)
|
|||||||
s20.crypt12(payload,payload,payloadLen);
|
s20.crypt12(payload,payload,payloadLen);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
return false; // unrecognized cipher suite
|
return false; // unrecognized cipher suite
|
||||||
}
|
}
|
||||||
|
22
selftest.cpp
22
selftest.cpp
@ -54,6 +54,10 @@
|
|||||||
|
|
||||||
#include "controller/JSONDB.hpp"
|
#include "controller/JSONDB.hpp"
|
||||||
|
|
||||||
|
#ifdef ZT_USE_X64_ASM_SALSA2012
|
||||||
|
#include "ext/x64-salsa2012-asm/salsa2012.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __WINDOWS__
|
#ifdef __WINDOWS__
|
||||||
#include <tchar.h>
|
#include <tchar.h>
|
||||||
#endif
|
#endif
|
||||||
@ -204,6 +208,24 @@ static int testCrypto()
|
|||||||
::free((void *)bb);
|
::free((void *)bb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ZT_USE_X64_ASM_SALSA2012
|
||||||
|
std::cout << "[crypto] Benchmarking Salsa20/12 fast x64 ASM... "; std::cout.flush();
|
||||||
|
{
|
||||||
|
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||||
|
for(unsigned int i=0;i<1234567;++i)
|
||||||
|
bb[i] = (unsigned char)i;
|
||||||
|
double bytes = 0.0;
|
||||||
|
uint64_t start = OSUtils::now();
|
||||||
|
for(unsigned int i=0;i<200;++i) {
|
||||||
|
zt_salsa2012_amd64_xmm6_xor(bb,bb,1234567,s20TV0Iv,s20TV0Key);
|
||||||
|
bytes += 1234567.0;
|
||||||
|
}
|
||||||
|
uint64_t end = OSUtils::now();
|
||||||
|
std::cout << ((bytes / 1048576.0) / ((double)(end - start) / 1000.0)) << " MiB/second" << std::endl;
|
||||||
|
::free((void *)bb);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
std::cout << "[crypto] Benchmarking Salsa20/20... "; std::cout.flush();
|
std::cout << "[crypto] Benchmarking Salsa20/20... "; std::cout.flush();
|
||||||
{
|
{
|
||||||
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||||
|
Loading…
Reference in New Issue
Block a user