mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-05-07 11:08:35 +00:00
ARM NEON Salsa20/12 in build and selftest. Almost 2X speedup on a Raspberry Pi.
This commit is contained in:
parent
8e1ac9fb0c
commit
a376bcc654
@ -1,6 +1,14 @@
|
|||||||
#ifndef ZT_SALSA2012_ARM32NEON_ASM
|
#ifndef ZT_SALSA2012_ARM32NEON_ASM
|
||||||
#define ZT_SALSA2012_ARM32NEON_ASM
|
#define ZT_SALSA2012_ARM32NEON_ASM
|
||||||
|
|
||||||
|
#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux)
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
/* Runtime NEON probe: masks the AT_HWCAP auxiliary-vector word with HWCAP_NEON and
 * normalizes the result to a strict boolean, so the macro never yields the raw bit
 * value (which would break comparisons against true or storage in a narrow type). */
#define zt_arm_has_neon() ((getauxval(AT_HWCAP) & HWCAP_NEON) != 0)
|
||||||
|
#else
|
||||||
|
/* This branch has no runtime probe, so fall back to what the compiler was told:
 * report NEON only when this translation unit is itself being built with NEON
 * enabled, instead of unconditionally claiming support. */
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define zt_arm_has_neon() (true)
#else
#define zt_arm_has_neon() (false)
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
@ -98,30 +98,37 @@ endif
|
|||||||
ifeq ($(CC_MACH),arm)
|
ifeq ($(CC_MACH),arm)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),armel)
|
ifeq ($(CC_MACH),armel)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),armhf)
|
ifeq ($(CC_MACH),armhf)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),armv6)
|
ifeq ($(CC_MACH),armv6)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),armv6zk)
|
ifeq ($(CC_MACH),armv6zk)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),armv6kz)
|
ifeq ($(CC_MACH),armv6kz)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),armv7)
|
ifeq ($(CC_MACH),armv7)
|
||||||
ZT_ARCHITECTURE=3
|
ZT_ARCHITECTURE=3
|
||||||
override DEFS+=-DZT_NO_TYPE_PUNNING
|
override DEFS+=-DZT_NO_TYPE_PUNNING
|
||||||
|
ZT_USE_ARM32_NEON_ASM_SALSA2012=1
|
||||||
endif
|
endif
|
||||||
ifeq ($(CC_MACH),arm64)
|
ifeq ($(CC_MACH),arm64)
|
||||||
ZT_ARCHITECTURE=4
|
ZT_ARCHITECTURE=4
|
||||||
@ -158,11 +165,15 @@ endif
|
|||||||
# Disable software updates by default on Linux since that is normally done with package management
|
# Disable software updates by default on Linux since that is normally done with package management
|
||||||
override DEFS+=-DZT_BUILD_PLATFORM=1 -DZT_BUILD_ARCHITECTURE=$(ZT_ARCHITECTURE) -DZT_SOFTWARE_UPDATE_DEFAULT="\"disable\""
|
override DEFS+=-DZT_BUILD_PLATFORM=1 -DZT_BUILD_ARCHITECTURE=$(ZT_ARCHITECTURE) -DZT_SOFTWARE_UPDATE_DEFAULT="\"disable\""
|
||||||
|
|
||||||
# Use X64 ASM Salsa20/12 on X86_64 target
|
# Build faster crypto on some targets
|
||||||
ifeq ($(ZT_USE_X64_ASM_SALSA2012),1)
|
ifeq ($(ZT_USE_X64_ASM_SALSA2012),1)
|
||||||
override DEFS+=-DZT_USE_X64_ASM_SALSA2012
|
override DEFS+=-DZT_USE_X64_ASM_SALSA2012
|
||||||
override OBJS+=ext/x64-salsa2012-asm/salsa2012.o
|
override OBJS+=ext/x64-salsa2012-asm/salsa2012.o
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(ZT_USE_ARM32_NEON_ASM_SALSA2012),1)
|
||||||
|
override DEFS+=-DZT_USE_ARM32_NEON_ASM_SALSA2012
|
||||||
|
override OBJS+=ext/arm32-neon-salsa2012-asm/salsa2012.o
|
||||||
|
endif
|
||||||
|
|
||||||
# Static builds, which are currently done for a number of Linux targets
|
# Static builds, which are currently done for a number of Linux targets
|
||||||
ifeq ($(ZT_STATIC),1)
|
ifeq ($(ZT_STATIC),1)
|
||||||
|
23
selftest.cpp
23
selftest.cpp
@ -57,6 +57,9 @@
|
|||||||
#ifdef ZT_USE_X64_ASM_SALSA2012
|
#ifdef ZT_USE_X64_ASM_SALSA2012
|
||||||
#include "ext/x64-salsa2012-asm/salsa2012.h"
|
#include "ext/x64-salsa2012-asm/salsa2012.h"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012
|
||||||
|
#include "ext/arm32-neon-salsa2012-asm/salsa2012.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __WINDOWS__
|
#ifdef __WINDOWS__
|
||||||
#include <tchar.h>
|
#include <tchar.h>
|
||||||
@ -215,7 +218,7 @@ static int testCrypto()
|
|||||||
double bytes = 0.0;
|
double bytes = 0.0;
|
||||||
uint64_t start = OSUtils::now();
|
uint64_t start = OSUtils::now();
|
||||||
for(unsigned int i=0;i<200;++i) {
|
for(unsigned int i=0;i<200;++i) {
|
||||||
zt_salsa2012_amd64_xmm6(bb, 1234567, s20TV0Iv, s20TV0Key);
|
zt_salsa2012_amd64_xmm6(bb,1234567,s20TV0Iv,s20TV0Key);
|
||||||
bytes += 1234567.0;
|
bytes += 1234567.0;
|
||||||
}
|
}
|
||||||
uint64_t end = OSUtils::now();
|
uint64_t end = OSUtils::now();
|
||||||
@ -224,6 +227,24 @@ static int testCrypto()
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012
|
||||||
|
if (zt_arm_has_neon()) {
|
||||||
|
std::cout << "[crypto] Benchmarking Salsa20/12 fast arm32/neon ASM... "; std::cout.flush();
|
||||||
|
{
|
||||||
|
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||||
|
double bytes = 0.0;
|
||||||
|
uint64_t start = OSUtils::now();
|
||||||
|
for(unsigned int i=0;i<200;++i) {
|
||||||
|
zt_salsa2012_armneon3_xor(bb,(const unsigned char *)0,1234567,s20TV0Iv,s20TV0Key);
|
||||||
|
bytes += 1234567.0;
|
||||||
|
}
|
||||||
|
uint64_t end = OSUtils::now();
|
||||||
|
std::cout << ((bytes / 1048576.0) / ((double)(end - start) / 1024.0)) << " MiB/second" << std::endl;
|
||||||
|
::free((void *)bb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
std::cout << "[crypto] Benchmarking Salsa20/20... "; std::cout.flush();
|
std::cout << "[crypto] Benchmarking Salsa20/20... "; std::cout.flush();
|
||||||
{
|
{
|
||||||
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
unsigned char *bb = (unsigned char *)::malloc(1234567);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user