From 7c9949eea3d4d40b1f1f2787ec774449ef3b8e3d Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 1 Jul 2015 12:29:23 -0700 Subject: [PATCH] For @glimberg -- a *possible* fix to the alignment headaches on Android/ARM. If this works, we should find a define that can be used to enable it there since it will slow things down on other non-x86 architectures. --- node/Salsa20.cpp | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/node/Salsa20.cpp b/node/Salsa20.cpp index 030c1cf47..2eb683810 100644 --- a/node/Salsa20.cpp +++ b/node/Salsa20.cpp @@ -11,17 +11,40 @@ #define XOR(v,w) ((v) ^ (w)) #define PLUS(v,w) ((uint32_t)((v) + (w))) +#ifndef ZT_SALSA20_SSE + #if __BYTE_ORDER == __LITTLE_ENDIAN + +/* We have a slower version of these macros for CPU/compiler combos that + * do not allow unaligned access to a uint32_t. Another solution would be + * to methodically require alignment across the code, but this is quicker + * for now. The culprit appears to be some Android-based ARM devices. */ +#if 1 +#define U8TO32_LITTLE(p) ( ((uint32_t)(p)[0]) | ((uint32_t)(p)[1] << 8) | ((uint32_t)(p)[2] << 16) | ((uint32_t)(p)[3] << 24) ) +static inline void U32TO8_LITTLE(uint8_t *const c,const uint32_t v) +{ + c[0] = (uint8_t)v; + c[1] = (uint8_t)(v >> 8); + c[2] = (uint8_t)(v >> 16); + c[3] = (uint8_t)(v >> 24); +} +#else #define U8TO32_LITTLE(p) (*((const uint32_t *)((const void *)(p)))) #define U32TO8_LITTLE(c,v) *((uint32_t *)((void *)(c))) = (v) -#else +#endif + +#else // big endian + #ifdef __GNUC__ #define U8TO32_LITTLE(p) __builtin_bswap32(*((const uint32_t *)((const void *)(p)))) #define U32TO8_LITTLE(c,v) *((uint32_t *)((void *)(c))) = __builtin_bswap32((v)) -#else +#else // no bswap stuff... need to do it manually? error need be; -#endif -#endif +#endif // __GNUC__ or not + +#endif // little/big endian + +#endif // !ZT_SALSA20_SSE #ifdef ZT_SALSA20_SSE class _s20sseconsts