code format

This commit is contained in:
vanhauser-thc
2024-02-08 15:13:46 +01:00
parent eaf4a29930
commit 369fce9c85
13 changed files with 6781 additions and 5567 deletions

View File

@@ -2,17 +2,15 @@
 ## Must
 - UI revamp
 - hardened_usercopy=0 page_alloc.shuffle=0
 - add value_profile but only enable after 15 minutes without finds
-- cmplog max len, cmplog max items envs?
+- cmplog max items env?
 - adapt MOpt to new mutation engine
 - Update afl->pending_not_fuzzed for MOpt
 - cmplog rtn sanity check on fixed length? currently we ignore the length
 - afl-showmap -f support
 - afl-fuzz multicore wrapper script
 - when trimming then perform crash detection
-- problem: either -L0 and/or -p mmopt results in zero new coverage
 ## Should

View File

@@ -4,13 +4,31 @@
 release of the tool. See README.md for the general instruction manual.
 ### Version ++4.20a (dev)
+  ! A new forkserver communication model is now introduced. afl-fuzz is
+    backward compatible with old compiled targets if they are not built
+    for CMPLOG/Redqueen, but new compiled targets will not work with
+    old afl-fuzz versions!
+  ! Recompile all targets that are instrumented for CMPLOG/Redqueen!
+  - AFL++ now supports up to 4 billion coverage edges, up from 6 million.
+  - New compile option: `make PERFORMANCE=1` - this will enable special
+    CPU dependent optimizations that make everything more performant - but
+    the binaries likely won't work on different platforms. Also
+    enables a faster hasher if the CPU requirements are met.
+  - The persistent record feature (see config.h) was expanded to also
+    support replay, thanks to @quarta-qti !
   - afl-fuzz:
     - the new deterministic fuzzing feature is now activated by default,
       deactivate with -z. Parameters -d and -D are ignored.
+    - small improvements to CMPLOG/redqueen
+    - workaround for a bug with MOpt -L when used with -M - in the future
+      we will either remove or rewrite MOpt.
   - afl-cc:
     - added collision free caller instrumentation to LTO mode. activate with
       `AFL_LLVM_LTO_CALLER=1`. You can set a max depth to go through single
       block functions with `AFL_LLVM_LTO_CALLER_DEPTH` (default 0)
+  - Minor edits to afl-persistent-config
+  - Prevent temporary files being left behind on aborted afl-whatsup
+  - More CPU benchmarks added to benchmark/
 ### Version ++4.10c (release)

View File

@@ -41,13 +41,12 @@
#define CMP_TYPE_INS 0
#define CMP_TYPE_RTN 1
-struct cmp_header {
+struct cmp_header {  // 16 bit = 2 bytes
  unsigned hits : 6;       // up to 63 entries, we have CMP_MAP_H = 32
-  unsigned shape : 5;      // 31+1 bytes
-  unsigned type : 1;       // 4, we use 3: none, rtn, cmp
+  unsigned shape : 5;      // 31+1 bytes max
+  unsigned type : 1;       // 2: cmp, rtn
  unsigned attribute : 4;  // 16 for arithmetic comparison types
-  //unsigned reserved : 6;
} __attribute__((packed));
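The four bit-fields above sum to 6 + 5 + 1 + 4 = 16 bits, which is what the new "// 16 bit = 2 bytes" comment refers to, and the "31+1 bytes" comment suggests shape stores the operand size minus one. A standalone sketch of that claim (my addition, assuming GCC/Clang bit-field packing and C11, not part of the commit):

```c
#include <assert.h>
#include <stdio.h>

/* same field layout as struct cmp_header above, under a different name */
struct cmp_header_example {
  unsigned hits : 6;       /* number of recorded hits              */
  unsigned shape : 5;      /* operand size - 1, per the comments   */
  unsigned type : 1;       /* CMP_TYPE_INS or CMP_TYPE_RTN         */
  unsigned attribute : 4;  /* comparison attribute                 */
} __attribute__((packed));

static_assert(sizeof(struct cmp_header_example) == 2,
              "6 + 5 + 1 + 4 bits must pack into two bytes");

int main(void) {
  struct cmp_header_example h = {.hits = 1, .shape = 31, .type = 1};
  printf("%zu bytes, shape %u -> up to %u operand bytes\n", sizeof h, h.shape,
         h.shape + 1u);
  return 0;
}
```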

View File

@@ -64,6 +64,8 @@ static char *afl_environment_variables[] = {
     "AFL_REAL_LD", "AFL_LD_PRELOAD", "AFL_LD_VERBOSE", "AFL_LLVM_ALLOWLIST",
     "AFL_LLVM_DENYLIST", "AFL_LLVM_BLOCKLIST", "AFL_CMPLOG", "AFL_LLVM_CMPLOG",
     "AFL_GCC_CMPLOG", "AFL_LLVM_INSTRIM", "AFL_LLVM_CALLER", "AFL_LLVM_CTX",
+    "AFL_LLVM_LTO_CALLER", "AFL_LLVM_LTO_CTX", "AFL_LLVM_LTO_CALLER_DEPTH",
+    "AFL_LLVM_LTO_CTX_DEPTH", "AFL_LLVM_CALLER_DEPTH", "AFL_LLVM_CTX_DEPTH",
     "AFL_LLVM_CTX_K", "AFL_LLVM_DICT2FILE", "AFL_LLVM_DICT2FILE_NO_MAIN",
     "AFL_LLVM_DOCUMENT_IDS", "AFL_LLVM_INSTRIM_LOOPHEAD", "AFL_LLVM_INSTRUMENT",
     "AFL_LLVM_LTO_AUTODICTIONARY", "AFL_LLVM_AUTODICTIONARY",

View File

@@ -286,25 +286,26 @@
#if defined(__LITTLE_ENDIAN__) || \
    (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \
-    defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \
-    defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \
-    defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \
-    defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \
-    defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \
-    defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \
-    defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \
-    defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \
-    defined(__WINDOWS__)
+    defined(__ARMEL__) || defined(__THUMBEL__) || \
+    defined(__AARCH64EL__) || defined(__MIPSEL__) || defined(_MIPSEL) || \
+    defined(__MIPSEL) || defined(_M_ARM) || defined(_M_ARM64) || \
+    defined(__e2k__) || defined(__elbrus_4c__) || \
+    defined(__elbrus_8c__) || defined(__bfin__) || defined(__BFIN__) || \
+    defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \
+    defined(__ia64) || defined(_M_IA64) || defined(__itanium__) || \
+    defined(__ia32__) || defined(__CYGWIN__) || defined(_WIN64) || \
+    defined(_WIN32) || defined(__TOS_WIN__) || defined(__WINDOWS__)
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
#elif defined(__BIG_ENDIAN__) || \
    (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \
-    defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \
-    defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \
-    defined(__m68k__) || defined(M68000) || defined(__hppa__) || \
-    defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \
-    defined(__sparc) || defined(__370__) || defined(__THW_370__) || \
-    defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__)
+    defined(__ARMEB__) || defined(__THUMBEB__) || \
+    defined(__AARCH64EB__) || defined(__MIPSEB__) || defined(_MIPSEB) || \
+    defined(__MIPSEB) || defined(__m68k__) || defined(M68000) || \
+    defined(__hppa__) || defined(__hppa) || defined(__HPPA__) || \
+    defined(__sparc__) || defined(__sparc) || defined(__370__) || \
+    defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || \
+    defined(__SYSC_ZARCH__)
#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__
#else
@@ -401,22 +402,29 @@
#ifdef __cplusplus
extern "C" {
#endif
typedef union T1HA_ALIGN_PREFIX t1ha_state256 {
  uint8_t bytes[32];
  uint32_t u32[8];
  uint64_t u64[4];
  struct {
    uint64_t a, b, c, d;
  } n;
} t1ha_state256_t T1HA_ALIGN_SUFFIX;
typedef struct t1ha_context {
  t1ha_state256_t state;
  t1ha_state256_t buffer;
  size_t partial;
  uint64_t total;
} t1ha_context_t;
#ifdef _MSC_VER
@@ -599,7 +607,8 @@ uint64_t t1ha0_32be(const void *data, size_t length, uint64_t seed);
#endif
#endif /* T1HA0_AESNI_AVAILABLE */
-/* Define T1HA0_RUNTIME_SELECT to 0 for disable dispatching t1ha0 at runtime. */
+/* Define T1HA0_RUNTIME_SELECT to 0 for disable dispatching t1ha0 at runtime.
+ */
#ifndef T1HA0_RUNTIME_SELECT
#if T1HA0_AESNI_AVAILABLE && !defined(__e2k__)
#define T1HA0_RUNTIME_SELECT 1
@@ -636,8 +645,11 @@ T1HA_API extern uint64_t (*t1ha0_funcptr)(const void *data, size_t length,
                                          uint64_t seed);
static __force_inline uint64_t t1ha0(const void *data, size_t length,
                                     uint64_t seed) {
  return t1ha0_funcptr(data, length, seed);
}
#endif /* T1HA_USE_INDIRECT_FUNCTIONS */
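With T1HA0_RUNTIME_SELECT enabled, the t1ha0() wrapper above simply forwards through t1ha0_funcptr, which gets bound to the best implementation for the running CPU. Below is a self-contained sketch of the resolve-once-then-rebind pattern behind such a pointer; all names are hypothetical and the real t1ha resolver (CPUID probing, optional ELF ifunc) is more involved:

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* two toy implementations stand in for the portable and accelerated paths */
static uint64_t hash_portable_sketch(const void *data, size_t len,
                                     uint64_t seed) {
  const uint8_t *p = (const uint8_t *)data;
  uint64_t h = seed ^ len;
  while (len--) h = (h ^ *p++) * UINT64_C(0x100000001B3);
  return h;
}

static uint64_t hash_fast_sketch(const void *data, size_t len, uint64_t seed) {
  return hash_portable_sketch(data, len, seed); /* placeholder "fast" path */
}

static int cpu_supports_fast_path_sketch(void) { return 0; } /* fake probe */

/* the pointer starts at a resolver: the first call detects the CPU, rebinds
   the pointer, and forwards the call, so later calls go straight through */
static uint64_t hash_resolve_sketch(const void *data, size_t len, uint64_t seed);

static uint64_t (*hash_funcptr_sketch)(const void *, size_t, uint64_t) =
    hash_resolve_sketch;

static uint64_t hash_resolve_sketch(const void *data, size_t len,
                                    uint64_t seed) {
  hash_funcptr_sketch = cpu_supports_fast_path_sketch() ? hash_fast_sketch
                                                        : hash_portable_sketch;
  return hash_funcptr_sketch(data, len, seed);
}

int main(void) {
  printf("%016llx\n", (unsigned long long)hash_funcptr_sketch("abc", 3, 42));
  return 0;
}
```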
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@ -659,6 +671,7 @@ static __force_inline uint64_t t1ha0(const void *data, size_t length,
static __force_inline uint64_t t1ha0(const void *data, size_t length, static __force_inline uint64_t t1ha0(const void *data, size_t length,
uint64_t seed) { uint64_t seed) {
#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ #if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \
(!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED))
#if defined(T1HA1_DISABLED) #if defined(T1HA1_DISABLED)
@ -669,6 +682,7 @@ static __force_inline uint64_t t1ha0(const void *data, size_t length,
#else /* 32/64 */ #else /* 32/64 */
return t1ha0_32be(data, length, seed); return t1ha0_32be(data, length, seed);
#endif /* 32/64 */ #endif /* 32/64 */
} }
#endif /* !T1HA0_USE_DEFINE */ #endif /* !T1HA0_USE_DEFINE */
@ -692,6 +706,7 @@ static __force_inline uint64_t t1ha0(const void *data, size_t length,
static __force_inline uint64_t t1ha0(const void *data, size_t length, static __force_inline uint64_t t1ha0(const void *data, size_t length,
uint64_t seed) { uint64_t seed) {
#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ #if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \
(!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED))
#if defined(T1HA1_DISABLED) #if defined(T1HA1_DISABLED)
@ -702,6 +717,7 @@ static __force_inline uint64_t t1ha0(const void *data, size_t length,
#else /* 32/64 */ #else /* 32/64 */
return t1ha0_32le(data, length, seed); return t1ha0_32le(data, length, seed);
#endif /* 32/64 */ #endif /* 32/64 */
} }
#endif /* !T1HA0_USE_DEFINE */ #endif /* !T1HA0_USE_DEFINE */
@ -711,9 +727,12 @@ static __force_inline uint64_t t1ha0(const void *data, size_t length,
#endif /* T1HA0_DISABLED */ #endif /* T1HA0_DISABLED */
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#if __GNUC_PREREQ(4, 0) #if __GNUC_PREREQ(4, 0)
#pragma GCC visibility pop #pragma GCC visibility pop
#endif /* __GNUC_PREREQ(4,0) */ #endif /* __GNUC_PREREQ(4,0) */

View File

@ -47,10 +47,12 @@
#if T1HA0_AESNI_AVAILABLE #if T1HA0_AESNI_AVAILABLE
uint64_t T1HA_IA32AES_NAME(const void *data, uint32_t len) { uint64_t T1HA_IA32AES_NAME(const void *data, uint32_t len) {
uint64_t a = 0; uint64_t a = 0;
uint64_t b = len; uint64_t b = len;
if (likely(len > 32)) { if (likely(len > 32)) {
__m128i x = _mm_set_epi64x(a, b); __m128i x = _mm_set_epi64x(a, b);
__m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1));
@ -60,14 +62,19 @@ uint64_t T1HA_IA32AES_NAME(const void *data, uint32_t len) {
data = detent; data = detent;
if (len & 16) { if (len & 16) {
x = _mm_add_epi64(x, _mm_loadu_si128(v++)); x = _mm_add_epi64(x, _mm_loadu_si128(v++));
y = _mm_aesenc_si128(x, y); y = _mm_aesenc_si128(x, y);
} }
len &= 15; len &= 15;
if (v + 7 < detent) { if (v + 7 < detent) {
__m128i salt = y; __m128i salt = y;
do { do {
__m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt);
t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
@ -82,14 +89,18 @@ uint64_t T1HA_IA32AES_NAME(const void *data, uint32_t len) {
t = _mm_aesenc_si128(x, t); t = _mm_aesenc_si128(x, t);
x = _mm_add_epi64(y, x); x = _mm_add_epi64(y, x);
y = t; y = t;
} while (v + 7 < detent); } while (v + 7 < detent);
} }
while (v < detent) { while (v < detent) {
__m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++));
__m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++));
x = _mm_aesdec_si128(x, v0y); x = _mm_aesdec_si128(x, v0y);
y = _mm_aesdec_si128(y, v1x); y = _mm_aesdec_si128(y, v1x);
} }
x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y);
@ -121,10 +132,12 @@ uint64_t T1HA_IA32AES_NAME(const void *data, uint32_t len) {
defined(__e2k__)) defined(__e2k__))
_mm_empty(); _mm_empty();
#endif #endif
} }
const uint64_t *v = (const uint64_t *)data; const uint64_t *v = (const uint64_t *)data;
switch (len) { switch (len) {
default: default:
mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4);
/* fall through */ /* fall through */
@ -160,8 +173,11 @@ uint64_t T1HA_IA32AES_NAME(const void *data, uint32_t len) {
/* fall through */ /* fall through */
case 0: case 0:
return final64(a, b); return final64(a, b);
} }
} }
#endif /* T1HA0_AESNI_AVAILABLE */ #endif /* T1HA0_AESNI_AVAILABLE */
#undef T1HA_IA32AES_NAME #undef T1HA_IA32AES_NAME

View File

@@ -139,7 +139,8 @@
#elif defined(__clang__) && !__has_attribute(__cold__) && \
    __has_attribute(__section__)
/* just put infrequently used functions in separate section */
-#define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os")
+#define __cold \
+  __attribute__((__section__("text.unlikely"))) __optimize("Os")
#elif defined(__GNUC__) || __has_attribute(__cold__)
#define __cold __attribute__((__cold__)) __optimize("Os")
#else
@@ -199,20 +200,25 @@
#endif /* __iset__ >= 3 */
#if __iset__ >= 5
-static __maybe_unused __always_inline unsigned
-e2k_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) {
+static __maybe_unused __always_inline unsigned e2k_add64carry_first(
+    uint64_t base, uint64_t addend, uint64_t *sum) {
  *sum = base + addend;
  return (unsigned)__builtin_e2k_addcd_c(base, addend, 0);
}
\
#define add64carry_first(base, addend, sum) \
  e2k_add64carry_first(base, addend, sum)
-static __maybe_unused __always_inline unsigned
-e2k_add64carry_next(unsigned carry, uint64_t base, uint64_t addend,
-                    uint64_t *sum) {
+static __maybe_unused __always_inline unsigned e2k_add64carry_next(
+    unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
  *sum = __builtin_e2k_addcd(base, addend, carry);
  return (unsigned)__builtin_e2k_addcd_c(base, addend, carry);
}
\
#define add64carry_next(carry, base, addend, sum) \
  e2k_add64carry_next(carry, base, addend, sum)
@ -220,8 +226,11 @@ static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry,
uint64_t base, uint64_t base,
uint64_t addend, uint64_t addend,
uint64_t *sum) { uint64_t *sum) {
*sum = __builtin_e2k_addcd(base, addend, carry); *sum = __builtin_e2k_addcd(base, addend, carry);
} }
\
#define add64carry_last(carry, base, addend, sum) \ #define add64carry_last(carry, base, addend, sum) \
e2k_add64carry_last(carry, base, addend, sum) e2k_add64carry_last(carry, base, addend, sum)
#endif /* __iset__ >= 5 */ #endif /* __iset__ >= 5 */
@@ -263,7 +272,8 @@ static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry,
#pragma intrinsic(_umul128)
#define mul_64x64_128(a, b, ph) _umul128(a, b, ph)
#pragma intrinsic(_addcarry_u64)
-#define add64carry_first(base, addend, sum) _addcarry_u64(0, base, addend, sum)
+#define add64carry_first(base, addend, sum) \
+  _addcarry_u64(0, base, addend, sum)
#define add64carry_next(carry, base, addend, sum) \
  _addcarry_u64(carry, base, addend, sum)
#define add64carry_last(carry, base, addend, sum) \
@@ -281,14 +291,17 @@ static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry,
#if _MSC_VER >= 1915 /* LY: workaround for SSA-optimizer bug */
#pragma intrinsic(_addcarry_u32)
-#define add32carry_first(base, addend, sum) _addcarry_u32(0, base, addend, sum)
+#define add32carry_first(base, addend, sum) \
+  _addcarry_u32(0, base, addend, sum)
#define add32carry_next(carry, base, addend, sum) \
  _addcarry_u32(carry, base, addend, sum)
#define add32carry_last(carry, base, addend, sum) \
  (void)_addcarry_u32(carry, base, addend, sum)
-static __forceinline char
-msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) {
+static __forceinline char msvc32_add64carry_first(uint64_t base,
+                                                  uint64_t addend,
+                                                  uint64_t *sum) {
  uint32_t *const sum32 = (uint32_t *)sum;
  const uint32_t base_32l = (uint32_t)base;
  const uint32_t base_32h = (uint32_t)(base >> 32);
@ -296,13 +309,16 @@ msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) {
const uint32_t addend_32h = (uint32_t)(addend >> 32); const uint32_t addend_32h = (uint32_t)(addend >> 32);
return add32carry_next(add32carry_first(base_32l, addend_32l, sum32), return add32carry_next(add32carry_first(base_32l, addend_32l, sum32),
base_32h, addend_32h, sum32 + 1); base_32h, addend_32h, sum32 + 1);
} }
\
#define add64carry_first(base, addend, sum) \ #define add64carry_first(base, addend, sum) \
msvc32_add64carry_first(base, addend, sum) msvc32_add64carry_first(base, addend, sum)
static __forceinline char msvc32_add64carry_next(char carry, uint64_t base, static __forceinline char msvc32_add64carry_next(char carry, uint64_t base,
uint64_t addend, uint64_t addend,
uint64_t *sum) { uint64_t *sum) {
uint32_t *const sum32 = (uint32_t *)sum; uint32_t *const sum32 = (uint32_t *)sum;
const uint32_t base_32l = (uint32_t)base; const uint32_t base_32l = (uint32_t)base;
const uint32_t base_32h = (uint32_t)(base >> 32); const uint32_t base_32h = (uint32_t)(base >> 32);
@ -310,13 +326,16 @@ static __forceinline char msvc32_add64carry_next(char carry, uint64_t base,
const uint32_t addend_32h = (uint32_t)(addend >> 32); const uint32_t addend_32h = (uint32_t)(addend >> 32);
return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32), return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32),
base_32h, addend_32h, sum32 + 1); base_32h, addend_32h, sum32 + 1);
} }
\
#define add64carry_next(carry, base, addend, sum) \ #define add64carry_next(carry, base, addend, sum) \
msvc32_add64carry_next(carry, base, addend, sum) msvc32_add64carry_next(carry, base, addend, sum)
static __forceinline void msvc32_add64carry_last(char carry, uint64_t base, static __forceinline void msvc32_add64carry_last(char carry, uint64_t base,
uint64_t addend, uint64_t addend,
uint64_t *sum) { uint64_t *sum) {
uint32_t *const sum32 = (uint32_t *)sum; uint32_t *const sum32 = (uint32_t *)sum;
const uint32_t base_32l = (uint32_t)base; const uint32_t base_32l = (uint32_t)base;
const uint32_t base_32h = (uint32_t)(base >> 32); const uint32_t base_32h = (uint32_t)(base >> 32);
@ -324,7 +343,9 @@ static __forceinline void msvc32_add64carry_last(char carry, uint64_t base,
const uint32_t addend_32h = (uint32_t)(addend >> 32); const uint32_t addend_32h = (uint32_t)(addend >> 32);
add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h, add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h,
addend_32h, sum32 + 1); addend_32h, sum32 + 1);
} }
\
#define add64carry_last(carry, base, addend, sum) \ #define add64carry_last(carry, base, addend, sum) \
msvc32_add64carry_last(carry, base, addend, sum) msvc32_add64carry_last(carry, base, addend, sum)
#endif /* _MSC_FULL_VER >= 190024231 */ #endif /* _MSC_FULL_VER >= 190024231 */
@ -358,6 +379,7 @@ static __forceinline void msvc32_add64carry_last(char carry, uint64_t base,
#ifndef unreachable #ifndef unreachable
#define unreachable() \ #define unreachable() \
do { \ do { \
\
} while (1) } while (1)
#endif #endif
@@ -368,13 +390,16 @@ static __forceinline void msvc32_add64carry_last(char carry, uint64_t base,
#define bswap64 __bswap_64
#else
static __always_inline uint64_t bswap64(uint64_t v) {
  return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) |
         ((v << 24) & UINT64_C(0x0000ff0000000000)) |
         ((v << 8) & UINT64_C(0x000000ff00000000)) |
         ((v >> 8) & UINT64_C(0x00000000ff000000)) |
         ((v >> 24) & UINT64_C(0x0000000000ff0000)) |
         ((v >> 40) & UINT64_C(0x000000000000ff00));
}
#endif
#endif /* bswap64 */
@@ -385,9 +410,12 @@ static __always_inline uint64_t bswap64(uint64_t v) {
#define bswap32 __bswap_32
#else
static __always_inline uint32_t bswap32(uint32_t v) {
  return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) |
         ((v >> 8) & UINT32_C(0x0000ff00));
}
#endif
#endif /* bswap32 */
@@ -397,7 +425,12 @@ static __always_inline uint32_t bswap32(uint32_t v) {
#elif defined(__bswap_16)
#define bswap16 __bswap_16
#else
-static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; }
+static __always_inline uint16_t bswap16(uint16_t v) {
+  return v << 8 | v >> 8;
+}
#endif
#endif /* bswap16 */
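The fallback byte swappers above are plain shift-and-mask expressions. A quick standalone check I added (not part of t1ha) that the 64-bit version really reverses the byte order:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* same expression as the bswap64 fallback above, copied for a self-test */
static uint64_t bswap64_ref(uint64_t v) {
  return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) |
         ((v << 24) & UINT64_C(0x0000ff0000000000)) |
         ((v << 8) & UINT64_C(0x000000ff00000000)) |
         ((v >> 8) & UINT64_C(0x00000000ff000000)) |
         ((v >> 24) & UINT64_C(0x0000000000ff0000)) |
         ((v >> 40) & UINT64_C(0x000000000000ff00));
}

int main(void) {
  /* every byte distinct, so any mixed-up lane would be visible */
  printf("%016" PRIx64 "\n", bswap64_ref(UINT64_C(0x0102030405060708)));
  /* expected output: 0807060504030201 */
  return 0;
}
```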
@ -414,11 +447,14 @@ static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; }
#ifndef read_unaligned #ifndef read_unaligned
#if defined(__GNUC__) || __has_attribute(__packed__) #if defined(__GNUC__) || __has_attribute(__packed__)
typedef struct { typedef struct {
uint8_t unaligned_8; uint8_t unaligned_8;
uint16_t unaligned_16; uint16_t unaligned_16;
uint32_t unaligned_32; uint32_t unaligned_32;
uint64_t unaligned_64; uint64_t unaligned_64;
} __attribute__((__packed__)) t1ha_unaligned_proxy; } __attribute__((__packed__)) t1ha_unaligned_proxy;
\
#define read_unaligned(ptr, bits) \ #define read_unaligned(ptr, bits) \
(((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \
t1ha_unaligned_proxy, unaligned_##bits))) \ t1ha_unaligned_proxy, unaligned_##bits))) \
@@ -427,15 +463,19 @@ typedef struct {
#pragma warning( \
    disable : 4235) /* nonstandard extension used: '__unaligned' \
                     * keyword not supported on this architecture */
-#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr))
+#define read_unaligned(ptr, bits) \
+  (*(const __unaligned uint##bits##_t *)(ptr))
#else
#pragma pack(push, 1) #pragma pack(push, 1)
typedef struct { typedef struct {
uint8_t unaligned_8; uint8_t unaligned_8;
uint16_t unaligned_16; uint16_t unaligned_16;
uint32_t unaligned_32; uint32_t unaligned_32;
uint64_t unaligned_64; uint64_t unaligned_64;
} t1ha_unaligned_proxy; } t1ha_unaligned_proxy;
#pragma pack(pop) #pragma pack(pop)
#define read_unaligned(ptr, bits) \ #define read_unaligned(ptr, bits) \
(((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \
@@ -455,20 +495,25 @@ typedef struct {
         __attribute__((__aligned__(ALIGNMENT_##bits))) *)(ptr))
#elif __has_attribute(__assume_aligned__)
-static __always_inline const
-    uint16_t *__attribute__((__assume_aligned__(ALIGNMENT_16)))
-    cast_aligned_16(const void *ptr) {
+static __always_inline const uint16_t *__attribute__((
+    __assume_aligned__(ALIGNMENT_16))) cast_aligned_16(const void *ptr) {
  return (const uint16_t *)ptr;
}
-static __always_inline const
-    uint32_t *__attribute__((__assume_aligned__(ALIGNMENT_32)))
-    cast_aligned_32(const void *ptr) {
+static __always_inline const uint32_t *__attribute__((
+    __assume_aligned__(ALIGNMENT_32))) cast_aligned_32(const void *ptr) {
  return (const uint32_t *)ptr;
}
-static __always_inline const
-    uint64_t *__attribute__((__assume_aligned__(ALIGNMENT_64)))
-    cast_aligned_64(const void *ptr) {
+static __always_inline const uint64_t *__attribute__((
+    __assume_aligned__(ALIGNMENT_64))) cast_aligned_64(const void *ptr) {
  return (const uint64_t *)ptr;
}
#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr))
@ -490,7 +535,9 @@ static __always_inline const
#else #else
#define prefetch(ptr) \ #define prefetch(ptr) \
do { \ do { \
\
(void)(ptr); \ (void)(ptr); \
\
} while (0) } while (0)
#endif #endif
#endif /* prefetch */ #endif /* prefetch */
@ -526,18 +573,22 @@ static __always_inline const
#ifndef fetch16_le_aligned #ifndef fetch16_le_aligned
static __maybe_unused __always_inline uint16_t static __maybe_unused __always_inline uint16_t
fetch16_le_aligned(const void *v) { fetch16_le_aligned(const void *v) {
assert(((uintptr_t)v) % ALIGNMENT_16 == 0); assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return read_aligned(v, 16); return read_aligned(v, 16);
#else #else
return bswap16(read_aligned(v, 16)); return bswap16(read_aligned(v, 16));
#endif #endif
} }
#endif /* fetch16_le_aligned */ #endif /* fetch16_le_aligned */
#ifndef fetch16_le_unaligned #ifndef fetch16_le_unaligned
static __maybe_unused __always_inline uint16_t static __maybe_unused __always_inline uint16_t
fetch16_le_unaligned(const void *v) { fetch16_le_unaligned(const void *v) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE #if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
const uint8_t *p = (const uint8_t *)v; const uint8_t *p = (const uint8_t *)v;
return p[0] | (uint16_t)p[1] << 8; return p[0] | (uint16_t)p[1] << 8;
@ -546,24 +597,30 @@ fetch16_le_unaligned(const void *v) {
#else #else
return bswap16(read_unaligned(v, 16)); return bswap16(read_unaligned(v, 16));
#endif #endif
} }
#endif /* fetch16_le_unaligned */ #endif /* fetch16_le_unaligned */
#ifndef fetch32_le_aligned #ifndef fetch32_le_aligned
static __maybe_unused __always_inline uint32_t static __maybe_unused __always_inline uint32_t
fetch32_le_aligned(const void *v) { fetch32_le_aligned(const void *v) {
assert(((uintptr_t)v) % ALIGNMENT_32 == 0); assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return read_aligned(v, 32); return read_aligned(v, 32);
#else #else
return bswap32(read_aligned(v, 32)); return bswap32(read_aligned(v, 32));
#endif #endif
} }
#endif /* fetch32_le_aligned */ #endif /* fetch32_le_aligned */
#ifndef fetch32_le_unaligned #ifndef fetch32_le_unaligned
static __maybe_unused __always_inline uint32_t static __maybe_unused __always_inline uint32_t
fetch32_le_unaligned(const void *v) { fetch32_le_unaligned(const void *v) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE #if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
return fetch16_le_unaligned(v) | return fetch16_le_unaligned(v) |
(uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16; (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16;
@ -572,24 +629,30 @@ fetch32_le_unaligned(const void *v) {
#else #else
return bswap32(read_unaligned(v, 32)); return bswap32(read_unaligned(v, 32));
#endif #endif
} }
#endif /* fetch32_le_unaligned */ #endif /* fetch32_le_unaligned */
#ifndef fetch64_le_aligned #ifndef fetch64_le_aligned
static __maybe_unused __always_inline uint64_t static __maybe_unused __always_inline uint64_t
fetch64_le_aligned(const void *v) { fetch64_le_aligned(const void *v) {
assert(((uintptr_t)v) % ALIGNMENT_64 == 0); assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return read_aligned(v, 64); return read_aligned(v, 64);
#else #else
return bswap64(read_aligned(v, 64)); return bswap64(read_aligned(v, 64));
#endif #endif
} }
#endif /* fetch64_le_aligned */ #endif /* fetch64_le_aligned */
#ifndef fetch64_le_unaligned #ifndef fetch64_le_unaligned
static __maybe_unused __always_inline uint64_t static __maybe_unused __always_inline uint64_t
fetch64_le_unaligned(const void *v) { fetch64_le_unaligned(const void *v) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE #if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
return fetch32_le_unaligned(v) | return fetch32_le_unaligned(v) |
(uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32; (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32;
@ -598,11 +661,14 @@ fetch64_le_unaligned(const void *v) {
#else #else
return bswap64(read_unaligned(v, 64)); return bswap64(read_unaligned(v, 64));
#endif #endif
} }
#endif /* fetch64_le_unaligned */ #endif /* fetch64_le_unaligned */
static __maybe_unused __always_inline uint64_t tail64_le_aligned(const void *v,
                                                                 size_t tail) {
  const uint8_t *const p = (const uint8_t *)v;
#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__)
  /* We can perform a 'oneshot' read, which is little bit faster. */
@@ -611,6 +677,7 @@ static __maybe_unused __always_inline uint64_t tail64_le_aligned(const void *v,
#else
  uint64_t r = 0;
  switch (tail & 7) {
    default:
      unreachable();
    /* fall through */
@@ -669,8 +736,11 @@ static __maybe_unused __always_inline uint64_t tail64_le_aligned(const void *v,
    case 1:
      return r + p[0];
#endif
  }
#endif /* T1HA_USE_FAST_ONESHOT_READ */
}
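tail64_le_aligned() and its unaligned sibling read the trailing 1 to 8 bytes of a buffer as a little-endian value, either with one masked "oneshot" load or with the unrolled switch whose middle cases are elided above. A compact reference version (my sketch, equivalent for tail values 1..8, not the t1ha code):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* assemble the first `tail` bytes (1..8) of p as a little-endian integer,
   one byte at a time; the switch in t1ha_bits.h unrolls exactly this */
static uint64_t tail64_le_ref(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  uint64_t r = 0;
  for (size_t i = tail; i > 0; i--)
    r = (r << 8) | p[i - 1];
  return r;
}

int main(void) {
  const uint8_t buf[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
  printf("%llx\n", (unsigned long long)tail64_le_ref(buf, 3));
  /* prints 332211: three bytes, least significant first */
  return 0;
}
```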
#if T1HA_USE_FAST_ONESHOT_READ && \ #if T1HA_USE_FAST_ONESHOT_READ && \
@ -682,6 +752,7 @@ static __maybe_unused __always_inline uint64_t tail64_le_aligned(const void *v,
static __maybe_unused __always_inline uint64_t static __maybe_unused __always_inline uint64_t
tail64_le_unaligned(const void *v, size_t tail) { tail64_le_unaligned(const void *v, size_t tail) {
const uint8_t *p = (const uint8_t *)v; const uint8_t *p = (const uint8_t *)v;
#if defined(can_read_underside) && \ #if defined(can_read_underside) && \
(UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul)
@ -691,13 +762,17 @@ tail64_le_unaligned(const void *v, size_t tail) {
const unsigned offset = (8 - tail) & 7; const unsigned offset = (8 - tail) & 7;
const unsigned shift = offset << 3; const unsigned shift = offset << 3;
if (likely(can_read_underside(p, 8))) { if (likely(can_read_underside(p, 8))) {
p -= offset; p -= offset;
return fetch64_le_unaligned(p) >> shift; return fetch64_le_unaligned(p) >> shift;
} }
return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift); return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift);
#else #else
uint64_t r = 0; uint64_t r = 0;
switch (tail & 7) { switch (tail & 7) {
default: default:
unreachable(); unreachable();
/* fall through */ /* fall through */
@ -760,8 +835,11 @@ tail64_le_unaligned(const void *v, size_t tail) {
case 1: case 1:
return r + p[0]; return r + p[0];
#endif #endif
} }
#endif /* can_read_underside */ #endif /* can_read_underside */
} }
/*------------------------------------------------------------- Big Endian */ /*------------------------------------------------------------- Big Endian */
@ -769,18 +847,22 @@ tail64_le_unaligned(const void *v, size_t tail) {
#ifndef fetch16_be_aligned #ifndef fetch16_be_aligned
static __maybe_unused __always_inline uint16_t static __maybe_unused __always_inline uint16_t
fetch16_be_aligned(const void *v) { fetch16_be_aligned(const void *v) {
assert(((uintptr_t)v) % ALIGNMENT_16 == 0); assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return read_aligned(v, 16); return read_aligned(v, 16);
#else #else
return bswap16(read_aligned(v, 16)); return bswap16(read_aligned(v, 16));
#endif #endif
} }
#endif /* fetch16_be_aligned */ #endif /* fetch16_be_aligned */
#ifndef fetch16_be_unaligned #ifndef fetch16_be_unaligned
static __maybe_unused __always_inline uint16_t static __maybe_unused __always_inline uint16_t
fetch16_be_unaligned(const void *v) { fetch16_be_unaligned(const void *v) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE #if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
const uint8_t *p = (const uint8_t *)v; const uint8_t *p = (const uint8_t *)v;
return (uint16_t)p[0] << 8 | p[1]; return (uint16_t)p[0] << 8 | p[1];
@ -789,24 +871,30 @@ fetch16_be_unaligned(const void *v) {
#else #else
return bswap16(read_unaligned(v, 16)); return bswap16(read_unaligned(v, 16));
#endif #endif
} }
#endif /* fetch16_be_unaligned */ #endif /* fetch16_be_unaligned */
#ifndef fetch32_be_aligned #ifndef fetch32_be_aligned
static __maybe_unused __always_inline uint32_t static __maybe_unused __always_inline uint32_t
fetch32_be_aligned(const void *v) { fetch32_be_aligned(const void *v) {
assert(((uintptr_t)v) % ALIGNMENT_32 == 0); assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return read_aligned(v, 32); return read_aligned(v, 32);
#else #else
return bswap32(read_aligned(v, 32)); return bswap32(read_aligned(v, 32));
#endif #endif
} }
#endif /* fetch32_be_aligned */ #endif /* fetch32_be_aligned */
#ifndef fetch32_be_unaligned #ifndef fetch32_be_unaligned
static __maybe_unused __always_inline uint32_t static __maybe_unused __always_inline uint32_t
fetch32_be_unaligned(const void *v) { fetch32_be_unaligned(const void *v) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE #if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
return (uint32_t)fetch16_be_unaligned(v) << 16 | return (uint32_t)fetch16_be_unaligned(v) << 16 |
fetch16_be_unaligned((const uint8_t *)v + 2); fetch16_be_unaligned((const uint8_t *)v + 2);
@ -815,24 +903,30 @@ fetch32_be_unaligned(const void *v) {
#else #else
return bswap32(read_unaligned(v, 32)); return bswap32(read_unaligned(v, 32));
#endif #endif
} }
#endif /* fetch32_be_unaligned */ #endif /* fetch32_be_unaligned */
#ifndef fetch64_be_aligned #ifndef fetch64_be_aligned
static __maybe_unused __always_inline uint64_t static __maybe_unused __always_inline uint64_t
fetch64_be_aligned(const void *v) { fetch64_be_aligned(const void *v) {
assert(((uintptr_t)v) % ALIGNMENT_64 == 0); assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return read_aligned(v, 64); return read_aligned(v, 64);
#else #else
return bswap64(read_aligned(v, 64)); return bswap64(read_aligned(v, 64));
#endif #endif
} }
#endif /* fetch64_be_aligned */ #endif /* fetch64_be_aligned */
#ifndef fetch64_be_unaligned #ifndef fetch64_be_unaligned
static __maybe_unused __always_inline uint64_t static __maybe_unused __always_inline uint64_t
fetch64_be_unaligned(const void *v) { fetch64_be_unaligned(const void *v) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE #if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
return (uint64_t)fetch32_be_unaligned(v) << 32 | return (uint64_t)fetch32_be_unaligned(v) << 32 |
fetch32_be_unaligned((const uint8_t *)v + 4); fetch32_be_unaligned((const uint8_t *)v + 4);
@ -841,11 +935,14 @@ fetch64_be_unaligned(const void *v) {
#else #else
return bswap64(read_unaligned(v, 64)); return bswap64(read_unaligned(v, 64));
#endif #endif
} }
#endif /* fetch64_be_unaligned */ #endif /* fetch64_be_unaligned */
static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v, static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
size_t tail) { size_t tail) {
const uint8_t *const p = (const uint8_t *)v; const uint8_t *const p = (const uint8_t *)v;
#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) #if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__)
/* We can perform a 'oneshot' read, which is little bit faster. */ /* We can perform a 'oneshot' read, which is little bit faster. */
@ -853,6 +950,7 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
return fetch64_be_aligned(p) >> shift; return fetch64_be_aligned(p) >> shift;
#else #else
switch (tail & 7) { switch (tail & 7) {
default: default:
unreachable(); unreachable();
/* fall through */ /* fall through */
@@ -893,19 +991,23 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
             (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40;
    case 7:
      return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 |
-            (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 |
-            (uint64_t)p[0] << 48;
+            (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 |
+            (uint64_t)p[1] << 40 | (uint64_t)p[0] << 48;
    case 0:
      return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 |
-            (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 |
-            (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
+            (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 |
+            (uint64_t)p[2] << 40 | (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
#endif
  }
#endif /* T1HA_USE_FAST_ONESHOT_READ */
}
static __maybe_unused __always_inline uint64_t static __maybe_unused __always_inline uint64_t
tail64_be_unaligned(const void *v, size_t tail) { tail64_be_unaligned(const void *v, size_t tail) {
const uint8_t *p = (const uint8_t *)v; const uint8_t *p = (const uint8_t *)v;
#if defined(can_read_underside) && \ #if defined(can_read_underside) && \
(UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul)
@ -915,12 +1017,16 @@ tail64_be_unaligned(const void *v, size_t tail) {
const unsigned offset = (8 - tail) & 7; const unsigned offset = (8 - tail) & 7;
const unsigned shift = offset << 3; const unsigned shift = offset << 3;
if (likely(can_read_underside(p, 8))) { if (likely(can_read_underside(p, 8))) {
p -= offset; p -= offset;
return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift); return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift);
} }
return fetch64_be_unaligned(p) >> shift; return fetch64_be_unaligned(p) >> shift;
#else #else
switch (tail & 7) { switch (tail & 7) {
default: default:
unreachable(); unreachable();
/* fall through */ /* fall through */
@@ -966,35 +1072,46 @@ tail64_be_unaligned(const void *v, size_t tail) {
             (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40;
    case 7:
      return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 |
-            (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 |
-            (uint64_t)p[0] << 48;
+            (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 |
+            (uint64_t)p[1] << 40 | (uint64_t)p[0] << 48;
    case 0:
      return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 |
-            (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 |
-            (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
+            (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 |
+            (uint64_t)p[2] << 40 | (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
#endif
  }
#endif /* can_read_underside */
}
/***************************************************************************/ /***************************************************************************/
#ifndef rot64 #ifndef rot64
static __maybe_unused __always_inline uint64_t rot64(uint64_t v, unsigned s) { static __maybe_unused __always_inline uint64_t rot64(uint64_t v, unsigned s) {
return (v >> s) | (v << (64 - s)); return (v >> s) | (v << (64 - s));
} }
#endif /* rot64 */ #endif /* rot64 */
#ifndef mul_32x32_64 #ifndef mul_32x32_64
static __maybe_unused __always_inline uint64_t mul_32x32_64(uint32_t a, static __maybe_unused __always_inline uint64_t mul_32x32_64(uint32_t a,
uint32_t b) { uint32_t b) {
return a * (uint64_t)b; return a * (uint64_t)b;
} }
#endif /* mul_32x32_64 */ #endif /* mul_32x32_64 */
#ifndef add64carry_first
-static __maybe_unused __always_inline unsigned
-add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) {
+static __maybe_unused __always_inline unsigned add64carry_first(uint64_t base,
+                                                                uint64_t addend,
+                                                                uint64_t *sum) {
#if __has_builtin(__builtin_addcll)
  unsigned long long carryout;
  *sum = __builtin_addcll(base, addend, 0, &carryout);
@ -1003,12 +1120,17 @@ add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) {
*sum = base + addend; *sum = base + addend;
return *sum < addend; return *sum < addend;
#endif /* __has_builtin(__builtin_addcll) */ #endif /* __has_builtin(__builtin_addcll) */
} }
#endif /* add64carry_fist */ #endif /* add64carry_fist */
#ifndef add64carry_next
-static __maybe_unused __always_inline unsigned
-add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
+static __maybe_unused __always_inline unsigned add64carry_next(unsigned carry,
+                                                               uint64_t base,
+                                                               uint64_t addend,
+                                                               uint64_t *sum) {
#if __has_builtin(__builtin_addcll)
  unsigned long long carryout;
  *sum = __builtin_addcll(base, addend, carry, &carryout);
@ -1017,12 +1139,17 @@ add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
*sum = base + addend + carry; *sum = base + addend + carry;
return *sum < addend || (carry && *sum == addend); return *sum < addend || (carry && *sum == addend);
#endif /* __has_builtin(__builtin_addcll) */ #endif /* __has_builtin(__builtin_addcll) */
} }
#endif /* add64carry_next */ #endif /* add64carry_next */
#ifndef add64carry_last
-static __maybe_unused __always_inline void
-add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
+static __maybe_unused __always_inline void add64carry_last(unsigned carry,
+                                                           uint64_t base,
+                                                           uint64_t addend,
+                                                           uint64_t *sum) {
#if __has_builtin(__builtin_addcll)
  unsigned long long carryout;
  *sum = __builtin_addcll(base, addend, carry, &carryout);
@ -1030,13 +1157,16 @@ add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
#else #else
*sum = base + addend + carry; *sum = base + addend + carry;
#endif /* __has_builtin(__builtin_addcll) */ #endif /* __has_builtin(__builtin_addcll) */
} }
#endif /* add64carry_last */ #endif /* add64carry_last */
#ifndef mul_64x64_128 #ifndef mul_64x64_128
static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a,
uint64_t b, uint64_t b,
uint64_t *h) { uint64_t *h) {
#if (defined(__SIZEOF_INT128__) || \ #if (defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) && \ (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) && \
(!defined(__LCC__) || __LCC__ != 124) (!defined(__LCC__) || __LCC__ != 124)
@ -1063,16 +1193,21 @@ static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a,
add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h);
return l; return l;
#endif #endif
} }
#endif /* mul_64x64_128() */ #endif /* mul_64x64_128() */
#ifndef mul_64x64_high #ifndef mul_64x64_high
static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a,
uint64_t b) { uint64_t b) {
uint64_t h; uint64_t h;
mul_64x64_128(a, b, &h); mul_64x64_128(a, b, &h);
return h; return h;
} }
#endif /* mul_64x64_high */ #endif /* mul_64x64_high */
/***************************************************************************/ /***************************************************************************/
@@ -1089,43 +1224,54 @@ static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31);
/* xor high and low parts of full 128-bit product */
static __maybe_unused __always_inline uint64_t mux64(uint64_t v,
                                                     uint64_t prime) {
  uint64_t l, h;
  l = mul_64x64_128(v, prime, &h);
  return l ^ h;
}
static __maybe_unused __always_inline uint64_t final64(uint64_t a, uint64_t b) {
  uint64_t x = (a + rot64(b, 41)) * prime_0;
  uint64_t y = (rot64(a, 23) + b) * prime_6;
  return mux64(x ^ y, prime_5);
}
static __maybe_unused __always_inline void mixup64(uint64_t *__restrict a,
                                                   uint64_t *__restrict b,
                                                   uint64_t v, uint64_t prime) {
  uint64_t h;
  *a ^= mul_64x64_128(*b + v, prime, &h);
  *b += h;
}
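mux64() folds the full 128-bit product of v and a prime to 64 bits by xoring its halves, and final64()/mixup64() are built on top of it. A small cross-check I added (not t1ha code) using the compiler's __uint128_t, with the prime_6 constant taken from the hunk header above:

```c
#include <stdint.h>
#include <stdio.h>

/* reference mux64: multiply out to 128 bits, then xor the low and high
   halves (needs a compiler with __uint128_t, e.g. GCC/Clang on 64-bit) */
static uint64_t mux64_ref(uint64_t v, uint64_t prime) {
  __uint128_t product = (__uint128_t)v * prime;
  return (uint64_t)product ^ (uint64_t)(product >> 64);
}

int main(void) {
  const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); /* from the diff */
  printf("mux64_ref(42, prime_6) = %016llx\n",
         (unsigned long long)mux64_ref(42, prime_6));
  return 0;
}
```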
/***************************************************************************/ /***************************************************************************/
typedef union t1ha_uint128 { typedef union t1ha_uint128 {
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
__uint128_t v; __uint128_t v;
#endif #endif
struct { struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint64_t l, h; uint64_t l, h;
#else #else
uint64_t h, l; uint64_t h, l;
#endif #endif
}; };
} t1ha_uint128_t; } t1ha_uint128_t;
static __maybe_unused __always_inline t1ha_uint128_t static __maybe_unused __always_inline t1ha_uint128_t
not128(const t1ha_uint128_t v) { not128(const t1ha_uint128_t v) {
t1ha_uint128_t r; t1ha_uint128_t r;
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
@ -1135,10 +1281,12 @@ not128(const t1ha_uint128_t v) {
r.h = ~v.h; r.h = ~v.h;
#endif #endif
return r; return r;
} }
static __maybe_unused __always_inline t1ha_uint128_t static __maybe_unused __always_inline t1ha_uint128_t
left128(const t1ha_uint128_t v, unsigned s) { left128(const t1ha_uint128_t v, unsigned s) {
t1ha_uint128_t r; t1ha_uint128_t r;
assert(s < 128); assert(s < 128);
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
@ -1149,10 +1297,12 @@ left128(const t1ha_uint128_t v, unsigned s) {
r.h = (s < 64) ? (v.h << s) | (s ? v.l >> (64 - s) : 0) : v.l << (s - 64); r.h = (s < 64) ? (v.h << s) | (s ? v.l >> (64 - s) : 0) : v.l << (s - 64);
#endif #endif
return r; return r;
} }
static __maybe_unused __always_inline t1ha_uint128_t static __maybe_unused __always_inline t1ha_uint128_t
right128(const t1ha_uint128_t v, unsigned s) { right128(const t1ha_uint128_t v, unsigned s) {
t1ha_uint128_t r; t1ha_uint128_t r;
assert(s < 128); assert(s < 128);
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
@ -1163,10 +1313,12 @@ right128(const t1ha_uint128_t v, unsigned s) {
r.h = (s < 64) ? v.h >> s : 0; r.h = (s < 64) ? v.h >> s : 0;
#endif #endif
return r; return r;
} }
static __maybe_unused __always_inline t1ha_uint128_t or128(t1ha_uint128_t x, static __maybe_unused __always_inline t1ha_uint128_t or128(t1ha_uint128_t x,
t1ha_uint128_t y) { t1ha_uint128_t y) {
t1ha_uint128_t r; t1ha_uint128_t r;
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
@ -1176,10 +1328,12 @@ static __maybe_unused __always_inline t1ha_uint128_t or128(t1ha_uint128_t x,
r.h = x.h | y.h; r.h = x.h | y.h;
#endif #endif
return r; return r;
} }
static __maybe_unused __always_inline t1ha_uint128_t xor128(t1ha_uint128_t x, static __maybe_unused __always_inline t1ha_uint128_t xor128(t1ha_uint128_t x,
t1ha_uint128_t y) { t1ha_uint128_t y) {
t1ha_uint128_t r; t1ha_uint128_t r;
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
@ -1189,10 +1343,12 @@ static __maybe_unused __always_inline t1ha_uint128_t xor128(t1ha_uint128_t x,
r.h = x.h ^ y.h; r.h = x.h ^ y.h;
#endif #endif
return r; return r;
} }
static __maybe_unused __always_inline t1ha_uint128_t rot128(t1ha_uint128_t v, static __maybe_unused __always_inline t1ha_uint128_t rot128(t1ha_uint128_t v,
unsigned s) { unsigned s) {
s &= 127; s &= 127;
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
@ -1201,10 +1357,12 @@ static __maybe_unused __always_inline t1ha_uint128_t rot128(t1ha_uint128_t v,
#else #else
return s ? or128(left128(v, 128 - s), right128(v, s)) : v; return s ? or128(left128(v, 128 - s), right128(v, s)) : v;
#endif #endif
} }
static __maybe_unused __always_inline t1ha_uint128_t add128(t1ha_uint128_t x, static __maybe_unused __always_inline t1ha_uint128_t add128(t1ha_uint128_t x,
t1ha_uint128_t y) { t1ha_uint128_t y) {
t1ha_uint128_t r; t1ha_uint128_t r;
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
@ -1213,10 +1371,12 @@ static __maybe_unused __always_inline t1ha_uint128_t add128(t1ha_uint128_t x,
add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h); add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h);
#endif #endif
return r; return r;
} }
static __maybe_unused __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x, static __maybe_unused __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x,
t1ha_uint128_t y) { t1ha_uint128_t y) {
t1ha_uint128_t r; t1ha_uint128_t r;
#if defined(__SIZEOF_INT128__) || \ #if defined(__SIZEOF_INT128__) || \
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
@ -1226,6 +1386,7 @@ static __maybe_unused __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x,
r.h += x.l * y.h + y.l * x.h; r.h += x.l * y.h + y.l * x.h;
#endif #endif
return r; return r;
} }
/***************************************************************************/ /***************************************************************************/
@@ -1233,22 +1394,29 @@ static __maybe_unused __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x,
#if T1HA0_AESNI_AVAILABLE && defined(__ia32__)
uint64_t t1ha_ia32cpu_features(void);
-static __maybe_unused __always_inline bool
-t1ha_ia32_AESNI_avail(uint64_t ia32cpu_features) {
+static __maybe_unused __always_inline bool t1ha_ia32_AESNI_avail(
+    uint64_t ia32cpu_features) {
  /* check for AES-NI */
  return (ia32cpu_features & UINT32_C(0x02000000)) != 0;
}
-static __maybe_unused __always_inline bool
-t1ha_ia32_AVX_avail(uint64_t ia32cpu_features) {
+static __maybe_unused __always_inline bool t1ha_ia32_AVX_avail(
+    uint64_t ia32cpu_features) {
  /* check for any AVX */
  return (ia32cpu_features & UINT32_C(0x1A000000)) == UINT32_C(0x1A000000);
}
-static __maybe_unused __always_inline bool
-t1ha_ia32_AVX2_avail(uint64_t ia32cpu_features) {
+static __maybe_unused __always_inline bool t1ha_ia32_AVX2_avail(
+    uint64_t ia32cpu_features) {
  /* check for 'Advanced Vector Extensions 2' */
  return ((ia32cpu_features >> 32) & 32) != 0;
}
#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */

View File

@@ -74,3 +74,4 @@ extern const uint64_t t1ha_refval_ia32aes_a[81];
extern const uint64_t t1ha_refval_ia32aes_b[81];
#endif /* T1HA0_AESNI_AVAILABLE */
#endif /* T1HA0_DISABLED */

File diff suppressed because it is too large

View File

@@ -1967,6 +1967,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
      continue;
    }
#endif
#ifdef _DEBUG
@@ -2789,6 +2790,7 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
      continue;
    }
#endif
    t = taint;

View File

@@ -402,7 +402,9 @@ static void usage(u8 *argv0, int more_help) {
#endif
#ifdef _AFL_SPECIAL_PERFORMANCE
-  SAYF("Compiled with special performance options for this specific system, it might not work on other platforms!\n");
+  SAYF(
+      "Compiled with special performance options for this specific system, it "
+      "might not work on other platforms!\n");
#endif
  SAYF("For additional help please consult %s/README.md :)\n\n", doc_path);

View File

@@ -14,7 +14,6 @@
#undef XXH_INLINE_ALL
#endif
void rand_set_seed(afl_state_t *afl, s64 init_seed) {
  afl->init_seed = init_seed;

View File

@@ -13,6 +13,7 @@
#undef XXH_INLINE_ALL
int main() {
  char *data = malloc(4097);
  struct timespec start, end;
  long long duration;
@@ -21,22 +22,32 @@ int main() {
  clock_gettime(CLOCK_MONOTONIC, &start);
  for (i = 0; i < 100000000; ++i) {
    res = XXH3_64bits(data, 4097);
    memcpy(data + 16, (char *)&res, 8);
  }
  clock_gettime(CLOCK_MONOTONIC, &end);
-  duration = (end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec);
+  duration = (end.tv_sec - start.tv_sec) * 1000000000LL +
+             (end.tv_nsec - start.tv_nsec);
  printf("xxh3 duration: %lld ns\n", duration);
  memset(data, 0, 4097);
  clock_gettime(CLOCK_MONOTONIC, &start);
  for (i = 0; i < 100000000; ++i) {
    res = t1ha0_ia32aes(data, 4097);
    memcpy(data + 16, (char *)&res, 8);
  }
  clock_gettime(CLOCK_MONOTONIC, &end);
-  duration = (end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec);
+  duration = (end.tv_sec - start.tv_sec) * 1000000000LL +
+             (end.tv_nsec - start.tv_nsec);
  printf("t1ha0_ia32aes duration: %lld ns\n", duration);
  return 0;
}
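Each loop hashes the 4097-byte buffer 100 million times and prints the total wall-clock time in nanoseconds, so the per-hash cost is the printed total divided by 1e8. A small helper one could add to turn those totals into comparable numbers (my addition, not part of the benchmark):

```c
#include <stdio.h>

/* convert a total runtime in nanoseconds into per-call latency and
   throughput, matching the 100000000 iterations of 4097 bytes above */
static void report(const char *name, long long total_ns) {
  const long long iterations = 100000000LL;
  const double bytes_hashed = 4097.0 * (double)iterations;
  printf("%s: %.2f ns/hash, %.2f GB/s\n", name,
         (double)total_ns / (double)iterations,
         bytes_hashed / (double)total_ns); /* bytes per ns equals GB/s */
}

int main(void) {
  report("example", 250000000000LL); /* e.g. 250 s total for 1e8 hashes */
  return 0;
}
```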