/*
 * Copyright (c)2019 ZeroTier, Inc.
 *
 * Use of this software is governed by the Business Source License included
 * in the LICENSE.TXT file in the project's root directory.
 *
 * Change Date: 2023-01-01
 *
 * On the date above, in accordance with the Business Source License, use
 * of this software will be governed by version 2.0 of the Apache License.
 */
/****/

#ifndef ZT_AES_HPP
#define ZT_AES_HPP

#include "Constants.hpp"
#include "Utils.hpp"
#include "SHA512.hpp"

// ZT_AES_AESNI is defined only when one of the 64-bit x86 target macros below is set.
#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
// NOTE(review): the three include directives below have no header name in this copy of the
// file; the targets appear to have been lost in extraction (presumably the x86 intrinsic
// headers that declare __m128i and the _mm_* functions used further down). Restore them
// from the upstream file before compiling.
#include
#include
#include
#define ZT_AES_AESNI 1
#endif

// Key length in bytes (matches the key[32] parameters below).
#define ZT_AES_KEY_SIZE 32
// Cipher block length in bytes (matches the in[16]/out[16] parameters below).
#define ZT_AES_BLOCK_SIZE 16

namespace ZeroTier {

/**
 * AES-256 and AES-GCM AEAD
 *
 * Every public operation has two code paths: when ZT_AES_AESNI is defined and
 * HW_ACCEL is true the *_aesni helper is called, otherwise the *SW helper is
 * used. The helpers and the key storage member _k are declared outside the
 * part of the file visible here.
 */
class AES
{
public:
	/**
	 * This will be true if your platform's type of AES acceleration is supported on this machine
	 *
	 * Only declared here; the definition (and however the value is detected) lives elsewhere.
	 */
	static const bool HW_ACCEL;

	/**
	 * Construct without setting a key (the body is empty; nothing is initialized here)
	 */
	ZT_ALWAYS_INLINE AES() {}

	/**
	 * Construct and immediately key the cipher via init()
	 *
	 * @param key 32-byte key, passed straight to init()
	 */
	ZT_ALWAYS_INLINE AES(const uint8_t key[32]) { this->init(key); }

	/**
	 * Hands the key storage _k to Utils::burn() on destruction
	 * (presumably a secure wipe of key material; Utils::burn is defined elsewhere)
	 */
	ZT_ALWAYS_INLINE ~AES() { Utils::burn(&_k,sizeof(_k)); }

	/**
	 * Set (or re-set) this AES256 cipher's key
	 *
	 * @param key 32-byte key
	 */
	ZT_ALWAYS_INLINE void init(const uint8_t key[32])
	{
#ifdef ZT_AES_AESNI
		// Accelerated path; returns early so the software path below is skipped.
		if (likely(HW_ACCEL)) {
			_init_aesni(key);
			return;
		}
#endif
		_initSW(key);
	}

	/**
	 * Encrypt a single AES block (ECB mode)
	 *
	 * @param in Input block
	 * @param out Output block (can be same as input)
	 */
	ZT_ALWAYS_INLINE void encrypt(const uint8_t in[16],uint8_t out[16]) const
	{
#ifdef ZT_AES_AESNI
		// Accelerated path; returns early so the software path below is skipped.
		if (likely(HW_ACCEL)) {
			_encrypt_aesni(in,out);
			return;
		}
#endif
		_encryptSW(in,out);
	}

	/**
	 * Compute GMAC-AES256 (GCM without ciphertext)
	 *
	 * @param iv 96-bit IV
	 * @param in Input data
	 * @param len Length of input
	 * @param out 128-bit authentication tag from GMAC
	 */
	ZT_ALWAYS_INLINE void gmac(const uint8_t iv[12],const void *in,const unsigned int len,uint8_t out[16]) const
	{
#ifdef ZT_AES_AESNI
		// Accelerated path; returns early so the software path below is skipped.
		if (likely(HW_ACCEL)) {
			_gmac_aesni(iv,(const uint8_t *)in,len,out);
			return;
		}
#endif
		_gmacSW(iv,(const uint8_t *)in,len,out);
	}

	/**
	 * Encrypt or decrypt (they're the same) using AES256-CTR
	 *
	 * The counter here is a 128-bit big-endian value that starts at the IV. The
	 * software path below only increments the last 8 bytes of it (as a 64-bit
	 * big-endian integer, once per 16-byte block) and never carries into the
	 * first 8 bytes. The original note gives the resulting limit as 2^64-1 bytes
	 * (much larger than we ever do).
	 *
	 * @param iv 128-bit CTR IV
	 * @param in Input plaintext or ciphertext
	 * @param len Length of input
	 * @param out Output plaintext or ciphertext
	 */
	ZT_ALWAYS_INLINE void ctr(const uint8_t iv[16],const void *in,unsigned int len,void *out) const
	{
#ifdef ZT_AES_AESNI
		// Accelerated path; returns early so the software path below is skipped.
		if (likely(HW_ACCEL)) {
			_crypt_ctr_aesni(iv,(const uint8_t *)in,len,(uint8_t *)out);
			return;
		}
#endif

		// Software path. Note that the local array 'ctr' shadows this method's name.
		// ctr[] holds the current counter block, cenc[] the keystream block made from it.
		uint64_t ctr[2],cenc[2];
		memcpy(ctr,iv,16);
		// Host-order copy of the last 8 bytes of the counter block, used for incrementing.
		uint64_t bctr = Utils::ntoh(ctr[1]);

		const uint8_t *i = (const uint8_t *)in;
		uint8_t *o = (uint8_t *)out;

		// Full 16-byte blocks: encrypt the counter block, bump the counter, XOR with input.
		while (len >= 16) {
			_encryptSW((const uint8_t *)ctr,(uint8_t *)cenc);
			ctr[1] = Utils::hton(++bctr);
#ifdef ZT_NO_TYPE_PUNNING
			// Byte-at-a-time XOR, used when casting the data pointers to uint64_t* is not allowed.
			for(unsigned int k=0;k<16;++k)
				*(o++) = *(i++) ^ ((uint8_t *)cenc)[k];
#else
			// XOR 8 bytes at a time by reading/writing the data pointers as uint64_t.
			*((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[0];
			o += 8;
			i += 8;
			*((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[1];
			o += 8;
			i += 8;
#endif
			len -= 16;
		}

		// Remaining bytes (fewer than 16): one more keystream block is generated for them.
		if (len) {
			_encryptSW((const uint8_t *)ctr,(uint8_t *)cenc);
			// NOTE(review): text is missing at this point in this copy of the file. The loop
			// header below is cut off after 'k', and the tokens that follow ('= 128) {')
			// look like the tail of a 'while (len >= 128)' condition from a different,
			// AES-NI based function (presumably _crypt_ctr_aesni — TODO confirm). Whatever
			// stood between the two has been lost, including the definitions of iv0, k0..k14
			// and ZT_AES_CTR_AESNI_ROUND used below. Restore from the upstream file.
			for(unsigned int k=0;k= 128) {
			// Eight counter blocks per iteration. Each puts the byte-swapped counter value in
			// the high 64 bits and iv0 in the low 64 bits of the register, then XORs with k0
			// (presumably the first round key). c0 passes iv0 without the (__m64) cast that
			// the other loops use.
			__m128i c0 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton(ctr),iv0),k0);
			__m128i c1 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+1ULL)),iv0),k0);
			__m128i c2 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+2ULL)),iv0),k0);
			__m128i c3 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+3ULL)),iv0),k0);
			__m128i c4 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+4ULL)),iv0),k0);
			__m128i c5 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+5ULL)),iv0),k0);
			__m128i c6 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+6ULL)),iv0),k0);
			__m128i c7 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton((uint64_t)(ctr+7ULL)),iv0),k0);
			ctr += 8;
			// Macro definition is not visible here; presumably it applies one AES round with
			// the given round key to each of c0..c7 — TODO confirm against upstream.
			ZT_AES_CTR_AESNI_ROUND(k1);
			ZT_AES_CTR_AESNI_ROUND(k2);
			ZT_AES_CTR_AESNI_ROUND(k3);
			ZT_AES_CTR_AESNI_ROUND(k4);
			ZT_AES_CTR_AESNI_ROUND(k5);
			ZT_AES_CTR_AESNI_ROUND(k6);
			ZT_AES_CTR_AESNI_ROUND(k7);
			ZT_AES_CTR_AESNI_ROUND(k8);
			ZT_AES_CTR_AESNI_ROUND(k9);
			ZT_AES_CTR_AESNI_ROUND(k10);
			ZT_AES_CTR_AESNI_ROUND(k11);
			ZT_AES_CTR_AESNI_ROUND(k12);
			ZT_AES_CTR_AESNI_ROUND(k13);
			// Final round with k14, XOR with 16 input bytes (unaligned load), unaligned store.
			_mm_storeu_si128((__m128i *)out,_mm_xor_si128(_mm_loadu_si128((const __m128i *)in),_mm_aesenclast_si128(c0,k14)));
			_mm_storeu_si128((__m128i *)(out + 16),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 16)),_mm_aesenclast_si128(c1,k14)));
			_mm_storeu_si128((__m128i *)(out + 32),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 32)),_mm_aesenclast_si128(c2,k14)));
			_mm_storeu_si128((__m128i *)(out + 48),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 48)),_mm_aesenclast_si128(c3,k14)));
			_mm_storeu_si128((__m128i *)(out + 64),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 64)),_mm_aesenclast_si128(c4,k14)));
			_mm_storeu_si128((__m128i *)(out + 80),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 80)),_mm_aesenclast_si128(c5,k14)));
			_mm_storeu_si128((__m128i *)(out + 96),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 96)),_mm_aesenclast_si128(c6,k14)));
			_mm_storeu_si128((__m128i *)(out + 112),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 112)),_mm_aesenclast_si128(c7,k14)));
			in += 128;
			out += 128;
			len -= 128;
		}

#undef ZT_AES_CTR_AESNI_ROUND

		// One 16-byte block per iteration: 13 aesenc rounds (k1..k13) after the k0 XOR,
		// then the final round with k14 is XORed with the input and stored.
		while (len >= 16) {
			__m128i c0 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton(ctr++),(__m64)iv0),k0);
			c0 = _mm_aesenc_si128(c0,k1);
			c0 = _mm_aesenc_si128(c0,k2);
			c0 = _mm_aesenc_si128(c0,k3);
			c0 = _mm_aesenc_si128(c0,k4);
			c0 = _mm_aesenc_si128(c0,k5);
			c0 = _mm_aesenc_si128(c0,k6);
			c0 = _mm_aesenc_si128(c0,k7);
			c0 = _mm_aesenc_si128(c0,k8);
			c0 = _mm_aesenc_si128(c0,k9);
			c0 = _mm_aesenc_si128(c0,k10);
			c0 = _mm_aesenc_si128(c0,k11);
			c0 = _mm_aesenc_si128(c0,k12);
			c0 = _mm_aesenc_si128(c0,k13);
			_mm_storeu_si128((__m128i *)out,_mm_xor_si128(_mm_loadu_si128((const __m128i *)in),_mm_aesenclast_si128(c0,k14)));
			in += 16;
			out += 16;
			len -= 16;
		}

		// Fewer than 16 bytes left: compute one full keystream block in c0. The loop that
		// consumes it is cut off where the visible text ends.
		if (len) {
			__m128i c0 = _mm_xor_si128(_mm_set_epi64((__m64)Utils::hton(ctr++),(__m64)iv0),k0);
			c0 = _mm_aesenc_si128(c0,k1);
			c0 = _mm_aesenc_si128(c0,k2);
			c0 = _mm_aesenc_si128(c0,k3);
			c0 = _mm_aesenc_si128(c0,k4);
			c0 = _mm_aesenc_si128(c0,k5);
			c0 = _mm_aesenc_si128(c0,k6);
			c0 = _mm_aesenc_si128(c0,k7);
			c0 = _mm_aesenc_si128(c0,k8);
			c0 = _mm_aesenc_si128(c0,k9);
			c0 = _mm_aesenc_si128(c0,k10);
			c0 = _mm_aesenc_si128(c0,k11);
			c0 = _mm_aesenc_si128(c0,k12);
			c0 = _mm_aesenc_si128(c0,k13);
			c0 = _mm_aesenclast_si128(c0,k14);
			for(unsigned int i=0;i