CPU: remove AVX code, switch to __builtin for AES detection (#1959)

* [cpu] remove avx detect and code blocks, try to switch to __builtin
* [cpu] use __builtin_* only on x86 systems
* [cpu] perform check in separate function
* [cpu] set AES definition on MSVC
* update x86 and aes support checks at compile time
* [cmake] update comment about AES on MSVC
This commit is contained in:
R4SAS 2023-08-31 16:52:51 +00:00 committed by GitHub
parent 32c5ff23a6
commit 7b6aa41ca8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 222 additions and 232 deletions

View file

@ -34,3 +34,6 @@ trim_trailing_whitespace = false
[*.yml]
indent_style = space
indent_size = 2
[*.patch]
trim_trailing_whitespace = false

View file

@ -197,14 +197,11 @@ endif()
# Note: AES-NI and AVX are available on x86-based CPUs.
# There is also an ARM64 implementation, but we currently don't support it.
# MSVC is not supported.
if(MSVC)
message(STATUS "AES-NI is not supported on MSVC, option was disabled")
set(WITH_AESNI OFF)
endif()
# MSVC is not supported due to different ASM processing, so we hope OpenSSL has its own checks to run optimized code.
if(WITH_AESNI AND (ARCHITECTURE MATCHES "x86_64" OR ARCHITECTURE MATCHES "i386"))
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
endif()
add_definitions(-D__AES__)
endif()

View file

@ -280,8 +280,6 @@ verify = true
[cpuext]
## Use the CPU AES-NI instruction set for cryptography when available (default: true)
# aesni = true
## Use CPU AVX instructions set when work with cryptography when available (default: true)
# avx = true
## Force usage of CPU instruction set extensions, even if they are not found (default: false)
## DO NOT TOUCH this option unless you really know what you are doing!
# force = false

View file

@ -150,12 +150,11 @@ namespace util
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
bool ssu; i2p::config::GetOption("ssu", ssu);
if (!ssu && i2p::config::IsDefault ("precomputation.elgamal"))
precomputation = false; // we don't elgamal table if no ssu, unless it's specified explicitly
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt);
i2p::transport::InitAddressFromIface (); // get address4/6 from interfaces

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2020, The PurpleI2P Project
* Copyright (c) 2013-2023, The PurpleI2P Project
*
* This file is part of Purple i2pd project and licensed under BSD3
*
@ -7,52 +7,52 @@
*/
#include "CPU.h"
#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h>
#endif
#include "Log.h"
#ifndef bit_AES
#define bit_AES (1 << 25)
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif
#if defined(_MSC_VER)
#include <intrin.h>
#ifndef bit_AES
#define bit_AES (1 << 25)
#endif
#endif
namespace i2p
{
namespace cpu
{
bool aesni = false;
bool avx = false;
void Detect(bool AesSwitch, bool AvxSwitch, bool force)
inline bool cpu_support_aes()
{
#if defined(__x86_64__) || defined(__i386__)
int info[4];
__cpuid(0, info[0], info[1], info[2], info[3]);
if (info[0] >= 0x00000001) {
__cpuid(0x00000001, info[0], info[1], info[2], info[3]);
#if defined (_WIN32) && (WINVER == 0x0501) // WinXP
if (AesSwitch && force) { // only if forced
#else
if ((info[2] & bit_AES && AesSwitch) || (AesSwitch && force)) {
#if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__))
#if defined(_MSC_VER)
int cpu_info[4];
__cpuid(cpu_info, 1);
return ((cpu_info[2] & bit_AES) != 0);
#elif defined(__clang__)
#if __clang_major__ >= 6
__builtin_cpu_init();
#endif
return __builtin_cpu_supports("aes");
#elif defined(__GNUC__)
__builtin_cpu_init();
return __builtin_cpu_supports("aes");
#else
return false;
#endif
#else
return false;
#endif
}
void Detect(bool AesSwitch, bool force)
{
if ((cpu_support_aes() && AesSwitch) || (AesSwitch && force)) {
aesni = true;
}
#if defined (_WIN32) && (WINVER == 0x0501) // WinXP
if (AvxSwitch && force) { // only if forced
#else
if ((info[2] & bit_AVX && AvxSwitch) || (AvxSwitch && force)) {
#endif
avx = true;
}
}
#endif // defined(__x86_64__) || defined(__i386__)
LogPrint(eLogInfo, "AESNI ", (aesni ? "enabled" : "disabled"));
LogPrint(eLogInfo, "AVX ", (avx ? "enabled" : "disabled"));
}
}
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2020, The PurpleI2P Project
* Copyright (c) 2013-2023, The PurpleI2P Project
*
* This file is part of Purple i2pd project and licensed under BSD3
*
@ -14,9 +14,8 @@ namespace i2p
namespace cpu
{
extern bool aesni;
extern bool avx;
void Detect(bool AesSwitch, bool AvxSwitch, bool force);
void Detect(bool AesSwitch, bool force);
}
}

View file

@ -193,7 +193,7 @@ namespace config {
options_description precomputation("Precomputation options");
precomputation.add_options()
("precomputation.elgamal",
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
value<bool>()->default_value(false),
#else
value<bool>()->default_value(true),
@ -308,7 +308,7 @@ namespace config {
options_description cpuext("CPU encryption extensions options");
cpuext.add_options()
("cpuext.aesni", bool_switch()->default_value(true), "Use auto detection for AESNI CPU extensions. If false, AESNI will be not used")
("cpuext.avx", bool_switch()->default_value(true), "Use auto detection for AVX CPU extensions. If false, AVX will be not used")
("cpuext.avx", bool_switch()->default_value(false), "Deprecated option")
("cpuext.force", bool_switch()->default_value(false), "Force usage of CPU extensions. Useful when cpuinfo is not available on virtual machines")
;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2022, The PurpleI2P Project
* Copyright (c) 2013-2023, The PurpleI2P Project
*
* This file is part of Purple i2pd project and licensed under BSD3
*
@ -28,6 +28,12 @@
#include "I2PEndian.h"
#include "Log.h"
#if defined(__AES__) && !defined(_MSC_VER) && ((defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)))
#define SUPPORTS_AES 1
#else
#define SUPPORTS_AES 0
#endif
namespace i2p
{
namespace crypto
@ -361,7 +367,7 @@ namespace crypto
BIGNUM * b1 = BN_CTX_get (ctx);
BIGNUM * b = BN_CTX_get (ctx);
// select random k
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
BN_rand (k, ELGAMAL_FULL_EXPONENT_NUM_BITS, -1, 1); // full exponent for x64
#else
BN_rand (k, ELGAMAL_SHORT_EXPONENT_NUM_BITS, -1, 1); // short exponent of 226 bits
@ -428,7 +434,7 @@ namespace crypto
void GenerateElGamalKeyPair (uint8_t * priv, uint8_t * pub)
{
#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER)
#if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)) || defined(_MSC_VER)
RAND_bytes (priv, 256);
#else
// lower 226 bits (28 bytes and 2 bits) only. short exponent
@ -555,7 +561,7 @@ namespace crypto
}
// AES
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define KeyExpansion256(round0,round1) \
"pshufd $0xff, %%xmm2, %%xmm2 \n" \
"movaps %%xmm1, %%xmm4 \n" \
@ -580,7 +586,7 @@ namespace crypto
"movaps %%xmm3, "#round1"(%[sched]) \n"
#endif
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
void ECBCryptoAESNI::ExpandKey (const AESKey& key)
{
__asm__
@ -621,7 +627,7 @@ namespace crypto
#endif
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define EncryptAES256(sched) \
"pxor (%["#sched"]), %%xmm0 \n" \
"aesenc 16(%["#sched"]), %%xmm0 \n" \
@ -642,7 +648,7 @@ namespace crypto
void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -650,7 +656,9 @@ namespace crypto
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
:
: [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out)
: "%xmm0", "memory"
);
}
else
@ -660,7 +668,7 @@ namespace crypto
}
}
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define DecryptAES256(sched) \
"pxor 224(%["#sched"]), %%xmm0 \n" \
"aesdec 208(%["#sched"]), %%xmm0 \n" \
@ -681,7 +689,7 @@ namespace crypto
void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -689,7 +697,9 @@ namespace crypto
"movups (%[in]), %%xmm0 \n"
DecryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
:
: [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out)
: "%xmm0", "memory"
);
}
else
@ -699,7 +709,7 @@ namespace crypto
}
}
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define CallAESIMC(offset) \
"movaps "#offset"(%[shed]), %%xmm0 \n" \
"aesimc %%xmm0, %%xmm0 \n" \
@ -708,7 +718,7 @@ namespace crypto
void ECBEncryption::SetKey (const AESKey& key)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
ExpandKey (key);
@ -722,7 +732,7 @@ namespace crypto
void ECBDecryption::SetKey (const AESKey& key)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
ExpandKey (key); // expand encryption key first
@ -742,7 +752,9 @@ namespace crypto
CallAESIMC(176)
CallAESIMC(192)
CallAESIMC(208)
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
:
: [shed]"r"(GetKeySchedule ())
: "%xmm0", "memory"
);
}
else
@ -754,7 +766,7 @@ namespace crypto
void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -799,7 +811,7 @@ namespace crypto
void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -823,7 +835,7 @@ namespace crypto
void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -869,7 +881,7 @@ namespace crypto
void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -893,7 +905,7 @@ namespace crypto
void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -934,7 +946,7 @@ namespace crypto
void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
@ -1285,9 +1297,9 @@ namespace crypto
}
}*/
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force)
void InitCrypto (bool precomputation, bool aesni, bool force)
{
i2p::cpu::Detect (aesni, avx, force);
i2p::cpu::Detect (aesni, force);
#if LEGACY_OPENSSL
SSL_library_init ();
#endif
@ -1297,7 +1309,7 @@ namespace crypto
CRYPTO_set_locking_callback (OpensslLockingCallback);*/
if (precomputation)
{
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
g_ElggTable = new BIGNUM * [ELGAMAL_FULL_EXPONENT_NUM_BYTES][255];
PrecalculateElggTable (g_ElggTable, ELGAMAL_FULL_EXPONENT_NUM_BYTES);
#else
@ -1312,7 +1324,7 @@ namespace crypto
if (g_ElggTable)
{
DestroyElggTable (g_ElggTable,
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
ELGAMAL_FULL_EXPONENT_NUM_BYTES
#else
ELGAMAL_SHORT_EXPONENT_NUM_BYTES

View file

@ -307,7 +307,7 @@ namespace crypto
void InitNoiseIKState (NoiseSymmetricState& state, const uint8_t * pub); // Noise_IK (ratchets)
// init and terminate
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force);
void InitCrypto (bool precomputation, bool aesni, bool force);
void TerminateCrypto ();
}
}

View file

@ -803,29 +803,12 @@ namespace data
XORMetric operator^(const IdentHash& key1, const IdentHash& key2)
{
XORMetric m;
#if (defined(__x86_64__) || defined(__i386__)) && defined(__AVX__) // not all X86 targets supports AVX (like old Pentium, see #1600)
if(i2p::cpu::avx)
{
__asm__
(
"vmovups %1, %%ymm0 \n"
"vmovups %2, %%ymm1 \n"
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
"vmovups %%ymm1, %0 \n"
: "=m"(*m.metric)
: "m"(*key1), "m"(*key2)
: "memory", "%xmm0", "%xmm1" // should be replaced by %ymm0/1 once supported by compiler
);
}
else
#endif
{
const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL ();
m.metric_ll[0] = hash1[0] ^ hash2[0];
m.metric_ll[1] = hash1[1] ^ hash2[1];
m.metric_ll[2] = hash1[2] ^ hash2[2];
m.metric_ll[3] = hash1[3] ^ hash2[3];
}
return m;
}

View file

@ -38,9 +38,8 @@ namespace api
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt);
int netID; i2p::config::GetOption("netid", netID);
i2p::context.SetNetID (netID);