Automate AES-NI and AVX detection on runtime, make it default on x86-based systems (#1578)

Rework CPU extensions detection code and build with AES-NI and AVX support by default
This commit is contained in:
R4SAS 2020-11-15 01:31:20 +03:00 committed by GitHub
parent 7e874eaa7c
commit 62cd9fffa3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 208 additions and 237 deletions

View file

@ -27,37 +27,24 @@ namespace cpu
bool aesni = false;
bool avx = false;
void Detect()
void Detect(bool AesSwitch, bool AvxSwitch, bool force)
{
#if defined(__AES__) || defined(__AVX__)
#if defined(__x86_64__) || defined(__i386__)
int info[4];
__cpuid(0, info[0], info[1], info[2], info[3]);
if (info[0] >= 0x00000001) {
__cpuid(0x00000001, info[0], info[1], info[2], info[3]);
#ifdef __AES__
aesni = info[2] & bit_AES; // AESNI
#endif // __AES__
#ifdef __AVX__
avx = info[2] & bit_AVX; // AVX
#endif // __AVX__
if ((info[2] & bit_AES && AesSwitch) || (AesSwitch && force)) {
aesni = true;
}
if ((info[2] & bit_AVX && AvxSwitch) || (AvxSwitch && force)) {
avx = true;
}
}
#endif // defined(__x86_64__) || defined(__i386__)
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __AES__
if(aesni)
{
LogPrint(eLogInfo, "AESNI enabled");
}
#endif // __AES__
#ifdef __AVX__
if(avx)
{
LogPrint(eLogInfo, "AVX enabled");
}
#endif // __AVX__
#endif // defined(__AES__) || defined(__AVX__)
LogPrint(eLogInfo, "AESNI ", (aesni ? "enabled" : "disabled"));
LogPrint(eLogInfo, "AVX ", (avx ? "enabled" : "disabled"));
}
}
}

View file

@ -16,7 +16,7 @@ namespace cpu
extern bool aesni;
extern bool avx;
void Detect();
void Detect(bool AesSwitch, bool AvxSwitch, bool force);
}
}

View file

@ -47,11 +47,11 @@ namespace config {
("ifname", value<std::string>()->default_value(""), "Network interface to bind to")
("ifname4", value<std::string>()->default_value(""), "Network interface to bind to for ipv4")
("ifname6", value<std::string>()->default_value(""), "Network interface to bind to for ipv6")
("nat", value<bool>()->default_value(true), "Should we assume we are behind NAT? (default: enabled)")
("nat", bool_switch()->default_value(true), "Should we assume we are behind NAT? (default: enabled)")
("port", value<uint16_t>()->default_value(0), "Port to listen for incoming connections (default: auto)")
("ipv4", value<bool>()->default_value(true), "Enable communication through ipv4 (default: enabled)")
("ipv4", bool_switch()->default_value(true), "Enable communication through ipv4 (default: enabled)")
("ipv6", bool_switch()->default_value(false), "Enable communication through ipv6 (default: disabled)")
("reservedrange", value<bool>()->default_value(true), "Check remote RI for being in blacklist of reserved IP ranges (default: enabled)")
("reservedrange", bool_switch()->default_value(true), "Check remote RI for being in blacklist of reserved IP ranges (default: enabled)")
("netid", value<int>()->default_value(I2PD_NET_ID), "Specify NetID. Main I2P is 2")
("daemon", bool_switch()->default_value(false), "Router will go to background after start (default: disabled)")
("service", bool_switch()->default_value(false), "Router will use system folders like '/var/lib/i2pd' (default: disabled)")
@ -59,8 +59,8 @@ namespace config {
("floodfill", bool_switch()->default_value(false), "Router will be floodfill (default: disabled)")
("bandwidth", value<std::string>()->default_value(""), "Bandwidth limit: integer in KBps or letters: L (32), O (256), P (2048), X (>9000)")
("share", value<int>()->default_value(100), "Limit of transit traffic from max bandwidth in percents. (default: 100)")
("ntcp", value<bool>()->default_value(false), "Ignored. Always false")
("ssu", value<bool>()->default_value(true), "Enable SSU transport (default: enabled)")
("ntcp", bool_switch()->default_value(false), "Ignored. Always false")
("ssu", bool_switch()->default_value(true), "Enable SSU transport (default: enabled)")
("ntcpproxy", value<std::string>()->default_value(""), "Ignored")
#ifdef _WIN32
("svcctl", value<std::string>()->default_value(""), "Windows service management ('install' or 'remove')")
@ -266,6 +266,13 @@ namespace config {
("persist.addressbook", value<bool>()->default_value(true), "Persist full addresses (default: true)")
;
options_description cpuext("CPU encryption extensions options");
cpuext.add_options()
("cpuext.aesni", bool_switch()->default_value(true), "Use auto detection for AESNI CPU extensions. If false, AESNI will be not used")
("cpuext.avx", bool_switch()->default_value(true), "Use auto detection for AVX CPU extensions. If false, AVX will be not used")
("cpuext.force", bool_switch()->default_value(false), "Force usage of CPU extensions. Useful when cpuinfo is not available on virtual machines")
;
m_OptionsDesc
.add(general)
.add(limits)
@ -286,6 +293,7 @@ namespace config {
.add(ntcp2)
.add(nettime)
.add(persist)
.add(cpuext)
;
}

View file

@ -119,7 +119,7 @@ namespace crypto
~CryptoConstants ()
{
BN_free (elgp); BN_free (elgg); BN_free (dsap); BN_free (dsaq); BN_free (dsag); BN_free (rsae);
BN_free (elgp); BN_free (elgg); BN_free (dsap); BN_free (dsaq); BN_free (dsag); BN_free (rsae);
}
};
@ -522,7 +522,7 @@ namespace crypto
bn2buf (y, encrypted + len, len);
RAND_bytes (encrypted + 2*len, 256 - 2*len);
}
// ecryption key and iv
// encryption key and iv
EC_POINT_mul (curve, p, nullptr, key, k, ctx);
EC_POINT_get_affine_coordinates_GFp (curve, p, x, y, nullptr);
uint8_t keyBuf[64], iv[64], shared[32];
@ -638,7 +638,7 @@ namespace crypto
{
uint64_t buf[256];
uint64_t hash[12]; // 96 bytes
#ifdef __AVX__
#if defined(__x86_64__) || defined(__i386__)
if(i2p::cpu::avx)
{
__asm__
@ -657,7 +657,7 @@ namespace crypto
:
: [key]"m"(*(const uint8_t *)key), [ipad]"m"(*ipads), [opad]"m"(*opads),
[buf]"r"(buf), [hash]"r"(hash)
: "memory", "%xmm0" // TODO: change to %ymm0 later
: "memory", "%xmm0" // TODO: change to %ymm0 later
);
}
else
@ -688,7 +688,7 @@ namespace crypto
// concatenate with msg
memcpy (buf + 8, msg, len);
// calculate first hash
MD5((uint8_t *)buf, len + 64, (uint8_t *)(hash + 8)); // 16 bytes
MD5((uint8_t *)buf, len + 64, (uint8_t *)(hash + 8)); // 16 bytes
// calculate digest
MD5((uint8_t *)hash, 96, digest);
@ -696,35 +696,28 @@ namespace crypto
// AES
#ifdef __AES__
#ifdef ARM64AES
void init_aesenc(void){
// TODO: Implementation
}
#endif
#define KeyExpansion256(round0,round1) \
"pshufd $0xff, %%xmm2, %%xmm2 \n" \
"movaps %%xmm1, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pshufd $0xff, %%xmm2, %%xmm2 \n" \
"movaps %%xmm1, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pxor %%xmm4, %%xmm1 \n" \
"pslldq $4, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pxor %%xmm4, %%xmm1 \n" \
"pslldq $4, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pxor %%xmm4, %%xmm1 \n" \
"pxor %%xmm2, %%xmm1 \n" \
"movaps %%xmm1, "#round0"(%[sched]) \n" \
"movaps %%xmm1, "#round0"(%[sched]) \n" \
"aeskeygenassist $0, %%xmm1, %%xmm4 \n" \
"pshufd $0xaa, %%xmm4, %%xmm2 \n" \
"movaps %%xmm3, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pshufd $0xaa, %%xmm4, %%xmm2 \n" \
"movaps %%xmm3, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pxor %%xmm4, %%xmm3 \n" \
"pslldq $4, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pxor %%xmm4, %%xmm3 \n" \
"pslldq $4, %%xmm4 \n" \
"pslldq $4, %%xmm4 \n" \
"pxor %%xmm4, %%xmm3 \n" \
"pxor %%xmm2, %%xmm3 \n" \
"movaps %%xmm3, "#round1"(%[sched]) \n"
"movaps %%xmm3, "#round1"(%[sched]) \n"
#endif
#ifdef __AES__
@ -750,16 +743,16 @@ namespace crypto
KeyExpansion256(192,208)
"aeskeygenassist $64, %%xmm3, %%xmm2 \n"
// key expansion final
"pshufd $0xff, %%xmm2, %%xmm2 \n"
"movaps %%xmm1, %%xmm4 \n"
"pslldq $4, %%xmm4 \n"
"pshufd $0xff, %%xmm2, %%xmm2 \n"
"movaps %%xmm1, %%xmm4 \n"
"pslldq $4, %%xmm4 \n"
"pxor %%xmm4, %%xmm1 \n"
"pslldq $4, %%xmm4 \n"
"pslldq $4, %%xmm4 \n"
"pxor %%xmm4, %%xmm1 \n"
"pslldq $4, %%xmm4 \n"
"pslldq $4, %%xmm4 \n"
"pxor %%xmm4, %%xmm1 \n"
"pxor %%xmm2, %%xmm1 \n"
"movups %%xmm1, 224(%[sched]) \n"
"movups %%xmm1, 224(%[sched]) \n"
: // output
: [key]"r"((const uint8_t *)key), [sched]"r"(GetKeySchedule ()) // input
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "memory" // clogged
@ -794,9 +787,9 @@ namespace crypto
{
__asm__
(
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
);
}
@ -833,9 +826,9 @@ namespace crypto
{
__asm__
(
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
DecryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
);
}
@ -848,7 +841,7 @@ namespace crypto
#ifdef __AES__
#define CallAESIMC(offset) \
"movaps "#offset"(%[shed]), %%xmm0 \n" \
"movaps "#offset"(%[shed]), %%xmm0 \n" \
"aesimc %%xmm0, %%xmm0 \n" \
"movaps %%xmm0, "#offset"(%[shed]) \n"
#endif
@ -873,7 +866,7 @@ namespace crypto
if(i2p::cpu::aesni)
{
ExpandKey (key); // expand encryption key first
// then invert it using aesimc
// then invert it using aesimc
__asm__
(
CallAESIMC(16)
@ -906,18 +899,18 @@ namespace crypto
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[iv]), %%xmm1 \n"
"1: \n"
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched)
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"dec %[num] \n"
"jnz 1b \n"
"movups %%xmm1, (%[iv]) \n"
"movups %%xmm1, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
@ -951,12 +944,12 @@ namespace crypto
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[iv]) \n"
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out)
@ -975,19 +968,19 @@ namespace crypto
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[iv]), %%xmm1 \n"
"1: \n"
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"dec %[num] \n"
"jnz 1b \n"
"movups %%xmm1, (%[iv]) \n"
"movups %%xmm1, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
@ -1021,12 +1014,12 @@ namespace crypto
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"movups %%xmm0, (%[iv]) \n"
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"movups %%xmm0, (%[iv]) \n"
DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[out]) \n"
:
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out)
@ -1046,7 +1039,7 @@ namespace crypto
__asm__
(
// encrypt IV
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n"
// double IV encryption
@ -1056,11 +1049,11 @@ namespace crypto
"1: \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched_l)
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"dec %[num] \n"
"jnz 1b \n"
:
@ -1097,11 +1090,11 @@ namespace crypto
"1: \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n"
"movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched_l)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n"
"dec %[num] \n"
"jnz 1b \n"
@ -1324,23 +1317,23 @@ namespace crypto
}
void NoiseSymmetricState::MixHash (const uint8_t * buf, size_t len)
{
SHA256_CTX ctx;
SHA256_Init (&ctx);
SHA256_Update (&ctx, m_H, 32);
SHA256_Update (&ctx, buf, len);
SHA256_Final (m_H, &ctx);
}
{
SHA256_CTX ctx;
SHA256_Init (&ctx);
SHA256_Update (&ctx, m_H, 32);
SHA256_Update (&ctx, buf, len);
SHA256_Final (m_H, &ctx);
}
void NoiseSymmetricState::MixKey (const uint8_t * sharedSecret)
{
HKDF (m_CK, sharedSecret, 32, "", m_CK);
void NoiseSymmetricState::MixKey (const uint8_t * sharedSecret)
{
HKDF (m_CK, sharedSecret, 32, "", m_CK);
// new ck is m_CK[0:31], key is m_CK[32:63]
}
}
// init and terminate
/* std::vector <std::unique_ptr<std::mutex> > m_OpenSSLMutexes;
/* std::vector <std::unique_ptr<std::mutex> > m_OpenSSLMutexes;
static void OpensslLockingCallback(int mode, int type, const char * file, int line)
{
if (type > 0 && (size_t)type < m_OpenSSLMutexes.size ())
@ -1351,10 +1344,10 @@ namespace crypto
m_OpenSSLMutexes[type]->unlock ();
}
}*/
void InitCrypto (bool precomputation)
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force)
{
i2p::cpu::Detect ();
i2p::cpu::Detect (aesni, avx, force);
#if LEGACY_OPENSSL
SSL_library_init ();
#endif

View file

@ -169,9 +169,6 @@ namespace crypto
#ifdef __AES__
#ifdef ARM64AES
void init_aesenc(void) __attribute__((constructor));
#endif
class ECBCryptoAESNI
{
public:
@ -316,13 +313,13 @@ namespace crypto
struct NoiseSymmetricState
{
uint8_t m_H[32] /*h*/, m_CK[64] /*[ck, k]*/;
void MixHash (const uint8_t * buf, size_t len);
void MixKey (const uint8_t * sharedSecret);
};
void MixKey (const uint8_t * sharedSecret);
};
// init and terminate
void InitCrypto (bool precomputation);
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force);
void TerminateCrypto ();
}
}

View file

@ -828,7 +828,7 @@ namespace data
XORMetric operator^(const IdentHash& key1, const IdentHash& key2)
{
XORMetric m;
#ifdef __AVX__
#if defined(__x86_64__) || defined(__i386__)
if(i2p::cpu::avx)
{
__asm__

View file

@ -37,7 +37,10 @@ namespace api
i2p::fs::Init();
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
i2p::crypto::InitCrypto (precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt);
int netID; i2p::config::GetOption("netid", netID);
i2p::context.SetNetID (netID);