mirror of
https://github.com/PurpleI2P/i2pd.git
synced 2025-01-22 13:27:17 +01:00
initial code for runtime detection of aesni/avx
This commit is contained in:
parent
b6e75e9c5a
commit
91e45d9a4a
|
@ -479,10 +479,9 @@ namespace crypto
|
|||
const uint64_t IPAD = 0x3636363636363636;
|
||||
const uint64_t OPAD = 0x5C5C5C5C5C5C5C5C;
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
static const uint64_t ipads[] = { IPAD, IPAD, IPAD, IPAD };
|
||||
static const uint64_t opads[] = { OPAD, OPAD, OPAD, OPAD };
|
||||
#endif
|
||||
|
||||
void HMACMD5Digest (uint8_t * msg, size_t len, const MACKey& key, uint8_t * digest)
|
||||
// key is 32 bytes
|
||||
|
@ -491,47 +490,73 @@ namespace crypto
|
|||
{
|
||||
uint64_t buf[256];
|
||||
uint64_t hash[12]; // 96 bytes
|
||||
#if defined(__AVX__) // for AVX
|
||||
__asm__
|
||||
(
|
||||
"vmovups %[key], %%ymm0 \n"
|
||||
"vmovups %[ipad], %%ymm1 \n"
|
||||
"vmovups %%ymm1, 32(%[buf]) \n"
|
||||
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
|
||||
"vmovups %%ymm1, (%[buf]) \n"
|
||||
"vmovups %[opad], %%ymm1 \n"
|
||||
"vmovups %%ymm1, 32(%[hash]) \n"
|
||||
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
|
||||
"vmovups %%ymm1, (%[hash]) \n"
|
||||
"vzeroall \n" // end of AVX
|
||||
"movups %%xmm0, 80(%[hash]) \n" // zero last 16 bytes
|
||||
:
|
||||
: [key]"m"(*(const uint8_t *)key), [ipad]"m"(*ipads), [opad]"m"(*opads),
|
||||
[buf]"r"(buf), [hash]"r"(hash)
|
||||
: "memory", "%xmm0" // TODO: change to %ymm0 later
|
||||
);
|
||||
if(i2p::cpu::avx)
|
||||
{
|
||||
#ifdef AVX
|
||||
__asm__
|
||||
(
|
||||
"vmovups %[key], %%ymm0 \n"
|
||||
"vmovups %[ipad], %%ymm1 \n"
|
||||
"vmovups %%ymm1, 32(%[buf]) \n"
|
||||
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
|
||||
"vmovups %%ymm1, (%[buf]) \n"
|
||||
"vmovups %[opad], %%ymm1 \n"
|
||||
"vmovups %%ymm1, 32(%[hash]) \n"
|
||||
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
|
||||
"vmovups %%ymm1, (%[hash]) \n"
|
||||
"vzeroall \n" // end of AVX
|
||||
"movups %%xmm0, 80(%[hash]) \n" // zero last 16 bytes
|
||||
:
|
||||
: [key]"m"(*(const uint8_t *)key), [ipad]"m"(*ipads), [opad]"m"(*opads),
|
||||
[buf]"r"(buf), [hash]"r"(hash)
|
||||
: "memory", "%xmm0" // TODO: change to %ymm0 later
|
||||
);
|
||||
#else
|
||||
// ikeypad
|
||||
buf[0] = key.GetLL ()[0] ^ IPAD;
|
||||
buf[1] = key.GetLL ()[1] ^ IPAD;
|
||||
buf[2] = key.GetLL ()[2] ^ IPAD;
|
||||
buf[3] = key.GetLL ()[3] ^ IPAD;
|
||||
buf[4] = IPAD;
|
||||
buf[5] = IPAD;
|
||||
buf[6] = IPAD;
|
||||
buf[7] = IPAD;
|
||||
// okeypad
|
||||
hash[0] = key.GetLL ()[0] ^ OPAD;
|
||||
hash[1] = key.GetLL ()[1] ^ OPAD;
|
||||
hash[2] = key.GetLL ()[2] ^ OPAD;
|
||||
hash[3] = key.GetLL ()[3] ^ OPAD;
|
||||
hash[4] = OPAD;
|
||||
hash[5] = OPAD;
|
||||
hash[6] = OPAD;
|
||||
hash[7] = OPAD;
|
||||
// fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P)
|
||||
memset (hash + 10, 0, 16);
|
||||
// ikeypad
|
||||
buf[0] = key.GetLL ()[0] ^ IPAD;
|
||||
buf[1] = key.GetLL ()[1] ^ IPAD;
|
||||
buf[2] = key.GetLL ()[2] ^ IPAD;
|
||||
buf[3] = key.GetLL ()[3] ^ IPAD;
|
||||
buf[4] = IPAD;
|
||||
buf[5] = IPAD;
|
||||
buf[6] = IPAD;
|
||||
buf[7] = IPAD;
|
||||
// okeypad
|
||||
hash[0] = key.GetLL ()[0] ^ OPAD;
|
||||
hash[1] = key.GetLL ()[1] ^ OPAD;
|
||||
hash[2] = key.GetLL ()[2] ^ OPAD;
|
||||
hash[3] = key.GetLL ()[3] ^ OPAD;
|
||||
hash[4] = OPAD;
|
||||
hash[5] = OPAD;
|
||||
hash[6] = OPAD;
|
||||
hash[7] = OPAD;
|
||||
// fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P)
|
||||
memset (hash + 10, 0, 16);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
// ikeypad
|
||||
buf[0] = key.GetLL ()[0] ^ IPAD;
|
||||
buf[1] = key.GetLL ()[1] ^ IPAD;
|
||||
buf[2] = key.GetLL ()[2] ^ IPAD;
|
||||
buf[3] = key.GetLL ()[3] ^ IPAD;
|
||||
buf[4] = IPAD;
|
||||
buf[5] = IPAD;
|
||||
buf[6] = IPAD;
|
||||
buf[7] = IPAD;
|
||||
// okeypad
|
||||
hash[0] = key.GetLL ()[0] ^ OPAD;
|
||||
hash[1] = key.GetLL ()[1] ^ OPAD;
|
||||
hash[2] = key.GetLL ()[2] ^ OPAD;
|
||||
hash[3] = key.GetLL ()[3] ^ OPAD;
|
||||
hash[4] = OPAD;
|
||||
hash[5] = OPAD;
|
||||
hash[6] = OPAD;
|
||||
hash[7] = OPAD;
|
||||
// fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P)
|
||||
memset (hash + 10, 0, 16);
|
||||
}
|
||||
|
||||
// concatenate with msg
|
||||
memcpy (buf + 8, msg, len);
|
||||
|
@ -543,8 +568,7 @@ namespace crypto
|
|||
}
|
||||
|
||||
// AES
|
||||
#ifdef AESNI
|
||||
|
||||
#ifdef AESNI
|
||||
#define KeyExpansion256(round0,round1) \
|
||||
"pshufd $0xff, %%xmm2, %%xmm2 \n" \
|
||||
"movaps %%xmm1, %%xmm4 \n" \
|
||||
|
@ -567,7 +591,9 @@ namespace crypto
|
|||
"pxor %%xmm4, %%xmm3 \n" \
|
||||
"pxor %%xmm2, %%xmm3 \n" \
|
||||
"movaps %%xmm3, "#round1"(%[sched]) \n"
|
||||
#endif
|
||||
|
||||
#ifdef AESNI
|
||||
void ECBCryptoAESNI::ExpandKey (const AESKey& key)
|
||||
{
|
||||
__asm__
|
||||
|
@ -604,8 +630,11 @@ namespace crypto
|
|||
: [key]"r"((const uint8_t *)key), [sched]"r"(GetKeySchedule ()) // input
|
||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "memory" // clogged
|
||||
);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if AESNI
|
||||
#define EncryptAES256(sched) \
|
||||
"pxor (%["#sched"]), %%xmm0 \n" \
|
||||
"aesenc 16(%["#sched"]), %%xmm0 \n" \
|
||||
|
@ -622,18 +651,31 @@ namespace crypto
|
|||
"aesenc 192(%["#sched"]), %%xmm0 \n" \
|
||||
"aesenc 208(%["#sched"]), %%xmm0 \n" \
|
||||
"aesenclast 224(%["#sched"]), %%xmm0 \n"
|
||||
|
||||
void ECBEncryptionAESNI::Encrypt (const ChipherBlock * in, ChipherBlock * out)
|
||||
#endif
|
||||
|
||||
void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out)
|
||||
{
|
||||
__asm__
|
||||
(
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
EncryptAES256(sched)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
|
||||
);
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
EncryptAES256(sched)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
|
||||
);
|
||||
#else
|
||||
AES_encrypt (in->buf, out->buf, &m_Key);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
AES_encrypt (in->buf, out->buf, &m_Key);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef AESNI
|
||||
#define DecryptAES256(sched) \
|
||||
"pxor 224(%["#sched"]), %%xmm0 \n" \
|
||||
"aesdec 208(%["#sched"]), %%xmm0 \n" \
|
||||
|
@ -650,79 +692,148 @@ namespace crypto
|
|||
"aesdec 32(%["#sched"]), %%xmm0 \n" \
|
||||
"aesdec 16(%["#sched"]), %%xmm0 \n" \
|
||||
"aesdeclast (%["#sched"]), %%xmm0 \n"
|
||||
|
||||
void ECBDecryptionAESNI::Decrypt (const ChipherBlock * in, ChipherBlock * out)
|
||||
#endif
|
||||
|
||||
void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out)
|
||||
{
|
||||
__asm__
|
||||
(
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
DecryptAES256(sched)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
|
||||
);
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
DecryptAES256(sched)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
|
||||
);
|
||||
#else
|
||||
AES_decrypt (in->buf, out->buf, &m_Key);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
AES_decrypt (in->buf, out->buf, &m_Key);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef AESNI
|
||||
#define CallAESIMC(offset) \
|
||||
"movaps "#offset"(%[shed]), %%xmm0 \n" \
|
||||
"aesimc %%xmm0, %%xmm0 \n" \
|
||||
"movaps %%xmm0, "#offset"(%[shed]) \n"
|
||||
|
||||
void ECBDecryptionAESNI::SetKey (const AESKey& key)
|
||||
{
|
||||
ExpandKey (key); // expand encryption key first
|
||||
// then invert it using aesimc
|
||||
__asm__
|
||||
(
|
||||
CallAESIMC(16)
|
||||
CallAESIMC(32)
|
||||
CallAESIMC(48)
|
||||
CallAESIMC(64)
|
||||
CallAESIMC(80)
|
||||
CallAESIMC(96)
|
||||
CallAESIMC(112)
|
||||
CallAESIMC(128)
|
||||
CallAESIMC(144)
|
||||
CallAESIMC(160)
|
||||
CallAESIMC(176)
|
||||
CallAESIMC(192)
|
||||
CallAESIMC(208)
|
||||
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void ECBEncryption::SetKey (const AESKey& key)
|
||||
{
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
ExpandKey (key); // expand encryption key first
|
||||
// then invert it using aesimc
|
||||
__asm__
|
||||
(
|
||||
CallAESIMC(16)
|
||||
CallAESIMC(32)
|
||||
CallAESIMC(48)
|
||||
CallAESIMC(64)
|
||||
CallAESIMC(80)
|
||||
CallAESIMC(96)
|
||||
CallAESIMC(112)
|
||||
CallAESIMC(128)
|
||||
CallAESIMC(144)
|
||||
CallAESIMC(160)
|
||||
CallAESIMC(176)
|
||||
CallAESIMC(192)
|
||||
CallAESIMC(208)
|
||||
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
|
||||
);
|
||||
#else
|
||||
AES_set_encrypt_key (key, 256, &m_Key);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
AES_set_encrypt_key (key, 256, &m_Key);
|
||||
}
|
||||
}
|
||||
|
||||
void ECBDecryption::SetKey (const AESKey& key)
|
||||
{
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
ExpandKey (key); // expand encryption key first
|
||||
// then invert it using aesimc
|
||||
__asm__
|
||||
(
|
||||
CallAESIMC(16)
|
||||
CallAESIMC(32)
|
||||
CallAESIMC(48)
|
||||
CallAESIMC(64)
|
||||
CallAESIMC(80)
|
||||
CallAESIMC(96)
|
||||
CallAESIMC(112)
|
||||
CallAESIMC(128)
|
||||
CallAESIMC(144)
|
||||
CallAESIMC(160)
|
||||
CallAESIMC(176)
|
||||
CallAESIMC(192)
|
||||
CallAESIMC(208)
|
||||
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
|
||||
);
|
||||
#else
|
||||
AES_set_decrypt_key (key, 256, &m_Key);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
AES_set_decrypt_key (key, 256, &m_Key);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"1: \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
EncryptAES256(sched)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
"movups %%xmm1, (%[iv]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
|
||||
: "%xmm0", "%xmm1", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
*m_LastBlock.GetChipherBlock () ^= in[i];
|
||||
m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ());
|
||||
out[i] = *m_LastBlock.GetChipherBlock ();
|
||||
}
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"1: \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
EncryptAES256(sched)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
"movups %%xmm1, (%[iv]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
|
||||
: "%xmm0", "%xmm1", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
{
|
||||
*m_LastBlock.GetChipherBlock () ^= in[i];
|
||||
m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ());
|
||||
out[i] = *m_LastBlock.GetChipherBlock ();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
{
|
||||
*m_LastBlock.GetChipherBlock () ^= in[i];
|
||||
m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ());
|
||||
out[i] = *m_LastBlock.GetChipherBlock ();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CBCEncryption::Encrypt (const uint8_t * in, std::size_t len, uint8_t * out)
|
||||
|
@ -735,57 +846,75 @@ namespace crypto
|
|||
|
||||
void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out)
|
||||
{
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
EncryptAES256(sched)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"movups %%xmm0, (%[iv]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
EncryptAES256(sched)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"movups %%xmm0, (%[iv]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
#else
|
||||
Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
|
||||
Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
|
||||
}
|
||||
|
||||
void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"1: \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"movaps %%xmm0, %%xmm2 \n"
|
||||
DecryptAES256(sched)
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"movaps %%xmm2, %%xmm1 \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
"movups %%xmm1, (%[iv]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
|
||||
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
ChipherBlock tmp = in[i];
|
||||
m_ECBDecryption.Decrypt (in + i, out + i);
|
||||
out[i] ^= *m_IV.GetChipherBlock ();
|
||||
*m_IV.GetChipherBlock () = tmp;
|
||||
}
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"1: \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"movaps %%xmm0, %%xmm2 \n"
|
||||
DecryptAES256(sched)
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"movaps %%xmm2, %%xmm1 \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
"movups %%xmm1, (%[iv]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
|
||||
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
{
|
||||
ChipherBlock tmp = in[i];
|
||||
m_ECBDecryption.Decrypt (in + i, out + i);
|
||||
out[i] ^= *m_IV.GetChipherBlock ();
|
||||
*m_IV.GetChipherBlock () = tmp;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
{
|
||||
ChipherBlock tmp = in[i];
|
||||
m_ECBDecryption.Decrypt (in + i, out + i);
|
||||
out[i] ^= *m_IV.GetChipherBlock ();
|
||||
*m_IV.GetChipherBlock () = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CBCDecryption::Decrypt (const uint8_t * in, std::size_t len, uint8_t * out)
|
||||
|
@ -797,96 +926,121 @@ namespace crypto
|
|||
|
||||
void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out)
|
||||
{
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"movups %%xmm0, (%[iv]) \n"
|
||||
DecryptAES256(sched)
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
__asm__
|
||||
(
|
||||
"movups (%[iv]), %%xmm1 \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"movups %%xmm0, (%[iv]) \n"
|
||||
DecryptAES256(sched)
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
:
|
||||
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
#else
|
||||
Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
|
||||
Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
|
||||
}
|
||||
|
||||
void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out)
|
||||
{
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
// encrypt IV
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
EncryptAES256(sched_iv)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
// double IV encryption
|
||||
EncryptAES256(sched_iv)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
// encrypt data, IV is xmm1
|
||||
"1: \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
EncryptAES256(sched_l)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
:
|
||||
: [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
|
||||
: "%xmm0", "%xmm1", "cc", "memory"
|
||||
);
|
||||
__asm__
|
||||
(
|
||||
// encrypt IV
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
EncryptAES256(sched_iv)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
// double IV encryption
|
||||
EncryptAES256(sched_iv)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
// encrypt data, IV is xmm1
|
||||
"1: \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
EncryptAES256(sched_l)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
:
|
||||
: [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.ECB().GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
|
||||
: "%xmm0", "%xmm1", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
|
||||
m_LayerEncryption.SetIV (out);
|
||||
m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
|
||||
m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
|
||||
m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
|
||||
m_LayerEncryption.SetIV (out);
|
||||
m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
|
||||
m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
|
||||
m_LayerEncryption.SetIV (out);
|
||||
m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
|
||||
m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
|
||||
}
|
||||
}
|
||||
|
||||
void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out)
|
||||
{
|
||||
if(i2p::cpu::aesni)
|
||||
{
|
||||
#ifdef AESNI
|
||||
__asm__
|
||||
(
|
||||
// decrypt IV
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
DecryptAES256(sched_iv)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
// double IV encryption
|
||||
DecryptAES256(sched_iv)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
// decrypt data, IV is xmm1
|
||||
"1: \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"movaps %%xmm0, %%xmm2 \n"
|
||||
DecryptAES256(sched_l)
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"movaps %%xmm2, %%xmm1 \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
:
|
||||
: [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
|
||||
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
|
||||
);
|
||||
__asm__
|
||||
(
|
||||
// decrypt IV
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
DecryptAES256(sched_iv)
|
||||
"movaps %%xmm0, %%xmm1 \n"
|
||||
// double IV encryption
|
||||
DecryptAES256(sched_iv)
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
// decrypt data, IV is xmm1
|
||||
"1: \n"
|
||||
"add $16, %[in] \n"
|
||||
"add $16, %[out] \n"
|
||||
"movups (%[in]), %%xmm0 \n"
|
||||
"movaps %%xmm0, %%xmm2 \n"
|
||||
DecryptAES256(sched_l)
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[out]) \n"
|
||||
"movaps %%xmm2, %%xmm1 \n"
|
||||
"dec %[num] \n"
|
||||
"jnz 1b \n"
|
||||
:
|
||||
: [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.ECB().GetKeySchedule ()),
|
||||
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
|
||||
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
|
||||
m_LayerDecryption.SetIV (out);
|
||||
m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
|
||||
m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
|
||||
m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
|
||||
m_LayerDecryption.SetIV (out);
|
||||
m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
|
||||
m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
|
||||
m_LayerDecryption.SetIV (out);
|
||||
m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
|
||||
m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
|
||||
}
|
||||
}
|
||||
|
||||
/* std::vector <std::unique_ptr<std::mutex> > m_OpenSSLMutexes;
|
||||
|
@ -904,6 +1058,7 @@ namespace crypto
|
|||
|
||||
void InitCrypto (bool precomputation)
|
||||
{
|
||||
i2p::cpu::Detect ();
|
||||
SSL_library_init ();
|
||||
/* auto numLocks = CRYPTO_num_locks();
|
||||
for (int i = 0; i < numLocks; i++)
|
||||
|
|
125
libi2pd/Crypto.h
125
libi2pd/Crypto.h
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "Base.h"
|
||||
#include "Tag.h"
|
||||
#include "CPU.h"
|
||||
|
||||
namespace i2p
|
||||
{
|
||||
|
@ -68,33 +69,30 @@ namespace crypto
|
|||
|
||||
void operator^=(const ChipherBlock& other) // XOR
|
||||
{
|
||||
#if defined(__AVX__) // AVX
|
||||
__asm__
|
||||
(
|
||||
"vmovups (%[buf]), %%xmm0 \n"
|
||||
"vmovups (%[other]), %%xmm1 \n"
|
||||
"vxorps %%xmm0, %%xmm1, %%xmm0 \n"
|
||||
"vmovups %%xmm0, (%[buf]) \n"
|
||||
:
|
||||
: [buf]"r"(buf), [other]"r"(other.buf)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
#elif defined(__SSE__) // SSE
|
||||
__asm__
|
||||
(
|
||||
"movups (%[buf]), %%xmm0 \n"
|
||||
"movups (%[other]), %%xmm1 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n"
|
||||
"movups %%xmm0, (%[buf]) \n"
|
||||
:
|
||||
: [buf]"r"(buf), [other]"r"(other.buf)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
if (i2p::cpu::avx)
|
||||
{
|
||||
#ifdef AVX
|
||||
__asm__
|
||||
(
|
||||
"vmovups (%[buf]), %%xmm0 \n"
|
||||
"vmovups (%[other]), %%xmm1 \n"
|
||||
"vxorps %%xmm0, %%xmm1, %%xmm0 \n"
|
||||
"vmovups %%xmm0, (%[buf]) \n"
|
||||
:
|
||||
: [buf]"r"(buf), [other]"r"(other.buf)
|
||||
: "%xmm0", "%xmm1", "memory"
|
||||
);
|
||||
#else
|
||||
// TODO: implement it better
|
||||
for (int i = 0; i < 16; i++)
|
||||
buf[i] ^= other.buf[i];
|
||||
for (int i = 0; i < 16; i++)
|
||||
buf[i] ^= other.buf[i];
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: implement it better
|
||||
for (int i = 0; i < 16; i++)
|
||||
buf[i] ^= other.buf[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -138,69 +136,40 @@ namespace crypto
|
|||
|
||||
private:
|
||||
|
||||
AESAlignedBuffer<240> m_KeySchedule; // 14 rounds for AES-256, 240 bytes
|
||||
AESAlignedBuffer<240> m_KeySchedule; // 14 rounds for AES-256, 240 bytes
|
||||
};
|
||||
#endif
|
||||
|
||||
class ECBEncryptionAESNI: public ECBCryptoAESNI
|
||||
#ifdef AESNI
|
||||
class ECBEncryption: public ECBCryptoAESNI
|
||||
#else
|
||||
class ECBEncryption
|
||||
#endif
|
||||
{
|
||||
public:
|
||||
|
||||
void SetKey (const AESKey& key) { ExpandKey (key); };
|
||||
void Encrypt (const ChipherBlock * in, ChipherBlock * out);
|
||||
void SetKey (const AESKey& key);
|
||||
|
||||
void Encrypt(const ChipherBlock * in, ChipherBlock * out);
|
||||
|
||||
private:
|
||||
AES_KEY m_Key;
|
||||
};
|
||||
|
||||
class ECBDecryptionAESNI: public ECBCryptoAESNI
|
||||
#ifdef AESNI
|
||||
class ECBDecryption: public ECBCryptoAESNI
|
||||
#else
|
||||
class ECBDecryption
|
||||
#endif
|
||||
{
|
||||
public:
|
||||
|
||||
void SetKey (const AESKey& key);
|
||||
void Decrypt (const ChipherBlock * in, ChipherBlock * out);
|
||||
};
|
||||
|
||||
typedef ECBEncryptionAESNI ECBEncryption;
|
||||
typedef ECBDecryptionAESNI ECBDecryption;
|
||||
|
||||
#else // use openssl
|
||||
|
||||
class ECBEncryption
|
||||
{
|
||||
public:
|
||||
|
||||
void SetKey (const AESKey& key)
|
||||
{
|
||||
AES_set_encrypt_key (key, 256, &m_Key);
|
||||
}
|
||||
void Encrypt (const ChipherBlock * in, ChipherBlock * out)
|
||||
{
|
||||
AES_encrypt (in->buf, out->buf, &m_Key);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
AES_KEY m_Key;
|
||||
};
|
||||
|
||||
class ECBDecryption
|
||||
{
|
||||
public:
|
||||
|
||||
void SetKey (const AESKey& key)
|
||||
{
|
||||
AES_set_decrypt_key (key, 256, &m_Key);
|
||||
}
|
||||
void Decrypt (const ChipherBlock * in, ChipherBlock * out)
|
||||
{
|
||||
AES_decrypt (in->buf, out->buf, &m_Key);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
AES_KEY m_Key;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
class CBCEncryption
|
||||
{
|
||||
public:
|
||||
|
@ -214,6 +183,8 @@ namespace crypto
|
|||
void Encrypt (const uint8_t * in, std::size_t len, uint8_t * out);
|
||||
void Encrypt (const uint8_t * in, uint8_t * out); // one block
|
||||
|
||||
ECBEncryption & ECB() { return m_ECBEncryption; }
|
||||
|
||||
private:
|
||||
|
||||
AESAlignedBuffer<16> m_LastBlock;
|
||||
|
@ -234,6 +205,8 @@ namespace crypto
|
|||
void Decrypt (const uint8_t * in, std::size_t len, uint8_t * out);
|
||||
void Decrypt (const uint8_t * in, uint8_t * out); // one block
|
||||
|
||||
ECBDecryption & ECB() { return m_ECBDecryption; }
|
||||
|
||||
private:
|
||||
|
||||
AESAlignedBuffer<16> m_IV;
|
||||
|
@ -255,11 +228,7 @@ namespace crypto
|
|||
private:
|
||||
|
||||
ECBEncryption m_IVEncryption;
|
||||
#ifdef AESNI
|
||||
ECBEncryption m_LayerEncryption;
|
||||
#else
|
||||
CBCEncryption m_LayerEncryption;
|
||||
#endif
|
||||
};
|
||||
|
||||
class TunnelDecryption // with double IV encryption
|
||||
|
@ -277,11 +246,7 @@ namespace crypto
|
|||
private:
|
||||
|
||||
ECBDecryption m_IVDecryption;
|
||||
#ifdef AESNI
|
||||
ECBDecryption m_LayerDecryption;
|
||||
#else
|
||||
CBCDecryption m_LayerDecryption;
|
||||
#endif
|
||||
};
|
||||
|
||||
void InitCrypto (bool precomputation);
|
||||
|
|
Loading…
Reference in a new issue