#include #include "TunnelBase.h" #include "aes.h" namespace i2p { namespace crypto { #ifdef AESNI ECBCryptoAESNI::ECBCryptoAESNI () { m_KeySchedule = m_UnalignedBuffer; uint8_t rem = ((uint64_t)m_KeySchedule) & 0x0f; if (rem) m_KeySchedule += (16 - rem); } #define KeyExpansion256(round0,round1) \ "pshufd $0xff, %%xmm2, %%xmm2 \n" \ "movaps %%xmm1, %%xmm4 \n" \ "pslldq $4, %%xmm4 \n" \ "pxor %%xmm4, %%xmm1 \n" \ "pslldq $4, %%xmm4 \n" \ "pxor %%xmm4, %%xmm1 \n" \ "pslldq $4, %%xmm4 \n" \ "pxor %%xmm4, %%xmm1 \n" \ "pxor %%xmm2, %%xmm1 \n" \ "movaps %%xmm1, "#round0"(%[sched]) \n" \ "aeskeygenassist $0, %%xmm1, %%xmm4 \n" \ "pshufd $0xaa, %%xmm4, %%xmm2 \n" \ "movaps %%xmm3, %%xmm4 \n" \ "pslldq $4, %%xmm4 \n" \ "pxor %%xmm4, %%xmm3 \n" \ "pslldq $4, %%xmm4 \n" \ "pxor %%xmm4, %%xmm3 \n" \ "pslldq $4, %%xmm4 \n" \ "pxor %%xmm4, %%xmm3 \n" \ "pxor %%xmm2, %%xmm3 \n" \ "movaps %%xmm3, "#round1"(%[sched]) \n" void ECBCryptoAESNI::ExpandKey (const uint8_t * key) { __asm__ ( "movups (%[key]), %%xmm1 \n" "movups 16(%[key]), %%xmm3 \n" "movaps %%xmm1, (%[sched]) \n" "movaps %%xmm3, 16(%[sched]) \n" "aeskeygenassist $1, %%xmm3, %%xmm2 \n" KeyExpansion256(32,48) "aeskeygenassist $2, %%xmm3, %%xmm2 \n" KeyExpansion256(64,80) "aeskeygenassist $4, %%xmm3, %%xmm2 \n" KeyExpansion256(96,112) "aeskeygenassist $8, %%xmm3, %%xmm2 \n" KeyExpansion256(128,144) "aeskeygenassist $16, %%xmm3, %%xmm2 \n" KeyExpansion256(160,176) "aeskeygenassist $32, %%xmm3, %%xmm2 \n" KeyExpansion256(192,208) "aeskeygenassist $64, %%xmm3, %%xmm2 \n" // key expansion final "pshufd $0xff, %%xmm2, %%xmm2 \n" "movaps %%xmm1, %%xmm4 \n" "pslldq $4, %%xmm4 \n" "pxor %%xmm4, %%xmm1 \n" "pslldq $4, %%xmm4 \n" "pxor %%xmm4, %%xmm1 \n" "pslldq $4, %%xmm4 \n" "pxor %%xmm4, %%xmm1 \n" "pxor %%xmm2, %%xmm1 \n" "movups %%xmm1, 224(%[sched]) \n" : // output : [key]"r"(key), [sched]"r"(m_KeySchedule) // input : "%xmm1", "%xmm2", "%xmm3", "%xmm4" // clogged ); } #define EncryptAES256(sched) \ "pxor (%["#sched"]), %%xmm0 \n" \ "aesenc 16(%["#sched"]), %%xmm0 \n" \ "aesenc 32(%["#sched"]), %%xmm0 \n" \ "aesenc 48(%["#sched"]), %%xmm0 \n" \ "aesenc 64(%["#sched"]), %%xmm0 \n" \ "aesenc 80(%["#sched"]), %%xmm0 \n" \ "aesenc 96(%["#sched"]), %%xmm0 \n" \ "aesenc 112(%["#sched"]), %%xmm0 \n" \ "aesenc 128(%["#sched"]), %%xmm0 \n" \ "aesenc 144(%["#sched"]), %%xmm0 \n" \ "aesenc 160(%["#sched"]), %%xmm0 \n" \ "aesenc 176(%["#sched"]), %%xmm0 \n" \ "aesenc 192(%["#sched"]), %%xmm0 \n" \ "aesenc 208(%["#sched"]), %%xmm0 \n" \ "aesenclast 224(%["#sched"]), %%xmm0 \n" void ECBEncryptionAESNI::Encrypt (const ChipherBlock * in, ChipherBlock * out) { __asm__ ( "movups (%[in]), %%xmm0 \n" EncryptAES256(sched) "movups %%xmm0, (%[out]) \n" : : [sched]"r"(m_KeySchedule), [in]"r"(in), [out]"r"(out) : "%xmm0" ); } #define DecryptAES256(sched) \ "pxor 224(%["#sched"]), %%xmm0 \n" \ "aesdec 208(%["#sched"]), %%xmm0 \n" \ "aesdec 192(%["#sched"]), %%xmm0 \n" \ "aesdec 176(%["#sched"]), %%xmm0 \n" \ "aesdec 160(%["#sched"]), %%xmm0 \n" \ "aesdec 144(%["#sched"]), %%xmm0 \n" \ "aesdec 128(%["#sched"]), %%xmm0 \n" \ "aesdec 112(%["#sched"]), %%xmm0 \n" \ "aesdec 96(%["#sched"]), %%xmm0 \n" \ "aesdec 80(%["#sched"]), %%xmm0 \n" \ "aesdec 64(%["#sched"]), %%xmm0 \n" \ "aesdec 48(%["#sched"]), %%xmm0 \n" \ "aesdec 32(%["#sched"]), %%xmm0 \n" \ "aesdec 16(%["#sched"]), %%xmm0 \n" \ "aesdeclast (%["#sched"]), %%xmm0 \n" void ECBDecryptionAESNI::Decrypt (const ChipherBlock * in, ChipherBlock * out) { __asm__ ( "movups (%[in]), %%xmm0 \n" DecryptAES256(sched) "movups %%xmm0, (%[out]) \n" : : [sched]"r"(m_KeySchedule), [in]"r"(in), [out]"r"(out) : "%xmm0" ); } #define CallAESIMC(offset) \ "movaps "#offset"(%[shed]), %%xmm0 \n" \ "aesimc %%xmm0, %%xmm0 \n" \ "movaps %%xmm0, "#offset"(%[shed]) \n" void ECBDecryptionAESNI::SetKey (const uint8_t * key) { ExpandKey (key); // expand encryption key first // then invert it using aesimc __asm__ ( CallAESIMC(16) CallAESIMC(32) CallAESIMC(48) CallAESIMC(64) CallAESIMC(80) CallAESIMC(96) CallAESIMC(112) CallAESIMC(128) CallAESIMC(144) CallAESIMC(160) CallAESIMC(176) CallAESIMC(192) CallAESIMC(208) : : [shed]"r"(m_KeySchedule) : "%xmm0" ); } #endif void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) { #ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" "block_e: \n" "movups (%[in]), %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n" EncryptAES256(sched) "movaps %%xmm0, %%xmm1 \n" "movups %%xmm0, (%[out]) \n" "add $16, %[in] \n" "add $16, %[out] \n" "dec %[num] \n" "jnz block_e; \n" "movups %%xmm1, (%[iv]) \n" : : [iv]"r"(&m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) : "%xmm0", "%xmm1", "cc", "memory" ); #else for (int i = 0; i < numBlocks; i++) { m_LastBlock ^= in[i]; m_ECBEncryption.Encrypt (&m_LastBlock, &m_LastBlock); out[i] = m_LastBlock; } #endif } bool CBCEncryption::Encrypt (const uint8_t * in, std::size_t len, uint8_t * out) { div_t d = div (len, 16); if (d.rem) return false; // len is not multipple of 16 Encrypt (d.quot, (const ChipherBlock *)in, (ChipherBlock *)out); return true; } void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out) { #ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" "movups (%[in]), %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n" EncryptAES256(sched) "movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[iv]) \n" : : [iv]"r"(&m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "%xmm1", "memory" ); #else Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); #endif } void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) { #ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" "block_d: \n" "movups (%[in]), %%xmm0 \n" "movaps %%xmm0, %%xmm2 \n" DecryptAES256(sched) "pxor %%xmm1, %%xmm0 \n" "movups %%xmm0, (%[out]) \n" "movaps %%xmm2, %%xmm1 \n" "add $16, %[in] \n" "add $16, %[out] \n" "dec %[num] \n" "jnz block_d; \n" "movups %%xmm1, (%[iv]) \n" : : [iv]"r"(&m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" ); #else for (int i = 0; i < numBlocks; i++) { ChipherBlock tmp = in[i]; m_ECBDecryption.Decrypt (in + i, out + i); out[i] ^= m_IV; m_IV = tmp; } #endif } bool CBCDecryption::Decrypt (const uint8_t * in, std::size_t len, uint8_t * out) { div_t d = div (len, 16); if (d.rem) return false; // len is not multiple of 16 Decrypt (d.quot, (const ChipherBlock *)in, (ChipherBlock *)out); return true; } void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out) { #ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" "movups (%[in]), %%xmm0 \n" "movups %%xmm0, (%[iv]) \n" DecryptAES256(sched) "pxor %%xmm1, %%xmm0 \n" "movups %%xmm0, (%[out]) \n" : : [iv]"r"(&m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "%xmm1", "memory" ); #else Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); #endif } void TunnelEncryption::Encrypt (uint8_t * payload) { #ifdef AESNI __asm__ ( // encrypt IV "movups (%[payload]), %%xmm0 \n" EncryptAES256(sched_iv) "movaps %%xmm0, %%xmm1 \n" // double IV encryption EncryptAES256(sched_iv) "movups %%xmm0, (%[payload]) \n" // encrypt data, IV is xmm1 "block_et: \n" "add $16, %[payload] \n" "movups (%[payload]), %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n" EncryptAES256(sched_l) "movaps %%xmm0, %%xmm1 \n" "movups %%xmm0, (%[payload]) \n" "dec %[num] \n" "jnz block_et; \n" : : [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.GetKeySchedule ()), [payload]"r"(payload), [num]"r"(63) // 63 blocks = 1008 bytes : "%xmm0", "%xmm1", "cc", "memory" ); #else m_IVEncryption.Encrypt ((ChipherBlock *)payload, (ChipherBlock *)payload); // iv m_LayerEncryption.Encrypt (payload + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, payload + 16); // data m_IVEncryption.Encrypt ((ChipherBlock *)payload, (ChipherBlock *)payload); // double iv #endif } void TunnelDecryption::Decrypt (uint8_t * payload) { #ifdef AESNI __asm__ ( // decrypt IV "movups (%[payload]), %%xmm0 \n" DecryptAES256(sched_iv) "movaps %%xmm0, %%xmm1 \n" // double IV encryption DecryptAES256(sched_iv) "movups %%xmm0, (%[payload]) \n" // decrypt data, IV is xmm1 "block_dt: \n" "add $16, %[payload] \n" "movups (%[payload]), %%xmm0 \n" "movaps %%xmm0, %%xmm2 \n" DecryptAES256(sched_l) "pxor %%xmm1, %%xmm0 \n" "movups %%xmm0, (%[payload]) \n" "movaps %%xmm2, %%xmm1 \n" "dec %[num] \n" "jnz block_dt; \n" : : [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.GetKeySchedule ()), [payload]"r"(payload), [num]"r"(63) // 63 blocks = 1008 bytes : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" ); #else m_IVDecryption.Decrypt ((ChipherBlock *)payload, (ChipherBlock *)payload); // iv m_LayerDecryption.Decrypt (payload + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, payload + 16); // data m_IVDecryption.Decrypt ((ChipherBlock *)payload, (ChipherBlock *)payload); // double iv #endif } } }