1
0
mirror of https://github.com/RIOT-OS/RIOT.git synced 2024-12-29 04:50:03 +01:00

sys/crypto: optimize AES

This commit is contained in:
Oleg Artamonov 2018-09-25 21:58:45 +03:00 committed by Juan Carrano
parent 836fe3dbba
commit 05fe168a1e
2 changed files with 417 additions and 336 deletions

View File

@ -18,6 +18,9 @@
* @author Fabrice Bellard
* @author Zakaria Kasmi <zkasmi@inf.fu-berlin.de>
*
* @author Unwired Devices LLC
* @author Oleg Artamonov <oleg@unwds.com>
*
* @note Integrated in QEMU by Fabrice Bellard from the OpenSSL project.
* @version 3.0 (December 2000). Optimised ANSI C code for the
* Rijndael cipher (now AES).
@ -115,6 +118,20 @@ static const u32 Te0[256] = {
0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
};
#if defined(AES_CALCULATE_TABLES)
/*
 * On-the-fly encryption tables: only Te0 is stored.  Te1..Te3 are Te0
 * rotated right by 8, 16 and 24 bits; Te4 takes the two middle bytes of
 * the Te0 word (which are equal) and repeats them in all four byte lanes.
 * Each macro may evaluate its argument more than once, so pass
 * side-effect-free expressions only.
 */
#define Te0(n) (Te0[(n)])
#define Te1(n) ((Te0[(n)] << 24) | (Te0[(n)] >> 8))
#define Te2(n) ((Te0[(n)] << 16) | (Te0[(n)] >> 16))
#define Te3(n) ((Te0[(n)] << 8) | (Te0[(n)] >> 24))
#define Te4(n) (((Te0[(n)] << 8) & 0xFFFF0000) | ((Te0[(n)] >> 8) & 0x0000FFFF))
#else
/*
 * Precalculated-table variant: every TeN(n) accessor is a plain lookup in
 * the table of the same name, so call sites use the same TeN(n) syntax
 * whether or not AES_CALCULATE_TABLES is defined.
 */
#define Te0(n) (Te0[n])
#define Te1(n) (Te1[n])
#define Te2(n) (Te2[n])
#define Te3(n) (Te3[n])
#define Te4(n) (Te4[n])
static const u32 Te1[256] = {
0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
@ -380,6 +397,8 @@ static const u32 Te4[256] = {
0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
};
#endif /* AES_CALCULATE_TABLES */
static const u32 Td0[256] = {
0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
@ -446,6 +465,55 @@ static const u32 Td0[256] = {
0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
};
#if defined(AES_CALCULATE_TABLES)
/*
 * On-the-fly decryption tables: only Td0 (32-bit words) and a byte-wide
 * Td4 are stored.  Td1..Td3 are Td0 rotated right by 8, 16 and 24 bits.
 * Td4(n) repeats the byte Td4[n] in all four byte lanes of a 32-bit word.
 *
 * The Td4 byte is cast to u32 before shifting: a bare u8 is promoted to
 * int, so shifting it left by 24 would move a set top bit into the sign
 * bit (undefined behaviour, and a negative value that sign-extends when
 * widened), and with a 16-bit int the shifts by 16 and 24 would exceed
 * the width of the type.
 *
 * Each macro may evaluate its argument more than once, so pass
 * side-effect-free expressions only.
 */
#define Td0(n) (Td0[n])
#define Td1(n) ((Td0[n] >> 8) | (Td0[n] << 24))
#define Td2(n) ((Td0[n] >> 16) | (Td0[n] << 16))
#define Td3(n) ((Td0[n] >> 24) | (Td0[n] << 8))
#define Td4(n) ((u32)Td4[n] | ((u32)Td4[n] << 8) | \
                ((u32)Td4[n] << 16) | ((u32)Td4[n] << 24))
/*
 * Byte-wide Td4 table (the AES inverse S-box), used when
 * AES_CALCULATE_TABLES is defined.  The precalculated u32 Td4 table holds
 * each of these bytes repeated four times (e.g. 0x7d -> 0x7d7d7d7d); here
 * only the single byte is stored and the Td4() macro rebuilds the 32-bit
 * word at lookup time.
 */
static const u8 Td4[256] = {
0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
};
#else
/*
 * Precalculated-table variant: every TdN(n) accessor is a plain lookup in
 * the table of the same name, so call sites use the same TdN(n) syntax
 * whether or not AES_CALCULATE_TABLES is defined.
 */
#define Td0(n) (Td0[n])
#define Td1(n) (Td1[n])
#define Td2(n) (Td2[n])
#define Td3(n) (Td3[n])
#define Td4(n) (Td4[n])
static const u32 Td1[256] = {
0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
@ -711,6 +779,7 @@ static const u32 Td4[256] = {
0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
};
#endif /* AES_CALCULATE_TABLES */
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
static const u32 rcon[] = {
@ -785,10 +854,10 @@ static int aes_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[3];
rk[4] = rk[0] ^
(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
(Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
(Te4[(temp) & 0xff] & 0x0000ff00) ^
(Te4[(temp >> 24) ] & 0x000000ff) ^
(Te4((temp >> 16) & 0xff) & 0xff000000) ^
(Te4((temp >> 8) & 0xff) & 0x00ff0000) ^
(Te4((temp) & 0xff) & 0x0000ff00) ^
(Te4((temp >> 24) ) & 0x000000ff) ^
rcon[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
@ -809,10 +878,10 @@ static int aes_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[ 5];
rk[ 6] = rk[ 0] ^
(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
(Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
(Te4[(temp) & 0xff] & 0x0000ff00) ^
(Te4[(temp >> 24) ] & 0x000000ff) ^
(Te4((temp >> 16) & 0xff) & 0xff000000) ^
(Te4((temp >> 8) & 0xff) & 0x00ff0000) ^
(Te4((temp) & 0xff) & 0x0000ff00) ^
(Te4((temp >> 24) ) & 0x000000ff) ^
rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
@ -835,10 +904,10 @@ static int aes_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[ 7];
rk[ 8] = rk[ 0] ^
(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
(Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
(Te4[(temp) & 0xff] & 0x0000ff00) ^
(Te4[(temp >> 24) ] & 0x000000ff) ^
(Te4((temp >> 16) & 0xff) & 0xff000000) ^
(Te4((temp >> 8) & 0xff) & 0x00ff0000) ^
(Te4((temp) & 0xff) & 0x0000ff00) ^
(Te4((temp >> 24) ) & 0x000000ff) ^
rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
@ -850,10 +919,10 @@ static int aes_set_encrypt_key(const unsigned char *userKey, const int bits,
temp = rk[11];
rk[12] = rk[ 4] ^
(Te4[(temp >> 24) ] & 0xff000000) ^
(Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
(Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
(Te4[(temp) & 0xff] & 0x000000ff);
(Te4((temp >> 24) ) & 0xff000000) ^
(Te4((temp >> 16) & 0xff) & 0x00ff0000) ^
(Te4((temp >> 8) & 0xff) & 0x0000ff00) ^
(Te4((temp) & 0xff) & 0x000000ff);
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
@ -906,26 +975,36 @@ static int aes_set_decrypt_key(const unsigned char *userKey, const int bits,
**/
for (i = 1; i < (key->rounds); i++) {
rk += 4;
#ifdef FULL_UNROLL
rk[0] =
Td0[Te4[(rk[0] >> 24) ] & 0xff] ^
Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
Td3[Te4[(rk[0]) & 0xff] & 0xff];
Td0(Te4((rk[0] >> 24) ) & 0xff) ^
Td1(Te4((rk[0] >> 16) & 0xff) & 0xff) ^
Td2(Te4((rk[0] >> 8) & 0xff) & 0xff) ^
Td3(Te4((rk[0]) & 0xff) & 0xff);
rk[1] =
Td0[Te4[(rk[1] >> 24) ] & 0xff] ^
Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
Td3[Te4[(rk[1]) & 0xff] & 0xff];
Td0(Te4((rk[1] >> 24) ) & 0xff) ^
Td1(Te4((rk[1] >> 16) & 0xff) & 0xff) ^
Td2(Te4((rk[1] >> 8) & 0xff) & 0xff) ^
Td3(Te4((rk[1]) & 0xff) & 0xff);
rk[2] =
Td0[Te4[(rk[2] >> 24) ] & 0xff] ^
Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
Td3[Te4[(rk[2]) & 0xff] & 0xff];
Td0(Te4((rk[2] >> 24) ) & 0xff) ^
Td1(Te4((rk[2] >> 16) & 0xff) & 0xff) ^
Td2(Te4((rk[2] >> 8) & 0xff) & 0xff) ^
Td3(Te4((rk[2]) & 0xff) & 0xff);
rk[3] =
Td0[Te4[(rk[3] >> 24) ] & 0xff] ^
Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
Td3[Te4[(rk[3]) & 0xff] & 0xff];
Td0(Te4((rk[3] >> 24) ) & 0xff) ^
Td1(Te4((rk[3] >> 16) & 0xff) & 0xff) ^
Td2(Te4((rk[3] >> 8) & 0xff) & 0xff) ^
Td3(Te4((rk[3]) & 0xff) & 0xff);
#else
for (int k = 0; k < 4; k++) {
rk[k] =
Td0(Te4((rk[k] >> 24) ) & 0xff) ^
Td1(Te4((rk[k] >> 16) & 0xff) & 0xff) ^
Td2(Te4((rk[k] >> 8) & 0xff) & 0xff) ^
Td3(Te4((rk[k]) & 0xff) & 0xff);
}
#endif
}
return 0;
@ -967,126 +1046,126 @@ int aes_encrypt(const cipher_context_t *context, const uint8_t *plainBlock,
s3 = GETU32(plainBlock + 12) ^ rk[3];
#ifdef FULL_UNROLL
/* round 1: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
Te3[s3 & 0xff] ^ rk[ 4];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
Te3[s0 & 0xff] ^ rk[ 5];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
Te3[s1 & 0xff] ^ rk[ 6];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
Te3[s2 & 0xff] ^ rk[ 7];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) & 0xff) ^
Te3(s3 & 0xff) ^ rk[ 4];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) & 0xff) ^
Te3(s0 & 0xff) ^ rk[ 5];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) & 0xff) ^
Te3(s1 & 0xff) ^ rk[ 6];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) & 0xff) ^
Te3(s2 & 0xff) ^ rk[ 7];
/* round 2: */
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
Te3[t3 & 0xff] ^ rk[ 8];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
Te3[t0 & 0xff] ^ rk[ 9];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
Te3[t1 & 0xff] ^ rk[10];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
Te3[t2 & 0xff] ^ rk[11];
s0 = Te0(t0 >> 24) ^ Te1((t1 >> 16) & 0xff) ^ Te2((t2 >> 8) & 0xff) ^
Te3(t3 & 0xff) ^ rk[ 8];
s1 = Te0(t1 >> 24) ^ Te1((t2 >> 16) & 0xff) ^ Te2((t3 >> 8) & 0xff) ^
Te3(t0 & 0xff) ^ rk[ 9];
s2 = Te0(t2 >> 24) ^ Te1((t3 >> 16) & 0xff) ^ Te2((t0 >> 8) & 0xff) ^
Te3(t1 & 0xff) ^ rk[10];
s3 = Te0(t3 >> 24) ^ Te1((t0 >> 16) & 0xff) ^ Te2((t1 >> 8) & 0xff) ^
Te3(t2 & 0xff) ^ rk[11];
/* round 3: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
Te3[s3 & 0xff] ^ rk[12];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
Te3[s0 & 0xff] ^ rk[13];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
Te3[s1 & 0xff] ^ rk[14];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
Te3[s2 & 0xff] ^ rk[15];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) & 0xff) ^
Te3(s3 & 0xff) ^ rk[12];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) & 0xff) ^
Te3(s0 & 0xff) ^ rk[13];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) & 0xff) ^
Te3(s1 & 0xff) ^ rk[14];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) & 0xff) ^
Te3(s2 & 0xff) ^ rk[15];
/* round 4: */
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
Te3[t3 & 0xff] ^ rk[16];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
Te3[t0 & 0xff] ^ rk[17];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
Te3[t1 & 0xff] ^ rk[18];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
Te3[t2 & 0xff] ^ rk[19];
s0 = Te0(t0 >> 24) ^ Te1((t1 >> 16) & 0xff) ^ Te2((t2 >> 8) & 0xff) ^
Te3(t3 & 0xff) ^ rk[16];
s1 = Te0(t1 >> 24) ^ Te1((t2 >> 16) & 0xff) ^ Te2((t3 >> 8) & 0xff) ^
Te3(t0 & 0xff) ^ rk[17];
s2 = Te0(t2 >> 24) ^ Te1((t3 >> 16) & 0xff) ^ Te2((t0 >> 8) & 0xff) ^
Te3(t1 & 0xff) ^ rk[18];
s3 = Te0(t3 >> 24) ^ Te1((t0 >> 16) & 0xff) ^ Te2((t1 >> 8) & 0xff) ^
Te3(t2 & 0xff) ^ rk[19];
/* round 5: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
Te3[s3 & 0xff] ^ rk[20];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
Te3[s0 & 0xff] ^ rk[21];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
Te3[s1 & 0xff] ^ rk[22];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
Te3[s2 & 0xff] ^ rk[23];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) & 0xff) ^
Te3(s3 & 0xff) ^ rk[20];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) & 0xff) ^
Te3(s0 & 0xff) ^ rk[21];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) & 0xff) ^
Te3(s1 & 0xff) ^ rk[22];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) & 0xff) ^
Te3(s2 & 0xff) ^ rk[23];
/* round 6: */
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
Te3[t3 & 0xff] ^ rk[24];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
Te3[t0 & 0xff] ^ rk[25];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
Te3[t1 & 0xff] ^ rk[26];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
Te3[t2 & 0xff] ^ rk[27];
s0 = Te0(t0 >> 24) ^ Te1((t1 >> 16) & 0xff) ^ Te2((t2 >> 8) & 0xff) ^
Te3(t3 & 0xff) ^ rk[24];
s1 = Te0(t1 >> 24) ^ Te1((t2 >> 16) & 0xff) ^ Te2((t3 >> 8) & 0xff) ^
Te3(t0 & 0xff) ^ rk[25];
s2 = Te0(t2 >> 24) ^ Te1((t3 >> 16) & 0xff) ^ Te2((t0 >> 8) & 0xff) ^
Te3(t1 & 0xff) ^ rk[26];
s3 = Te0(t3 >> 24) ^ Te1((t0 >> 16) & 0xff) ^ Te2((t1 >> 8) & 0xff) ^
Te3(t2 & 0xff) ^ rk[27];
/* round 7: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
Te3[s3 & 0xff] ^ rk[28];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
Te3[s0 & 0xff] ^ rk[29];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
Te3[s1 & 0xff] ^ rk[30];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
Te3[s2 & 0xff] ^ rk[31];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) & 0xff) ^
Te3(s3 & 0xff) ^ rk[28];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) & 0xff) ^
Te3(s0 & 0xff) ^ rk[29];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) & 0xff) ^
Te3(s1 & 0xff) ^ rk[30];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) & 0xff) ^
Te3(s2 & 0xff) ^ rk[31];
/* round 8: */
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
Te3[t3 & 0xff] ^ rk[32];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
Te3[t0 & 0xff] ^ rk[33];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
Te3[t1 & 0xff] ^ rk[34];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
Te3[t2 & 0xff] ^ rk[35];
s0 = Te0(t0 >> 24) ^ Te1((t1 >> 16) & 0xff) ^ Te2((t2 >> 8) & 0xff) ^
Te3(t3 & 0xff) ^ rk[32];
s1 = Te0(t1 >> 24) ^ Te1((t2 >> 16) & 0xff) ^ Te2((t3 >> 8) & 0xff) ^
Te3(t0 & 0xff) ^ rk[33];
s2 = Te0(t2 >> 24) ^ Te1((t3 >> 16) & 0xff) ^ Te2((t0 >> 8) & 0xff) ^
Te3(t1 & 0xff) ^ rk[34];
s3 = Te0(t3 >> 24) ^ Te1((t0 >> 16) & 0xff) ^ Te2((t1 >> 8) & 0xff) ^
Te3(t2 & 0xff) ^ rk[35];
/* round 9: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
Te3[s3 & 0xff] ^ rk[36];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
Te3[s0 & 0xff] ^ rk[37];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
Te3[s1 & 0xff] ^ rk[38];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
Te3[s2 & 0xff] ^ rk[39];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) & 0xff) ^
Te3(s3 & 0xff) ^ rk[36];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) & 0xff) ^
Te3(s0 & 0xff) ^ rk[37];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) & 0xff) ^
Te3(s1 & 0xff) ^ rk[38];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) & 0xff) ^
Te3(s2 & 0xff) ^ rk[39];
if (key->rounds > 10) {
/* round 10: */
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
Te3[t3 & 0xff] ^ rk[40];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
Te3[t0 & 0xff] ^ rk[41];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
Te3[t1 & 0xff] ^ rk[42];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
Te3[t2 & 0xff] ^ rk[43];
s0 = Te0(t0 >> 24) ^ Te1((t1 >> 16) & 0xff) ^ Te2((t2 >> 8) & 0xff) ^
Te3(t3 & 0xff) ^ rk[40];
s1 = Te0(t1 >> 24) ^ Te1((t2 >> 16) & 0xff) ^ Te2((t3 >> 8) & 0xff) ^
Te3(t0 & 0xff) ^ rk[41];
s2 = Te0(t2 >> 24) ^ Te1((t3 >> 16) & 0xff) ^ Te2((t0 >> 8) & 0xff) ^
Te3(t1 & 0xff) ^ rk[42];
s3 = Te0(t3 >> 24) ^ Te1((t0 >> 16) & 0xff) ^ Te2((t1 >> 8) & 0xff) ^
Te3(t2 & 0xff) ^ rk[43];
/* round 11: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
Te3[s3 & 0xff] ^ rk[44];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
Te3[s0 & 0xff] ^ rk[45];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
Te3[s1 & 0xff] ^ rk[46];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
Te3[s2 & 0xff] ^ rk[47];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) & 0xff) ^
Te3(s3 & 0xff) ^ rk[44];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) & 0xff) ^
Te3(s0 & 0xff) ^ rk[45];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) & 0xff) ^
Te3(s1 & 0xff) ^ rk[46];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) & 0xff) ^
Te3(s2 & 0xff) ^ rk[47];
if (key->rounds > 12) {
/* round 12: */
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) &
0xff] ^ Te3[t3 & 0xff] ^ rk[48];
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) &
0xff] ^ Te3[t0 & 0xff] ^ rk[49];
s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) &
0xff] ^ Te3[t1 & 0xff] ^ rk[50];
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) &
0xff] ^ Te3[t2 & 0xff] ^ rk[51];
s0 = Te0(t0 >> 24) ^ Te1((t1 >> 16) & 0xff) ^ Te2((t2 >> 8) &
0xff) ^ Te3(t3 & 0xff) ^ rk[48];
s1 = Te0(t1 >> 24) ^ Te1((t2 >> 16) & 0xff) ^ Te2((t3 >> 8) &
0xff) ^ Te3(t0 & 0xff) ^ rk[49];
s2 = Te0(t2 >> 24) ^ Te1((t3 >> 16) & 0xff) ^ Te2((t0 >> 8) &
0xff) ^ Te3(t1 & 0xff) ^ rk[50];
s3 = Te0(t3 >> 24) ^ Te1((t0 >> 16) & 0xff) ^ Te2((t1 >> 8) &
0xff) ^ Te3(t2 & 0xff) ^ rk[51];
/* round 13: */
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) &
0xff] ^ Te3[s3 & 0xff] ^ rk[52];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) &
0xff] ^ Te3[s0 & 0xff] ^ rk[53];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) &
0xff] ^ Te3[s1 & 0xff] ^ rk[54];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) &
0xff] ^ Te3[s2 & 0xff] ^ rk[55];
t0 = Te0(s0 >> 24) ^ Te1((s1 >> 16) & 0xff) ^ Te2((s2 >> 8) &
0xff) ^ Te3(s3 & 0xff) ^ rk[52];
t1 = Te0(s1 >> 24) ^ Te1((s2 >> 16) & 0xff) ^ Te2((s3 >> 8) &
0xff) ^ Te3(s0 & 0xff) ^ rk[53];
t2 = Te0(s2 >> 24) ^ Te1((s3 >> 16) & 0xff) ^ Te2((s0 >> 8) &
0xff) ^ Te3(s1 & 0xff) ^ rk[54];
t3 = Te0(s3 >> 24) ^ Te1((s0 >> 16) & 0xff) ^ Te2((s1 >> 8) &
0xff) ^ Te3(s2 & 0xff) ^ rk[55];
}
}
@ -1099,28 +1178,28 @@ int aes_encrypt(const cipher_context_t *context, const uint8_t *plainBlock,
while (1) {
t0 =
Te0[(s0 >> 24) ] ^
Te1[(s1 >> 16) & 0xff] ^
Te2[(s2 >> 8) & 0xff] ^
Te3[(s3) & 0xff] ^
Te0((s0 >> 24) ) ^
Te1((s1 >> 16) & 0xff) ^
Te2((s2 >> 8) & 0xff) ^
Te3((s3) & 0xff) ^
rk[4];
t1 =
Te0[(s1 >> 24) ] ^
Te1[(s2 >> 16) & 0xff] ^
Te2[(s3 >> 8) & 0xff] ^
Te3[(s0) & 0xff] ^
Te0((s1 >> 24) ) ^
Te1((s2 >> 16) & 0xff) ^
Te2((s3 >> 8) & 0xff) ^
Te3((s0) & 0xff) ^
rk[5];
t2 =
Te0[(s2 >> 24) ] ^
Te1[(s3 >> 16) & 0xff] ^
Te2[(s0 >> 8) & 0xff] ^
Te3[(s1) & 0xff] ^
Te0((s2 >> 24) ) ^
Te1((s3 >> 16) & 0xff) ^
Te2((s0 >> 8) & 0xff) ^
Te3((s1) & 0xff) ^
rk[6];
t3 =
Te0[(s3 >> 24) ] ^
Te1[(s0 >> 16) & 0xff] ^
Te2[(s1 >> 8) & 0xff] ^
Te3[(s2) & 0xff] ^
Te0((s3 >> 24) ) ^
Te1((s0 >> 16) & 0xff) ^
Te2((s1 >> 8) & 0xff) ^
Te3((s2) & 0xff) ^
rk[7];
rk += 8;
@ -1130,28 +1209,28 @@ int aes_encrypt(const cipher_context_t *context, const uint8_t *plainBlock,
}
s0 =
Te0[(t0 >> 24) ] ^
Te1[(t1 >> 16) & 0xff] ^
Te2[(t2 >> 8) & 0xff] ^
Te3[(t3) & 0xff] ^
Te0((t0 >> 24) ) ^
Te1((t1 >> 16) & 0xff) ^
Te2((t2 >> 8) & 0xff) ^
Te3((t3) & 0xff) ^
rk[0];
s1 =
Te0[(t1 >> 24) ] ^
Te1[(t2 >> 16) & 0xff] ^
Te2[(t3 >> 8) & 0xff] ^
Te3[(t0) & 0xff] ^
Te0((t1 >> 24) ) ^
Te1((t2 >> 16) & 0xff) ^
Te2((t3 >> 8) & 0xff) ^
Te3((t0) & 0xff) ^
rk[1];
s2 =
Te0[(t2 >> 24) ] ^
Te1[(t3 >> 16) & 0xff] ^
Te2[(t0 >> 8) & 0xff] ^
Te3[(t1) & 0xff] ^
Te0((t2 >> 24) ) ^
Te1((t3 >> 16) & 0xff) ^
Te2((t0 >> 8) & 0xff) ^
Te3((t1) & 0xff) ^
rk[2];
s3 =
Te0[(t3 >> 24) ] ^
Te1[(t0 >> 16) & 0xff] ^
Te2[(t1 >> 8) & 0xff] ^
Te3[(t2) & 0xff] ^
Te0((t3 >> 24) ) ^
Te1((t0 >> 16) & 0xff) ^
Te2((t1 >> 8) & 0xff) ^
Te3((t2) & 0xff) ^
rk[3];
}
@ -1161,31 +1240,31 @@ int aes_encrypt(const cipher_context_t *context, const uint8_t *plainBlock,
* map cipher state to byte array block:
*/
s0 =
(Te4[(t0 >> 24) ] & 0xff000000) ^
(Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
(Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
(Te4[(t3) & 0xff] & 0x000000ff) ^
(Te4((t0 >> 24) ) & 0xff000000) ^
(Te4((t1 >> 16) & 0xff) & 0x00ff0000) ^
(Te4((t2 >> 8) & 0xff) & 0x0000ff00) ^
(Te4((t3) & 0xff) & 0x000000ff) ^
rk[0];
PUTU32(cipherBlock , s0);
s1 =
(Te4[(t1 >> 24) ] & 0xff000000) ^
(Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
(Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
(Te4[(t0) & 0xff] & 0x000000ff) ^
(Te4((t1 >> 24) ) & 0xff000000) ^
(Te4((t2 >> 16) & 0xff) & 0x00ff0000) ^
(Te4((t3 >> 8) & 0xff) & 0x0000ff00) ^
(Te4((t0) & 0xff) & 0x000000ff) ^
rk[1];
PUTU32(cipherBlock + 4, s1);
s2 =
(Te4[(t2 >> 24) ] & 0xff000000) ^
(Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
(Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
(Te4[(t1) & 0xff] & 0x000000ff) ^
(Te4((t2 >> 24) ) & 0xff000000) ^
(Te4((t3 >> 16) & 0xff) & 0x00ff0000) ^
(Te4((t0 >> 8) & 0xff) & 0x0000ff00) ^
(Te4((t1) & 0xff) & 0x000000ff) ^
rk[2];
PUTU32(cipherBlock + 8, s2);
s3 =
(Te4[(t3 >> 24) ] & 0xff000000) ^
(Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
(Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
(Te4[(t2) & 0xff] & 0x000000ff) ^
(Te4((t3 >> 24) ) & 0xff000000) ^
(Te4((t0 >> 16) & 0xff) & 0x00ff0000) ^
(Te4((t1 >> 8) & 0xff) & 0x0000ff00) ^
(Te4((t2) & 0xff) & 0x000000ff) ^
rk[3];
PUTU32(cipherBlock + 12, s3);
return 1;
@ -1227,126 +1306,126 @@ int aes_decrypt(const cipher_context_t *context, const uint8_t *cipherBlock,
s3 = GETU32(cipherBlock + 12) ^ rk[3];
#ifdef FULL_UNROLL
/* round 1: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
Td3[s1 & 0xff] ^ rk[ 4];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
Td3[s2 & 0xff] ^ rk[ 5];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
Td3[s3 & 0xff] ^ rk[ 6];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
Td3[s0 & 0xff] ^ rk[ 7];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff) ^
Td3(s1 & 0xff) ^ rk[ 4];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff) ^
Td3(s2 & 0xff) ^ rk[ 5];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff) ^
Td3(s3 & 0xff) ^ rk[ 6];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff) ^
Td3(s0 & 0xff) ^ rk[ 7];
/* round 2: */
s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
Td3[t1 & 0xff] ^ rk[ 8];
s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
Td3[t2 & 0xff] ^ rk[ 9];
s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
Td3[t3 & 0xff] ^ rk[10];
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
Td3[t0 & 0xff] ^ rk[11];
s0 = Td0(t0 >> 24) ^ Td1((t3 >> 16) & 0xff) ^ Td2((t2 >> 8) & 0xff) ^
Td3(t1 & 0xff) ^ rk[ 8];
s1 = Td0(t1 >> 24) ^ Td1((t0 >> 16) & 0xff) ^ Td2((t3 >> 8) & 0xff) ^
Td3(t2 & 0xff) ^ rk[ 9];
s2 = Td0(t2 >> 24) ^ Td1((t1 >> 16) & 0xff) ^ Td2((t0 >> 8) & 0xff) ^
Td3(t3 & 0xff) ^ rk[10];
s3 = Td0(t3 >> 24) ^ Td1((t2 >> 16) & 0xff) ^ Td2((t1 >> 8) & 0xff) ^
Td3(t0 & 0xff) ^ rk[11];
/* round 3: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
Td3[s1 & 0xff] ^ rk[12];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
Td3[s2 & 0xff] ^ rk[13];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
Td3[s3 & 0xff] ^ rk[14];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
Td3[s0 & 0xff] ^ rk[15];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff) ^
Td3(s1 & 0xff) ^ rk[12];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff) ^
Td3(s2 & 0xff) ^ rk[13];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff) ^
Td3(s3 & 0xff) ^ rk[14];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff) ^
Td3(s0 & 0xff) ^ rk[15];
/* round 4: */
s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
Td3[t1 & 0xff] ^ rk[16];
s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
Td3[t2 & 0xff] ^ rk[17];
s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
Td3[t3 & 0xff] ^ rk[18];
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
Td3[t0 & 0xff] ^ rk[19];
s0 = Td0(t0 >> 24) ^ Td1((t3 >> 16) & 0xff) ^ Td2((t2 >> 8) & 0xff) ^
Td3(t1 & 0xff) ^ rk[16];
s1 = Td0(t1 >> 24) ^ Td1((t0 >> 16) & 0xff) ^ Td2((t3 >> 8) & 0xff) ^
Td3(t2 & 0xff) ^ rk[17];
s2 = Td0(t2 >> 24) ^ Td1((t1 >> 16) & 0xff) ^ Td2((t0 >> 8) & 0xff) ^
Td3(t3 & 0xff) ^ rk[18];
s3 = Td0(t3 >> 24) ^ Td1((t2 >> 16) & 0xff) ^ Td2((t1 >> 8) & 0xff) ^
Td3(t0 & 0xff) ^ rk[19];
/* round 5: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
Td3[s1 & 0xff] ^ rk[20];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
Td3[s2 & 0xff] ^ rk[21];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
Td3[s3 & 0xff] ^ rk[22];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
Td3[s0 & 0xff] ^ rk[23];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff) ^
Td3(s1 & 0xff) ^ rk[20];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff) ^
Td3(s2 & 0xff) ^ rk[21];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff) ^
Td3(s3 & 0xff) ^ rk[22];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff) ^
Td3(s0 & 0xff) ^ rk[23];
/* round 6: */
s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
Td3[t1 & 0xff] ^ rk[24];
s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
Td3[t2 & 0xff] ^ rk[25];
s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
Td3[t3 & 0xff] ^ rk[26];
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
Td3[t0 & 0xff] ^ rk[27];
s0 = Td0(t0 >> 24) ^ Td1((t3 >> 16) & 0xff) ^ Td2((t2 >> 8) & 0xff) ^
Td3(t1 & 0xff) ^ rk[24];
s1 = Td0(t1 >> 24) ^ Td1((t0 >> 16) & 0xff) ^ Td2((t3 >> 8) & 0xff) ^
Td3(t2 & 0xff) ^ rk[25];
s2 = Td0(t2 >> 24) ^ Td1((t1 >> 16) & 0xff) ^ Td2((t0 >> 8) & 0xff) ^
Td3(t3 & 0xff) ^ rk[26];
s3 = Td0(t3 >> 24) ^ Td1((t2 >> 16) & 0xff) ^ Td2((t1 >> 8) & 0xff) ^
Td3(t0 & 0xff) ^ rk[27];
/* round 7: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
Td3[s1 & 0xff] ^ rk[28];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
Td3[s2 & 0xff] ^ rk[29];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
Td3[s3 & 0xff] ^ rk[30];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
Td3[s0 & 0xff] ^ rk[31];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff) ^
Td3(s1 & 0xff) ^ rk[28];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff) ^
Td3(s2 & 0xff) ^ rk[29];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff) ^
Td3(s3 & 0xff) ^ rk[30];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff) ^
Td3(s0 & 0xff) ^ rk[31];
/* round 8: */
s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
Td3[t1 & 0xff] ^ rk[32];
s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
Td3[t2 & 0xff] ^ rk[33];
s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
Td3[t3 & 0xff] ^ rk[34];
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
Td3[t0 & 0xff] ^ rk[35];
s0 = Td0(t0 >> 24) ^ Td1((t3 >> 16) & 0xff) ^ Td2((t2 >> 8) & 0xff) ^
Td3(t1 & 0xff) ^ rk[32];
s1 = Td0(t1 >> 24) ^ Td1((t0 >> 16) & 0xff) ^ Td2((t3 >> 8) & 0xff) ^
Td3(t2 & 0xff) ^ rk[33];
s2 = Td0(t2 >> 24) ^ Td1((t1 >> 16) & 0xff) ^ Td2((t0 >> 8) & 0xff) ^
Td3(t3 & 0xff) ^ rk[34];
s3 = Td0(t3 >> 24) ^ Td1((t2 >> 16) & 0xff) ^ Td2((t1 >> 8) & 0xff) ^
Td3(t0 & 0xff) ^ rk[35];
/* round 9: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
Td3[s1 & 0xff] ^ rk[36];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
Td3[s2 & 0xff] ^ rk[37];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
Td3[s3 & 0xff] ^ rk[38];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
Td3[s0 & 0xff] ^ rk[39];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff) ^
Td3(s1 & 0xff) ^ rk[36];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff) ^
Td3(s2 & 0xff) ^ rk[37];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff) ^
Td3(s3 & 0xff) ^ rk[38];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff) ^
Td3(s0 & 0xff) ^ rk[39];
if (key->rounds > 10) {
/* round 10: */
s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
Td3[t1 & 0xff] ^ rk[40];
s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
Td3[t2 & 0xff] ^ rk[41];
s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
Td3[t3 & 0xff] ^ rk[42];
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
Td3[t0 & 0xff] ^ rk[43];
s0 = Td0(t0 >> 24) ^ Td1((t3 >> 16) & 0xff) ^ Td2((t2 >> 8) & 0xff) ^
Td3(t1 & 0xff) ^ rk[40];
s1 = Td0(t1 >> 24) ^ Td1((t0 >> 16) & 0xff) ^ Td2((t3 >> 8) & 0xff) ^
Td3(t2 & 0xff) ^ rk[41];
s2 = Td0(t2 >> 24) ^ Td1((t1 >> 16) & 0xff) ^ Td2((t0 >> 8) & 0xff) ^
Td3(t3 & 0xff) ^ rk[42];
s3 = Td0(t3 >> 24) ^ Td1((t2 >> 16) & 0xff) ^ Td2((t1 >> 8) & 0xff) ^
Td3(t0 & 0xff) ^ rk[43];
/* round 11: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
Td3[s1 & 0xff] ^ rk[44];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
Td3[s2 & 0xff] ^ rk[45];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
Td3[s3 & 0xff] ^ rk[46];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
Td3[s0 & 0xff] ^ rk[47];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff) ^
Td3(s1 & 0xff) ^ rk[44];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff) ^
Td3(s2 & 0xff) ^ rk[45];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff) ^
Td3(s3 & 0xff) ^ rk[46];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff) ^
Td3(s0 & 0xff) ^ rk[47];
if (key->rounds > 12) {
/* round 12: */
s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
^ Td3[t1 & 0xff] ^ rk[48];
s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
^ Td3[t2 & 0xff] ^ rk[49];
s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
^ Td3[t3 & 0xff] ^ rk[50];
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
^ Td3[t0 & 0xff] ^ rk[51];
s0 = Td0(t0 >> 24) ^ Td1((t3 >> 16) & 0xff) ^ Td2((t2 >> 8) & 0xff)
^ Td3(t1 & 0xff) ^ rk[48];
s1 = Td0(t1 >> 24) ^ Td1((t0 >> 16) & 0xff) ^ Td2((t3 >> 8) & 0xff)
^ Td3(t2 & 0xff) ^ rk[49];
s2 = Td0(t2 >> 24) ^ Td1((t1 >> 16) & 0xff) ^ Td2((t0 >> 8) & 0xff)
^ Td3(t3 & 0xff) ^ rk[50];
s3 = Td0(t3 >> 24) ^ Td1((t2 >> 16) & 0xff) ^ Td2((t1 >> 8) & 0xff)
^ Td3(t0 & 0xff) ^ rk[51];
/* round 13: */
t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
^ Td3[s1 & 0xff] ^ rk[52];
t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
^ Td3[s2 & 0xff] ^ rk[53];
t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
^ Td3[s3 & 0xff] ^ rk[54];
t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
^ Td3[s0 & 0xff] ^ rk[55];
t0 = Td0(s0 >> 24) ^ Td1((s3 >> 16) & 0xff) ^ Td2((s2 >> 8) & 0xff)
^ Td3(s1 & 0xff) ^ rk[52];
t1 = Td0(s1 >> 24) ^ Td1((s0 >> 16) & 0xff) ^ Td2((s3 >> 8) & 0xff)
^ Td3(s2 & 0xff) ^ rk[53];
t2 = Td0(s2 >> 24) ^ Td1((s1 >> 16) & 0xff) ^ Td2((s0 >> 8) & 0xff)
^ Td3(s3 & 0xff) ^ rk[54];
t3 = Td0(s3 >> 24) ^ Td1((s2 >> 16) & 0xff) ^ Td2((s1 >> 8) & 0xff)
^ Td3(s0 & 0xff) ^ rk[55];
}
}
@ -1359,28 +1438,28 @@ int aes_decrypt(const cipher_context_t *context, const uint8_t *cipherBlock,
while (1) {
t0 =
Td0[(s0 >> 24) ] ^
Td1[(s3 >> 16) & 0xff] ^
Td2[(s2 >> 8) & 0xff] ^
Td3[(s1) & 0xff] ^
Td0((s0 >> 24) ) ^
Td1((s3 >> 16) & 0xff) ^
Td2((s2 >> 8) & 0xff) ^
Td3((s1) & 0xff) ^
rk[4];
t1 =
Td0[(s1 >> 24) ] ^
Td1[(s0 >> 16) & 0xff] ^
Td2[(s3 >> 8) & 0xff] ^
Td3[(s2) & 0xff] ^
Td0((s1 >> 24) ) ^
Td1((s0 >> 16) & 0xff) ^
Td2((s3 >> 8) & 0xff) ^
Td3((s2) & 0xff) ^
rk[5];
t2 =
Td0[(s2 >> 24) ] ^
Td1[(s1 >> 16) & 0xff] ^
Td2[(s0 >> 8) & 0xff] ^
Td3[(s3) & 0xff] ^
Td0((s2 >> 24) ) ^
Td1((s1 >> 16) & 0xff) ^
Td2((s0 >> 8) & 0xff) ^
Td3((s3) & 0xff) ^
rk[6];
t3 =
Td0[(s3 >> 24) ] ^
Td1[(s2 >> 16) & 0xff] ^
Td2[(s1 >> 8) & 0xff] ^
Td3[(s0) & 0xff] ^
Td0((s3 >> 24) ) ^
Td1((s2 >> 16) & 0xff) ^
Td2((s1 >> 8) & 0xff) ^
Td3((s0) & 0xff) ^
rk[7];
rk += 8;
@ -1390,28 +1469,28 @@ int aes_decrypt(const cipher_context_t *context, const uint8_t *cipherBlock,
}
s0 =
Td0[(t0 >> 24) ] ^
Td1[(t3 >> 16) & 0xff] ^
Td2[(t2 >> 8) & 0xff] ^
Td3[(t1) & 0xff] ^
Td0((t0 >> 24) ) ^
Td1((t3 >> 16) & 0xff) ^
Td2((t2 >> 8) & 0xff) ^
Td3((t1) & 0xff) ^
rk[0];
s1 =
Td0[(t1 >> 24) ] ^
Td1[(t0 >> 16) & 0xff] ^
Td2[(t3 >> 8) & 0xff] ^
Td3[(t2) & 0xff] ^
Td0((t1 >> 24) ) ^
Td1((t0 >> 16) & 0xff) ^
Td2((t3 >> 8) & 0xff) ^
Td3((t2) & 0xff) ^
rk[1];
s2 =
Td0[(t2 >> 24) ] ^
Td1[(t1 >> 16) & 0xff] ^
Td2[(t0 >> 8) & 0xff] ^
Td3[(t3) & 0xff] ^
Td0((t2 >> 24) ) ^
Td1((t1 >> 16) & 0xff) ^
Td2((t0 >> 8) & 0xff) ^
Td3((t3) & 0xff) ^
rk[2];
s3 =
Td0[(t3 >> 24) ] ^
Td1[(t2 >> 16) & 0xff] ^
Td2[(t1 >> 8) & 0xff] ^
Td3[(t0) & 0xff] ^
Td0((t3 >> 24) ) ^
Td1((t2 >> 16) & 0xff) ^
Td2((t1 >> 8) & 0xff) ^
Td3((t0) & 0xff) ^
rk[3];
}
@ -1421,31 +1500,31 @@ int aes_decrypt(const cipher_context_t *context, const uint8_t *cipherBlock,
* map cipher state to byte array block:
*/
s0 =
(Td4[(t0 >> 24) ] & 0xff000000) ^
(Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
(Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
(Td4[(t1) & 0xff] & 0x000000ff) ^
(Td4((t0 >> 24) ) & 0xff000000) ^
(Td4((t3 >> 16) & 0xff) & 0x00ff0000) ^
(Td4((t2 >> 8) & 0xff) & 0x0000ff00) ^
(Td4((t1) & 0xff) & 0x000000ff) ^
rk[0];
PUTU32(plainBlock , s0);
s1 =
(Td4[(t1 >> 24) ] & 0xff000000) ^
(Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
(Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
(Td4[(t2) & 0xff] & 0x000000ff) ^
(Td4((t1 >> 24) ) & 0xff000000) ^
(Td4((t0 >> 16) & 0xff) & 0x00ff0000) ^
(Td4((t3 >> 8) & 0xff) & 0x0000ff00) ^
(Td4((t2) & 0xff) & 0x000000ff) ^
rk[1];
PUTU32(plainBlock + 4, s1);
s2 =
(Td4[(t2 >> 24) ] & 0xff000000) ^
(Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
(Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
(Td4[(t3) & 0xff] & 0x000000ff) ^
(Td4((t2 >> 24) ) & 0xff000000) ^
(Td4((t1 >> 16) & 0xff) & 0x00ff0000) ^
(Td4((t0 >> 8) & 0xff) & 0x0000ff00) ^
(Td4((t3) & 0xff) & 0x000000ff) ^
rk[2];
PUTU32(plainBlock + 8, s2);
s3 =
(Td4[(t3 >> 24) ] & 0xff000000) ^
(Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
(Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
(Td4[(t0) & 0xff] & 0x000000ff) ^
(Td4((t3 >> 24) ) & 0xff000000) ^
(Td4((t2 >> 16) & 0xff) & 0x00ff0000) ^
(Td4((t1 >> 8) & 0xff) & 0x0000ff00) ^
(Td4((t0) & 0xff) & 0x000000ff) ^
rk[3];
PUTU32(plainBlock + 12, s3);
return 1;

View File

@ -37,6 +37,8 @@ typedef uint32_t u32;
typedef uint16_t u16;
typedef uint8_t u8;
/*
 * This controls AES table calculation on the fly.  When defined, the AES
 * code keeps only the Te0 and Td0 word tables plus a byte-wide Td4 table
 * and derives Te1..Te4, Td1..Td3 and the Td4 words with shifts at lookup
 * time.  When not defined, separate precalculated tables are compiled in
 * instead.
 */
#define AES_CALCULATE_TABLES
/*
 * This controls loop-unrolling in aes_core.c.  With FULL_UNROLL undefined
 * (as here), the code under the #else branches of the FULL_UNROLL
 * conditionals is compiled instead of the fully unrolled rounds.
 */
#undef FULL_UNROLL