#include "aes.hpp"
+#include <endian.h>
/* static function prototypes */
static byteArray word2bytes (word input);
static void printState (byteArray &bytes, std::string name);
AES::AES (const byteArray& key)
- : Nb(4) // This is constant in AES
- , Nk(key.size() / 4) // This can be either 4, 6, or 8 (128, 192, or 256 bit)
+ : Nk(key.size() / 4) // This can be either 4, 6, or 8 (128, 192, or 256 bit)
, Nr(Nk + Nb + 2)
, keySchedule(Nb * (Nr+1), 0x00000000)
{
throw incorrectTextSizeException ();
int round;
- byteArray state (plaintext);
+ byteArray state;
+
+ /* Copy the plaintext into the state matrix. It is copied in
+ * column-wise, because the AES Spec. does it this way.
+ *
+ * It also allows us to optimize ShiftRows later */
+ for (int c=0; c<Nb; ++c)
+ for (int r=0; r<Nb; ++r)
+ state.push_back (plaintext.at (r*Nb+c));
/* Round 0 */
//std::printf ("Round 0\n");
//printState (state, "srows");
AddRoundKey (state, GetRoundKey (round));
- return state;
+ /* This reverses the column-wise copy we did above, so
+ * the ciphertext comes out in the correct order. */
+ byteArray ciphertext;
+
+ for (int c=0; c<Nb; ++c)
+ for (int r=0; r<Nb; ++r)
+ ciphertext.push_back (state.at (r*Nb+c));
+
+ return ciphertext;
}
byteArray AES::decrypt (const byteArray& ciphertext) const
throw incorrectTextSizeException ();
int round = Nr;
- byteArray state (ciphertext);
+ byteArray state;
+
+ /* Copy the ciphertext into the state matrix. It is copied in
+ * column-wise, because the AES Spec. does it this way.
+ *
+ * It also allows us to optimize InvShiftRows later */
+ for (int c=0; c<Nb; ++c)
+ for (int r=0; r<Nb; ++r)
+ state.push_back (ciphertext.at (r*Nb+c));
/* Round Nr-1 */
AddRoundKey (state, GetRoundKey (round));
InvSubBytes (state);
AddRoundKey (state, GetRoundKey (round));
- return state;
+
+ /* This reverses the column-wise copy we did above to
+ * output the plaintext in the correct order. */
+ byteArray plaintext;
+
+ for (int c=0; c<Nb; ++c)
+ for (int r=0; r<Nb; ++r)
+ plaintext.push_back (state.at (r*Nb+c));
+
+ return plaintext;
}
void AES::KeyExpansion (const byteArray& key, wordArray& w) const
if (state.size() != Nb * 4)
throw badStateArrayException ();
+ /* This is a more-optimized way of doing ShiftRows than using
+ * bytes2word() and word2bytes() to pack and unpack the state matrix
+ * into words in order to use the shift-or method of doing the
+ * circular shift. It works because the memory used by a std::vector
+ * is guaranteed to be contiguous.
+ *
+ * Since bytes are stored in the byteArray vector, and they are in
+ * the proper order, we can access it like a word, and then shift that,
+ * instead of packing and then unpacking later.
+ *
+ * This should improve performance a little bit, because we are doing
+ * fewer assignments now. We do have to do more work in encrypt() and
+ * decrypt(), but that is 16 assignments, vs. 32 assignments per call
+ * to ShiftRows(). */
+
int r;
- word w;
- byteArray temp;
+ word *w_ptr = (word*)&state[0];
for (r=0; r<Nb; ++r)
{
- /* Pack the bytes into an word */
- w = bytes2word (state[r], state[r+4], state[r+8], state[r+12]);
-
- /* Circular Left Shift the word */
- w = (w << r*8) | (w >> ((4-r)*8));
-
- /* Unpack the bytes from the word back into the state matrix */
- temp = word2bytes (w);
- state[r] = temp.at (0);
- state[r+4] = temp.at (1);
- state[r+8] = temp.at (2);
- state[r+12] = temp.at (3);
+#if __BYTE_ORDER == LITTLE_ENDIAN
+ *w_ptr = (*w_ptr >> r*8) | (*w_ptr << ((4-r)*8));
+#else // BIG_ENDIAN
+ *w_ptr = (*w_ptr << r*8) | (*w_ptr >> ((4-r)*8));
+#endif
+ w_ptr++;
}
}
if (state.size() != Nb * 4)
throw badStateArrayException ();
+ /* This is a more-optimized way of doing ShiftRows than using
+ * bytes2word() and word2bytes() to pack and unpack the state matrix
+ * into words in order to use the shift-or method of doing the
+ * circular shift. It works because the memory used by a std::vector
+ * is guaranteed to be contiguous.
+ *
+ * Since bytes are stored in the byteArray vector, and they are in
+ * the proper order, we can access it like a word, and then shift that,
+ * instead of packing and then unpacking later.
+ *
+ * This should improve performance a little bit, because we are doing
+ * fewer assignments now. We do have to do more work in encrypt() and
+ * decrypt(), but that is 16 assignments, vs. 32 assignments per call
+ * to ShiftRows(). */
+
int r;
- word w;
- byteArray temp;
+ word *w_ptr = (word*)&state[0];
for (r=0; r<Nb; ++r)
{
- /* Pack the bytes into an word */
- w = bytes2word (state[r], state[r+4], state[r+8], state[r+12]);
-
- /* Circular Right Shift the word */
- w = (w << ((4-r)*8)) | (w >> (r*8));
-
- /* Unpack the bytes from the word back into the state matrix */
- temp = word2bytes (w);
- state[r] = temp.at (0);
- state[r+4] = temp.at (1);
- state[r+8] = temp.at (2);
- state[r+12] = temp.at (3);
+#if __BYTE_ORDER == LITTLE_ENDIAN
+ *w_ptr = (*w_ptr << r*8) | (*w_ptr >> ((4-r)*8));
+#else // BIG_ENDIAN
+ *w_ptr = (*w_ptr >> (4-r)*8) | (*w_ptr << r*8);
+#endif
+ w_ptr++;
}
}
{
/* Get this column */
for (c=0; c<Nb; ++c)
- temp[c] = state[(r*4)+c];
+ temp[c] = state[(c*4)+r];
/* Do the Multiply */
for (i=0; i<4; ++i)
/* Copy back into state matrix */
for (c=0; c<Nb; ++c)
- state[(r*4)+c] = result[c];
+ state[(c*4)+r] = result[c];
}
}
{
/* Get this column */
for (c=0; c<Nb; ++c)
- temp[c] = state[(r*4)+c];
+ temp[c] = state[(c*4)+r];
/* Do the Multiply */
for (i=0; i<4; ++i)
/* Copy back into state matrix */
for (c=0; c<Nb; ++c)
- state[(r*4)+c] = result[c];
+ state[(c*4)+r] = result[c];
}
}
byteArray wBytes = word2bytes (w[i]);
for (j=0; j<Nb; ++j)
- {
- //std::printf ("state.at(%d) ^= wBytes.at(%d) -- %.2x ^ %.2x = %.2x\n", i*Nb+j, j, state.at (i*Nb+j), wBytes.at(j), state.at(i*Nb+j) ^ wBytes.at(j));
- state.at(i*Nb+j) ^= wBytes.at(j);
- }
+ state.at(j*Nb+i) ^= wBytes.at(j);
}
}
static void printState (byteArray &bytes, std::string name)
{
- int i;
+ int r, c;
std::cout << name << ": ";
- for (i=0; i<16; ++i)
- std::printf ("%.2x", bytes.at(i));
+ for (r=0; r<4; ++r)
+ for (c=0; c<4; ++c)
+ std::printf ("%.2x", bytes.at(c*4+r));
std::printf ("\n");
}