Unroll aesce_encrypt_block

Replace the data-dependent round loop in aesce_encrypt_block with
straight-line code for the three supported AES round counts (10, 12
and 14), selected by a single branch on entry; the 12- and 10-round
cases jump into the tail of the fully unrolled 14-round sequence.
The new assumption (rounds must be 10, 12 or 14) is documented on the
internal API in aesce.h.

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/aesce.c b/library/aesce.c
index 4db8d2a..abd47b1 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -105,22 +105,64 @@
                                       unsigned char *keys,
                                       int rounds)
 {
-    for (int i = 0; i < rounds - 1; i++) {
-        /* AES AddRoundKey, SubBytes, ShiftRows (in this order).
-         * AddRoundKey adds the round key for the previous round. */
-        block = vaeseq_u8(block, vld1q_u8(keys + i * 16));
-        /* AES mix columns */
-        block = vaesmcq_u8(block);
+    /* Assume either 10, 12 or 14 rounds */
+    if (rounds == 10) {
+        goto rounds_10;
     }
+    if (rounds == 12) {
+        goto rounds_12;
+    }
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+rounds_12:
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+rounds_10:
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    block = vaesmcq_u8(block);
+    keys += 16;
 
     /* AES AddRoundKey for the previous round.
      * SubBytes, ShiftRows for the final round.  */
-    block = vaeseq_u8(block, vld1q_u8(keys + (rounds -1) * 16));
+    block = vaeseq_u8(block, vld1q_u8(keys));
+    keys += 16;
 
     /* Final round: no MixColumns */
 
     /* Final AddRoundKey */
-    block = veorq_u8(block, vld1q_u8(keys + rounds  * 16));
+    block = veorq_u8(block, vld1q_u8(keys));
 
     return block;
 }
diff --git a/library/aesce.h b/library/aesce.h
index 7048d77..61e73bf 100644
--- a/library/aesce.h
+++ b/library/aesce.h
@@ -52,6 +52,9 @@
 /**
  * \brief          Internal AES-ECB block encryption and decryption
  *
+ * Note: this assumes that the context specifies either 10, 12 or 14 rounds
+ * and will behave incorrectly if this is not the case.
+ *
  * \param ctx      AES context
  * \param mode     MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
  * \param input    16-byte input block