Improve readability of unrolled AESCE code
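
Replace each unrolled run of vaeseq_u8/vaesmcq_u8 (and vaesdq_u8/vaesimcq_u8)
statements with a small per-round macro, so each cipher function reduces to a
handful of macro invocations plus the existing goto-based round skipping. As a
rough sketch, the encrypt path now reads (illustration only; the final-round
handling further down is unchanged and not shown):

    if (rounds == 10) goto rounds_10;
    if (rounds == 12) goto rounds_12;
    AESCE_ENCRYPT_ROUND_X2;     /* 2 extra rounds for 14-round (AES-256) keys */
rounds_12:
    AESCE_ENCRYPT_ROUND_X2;     /* 2 extra rounds for 12-round (AES-192) keys */
rounds_10:
    AESCE_ENCRYPT_ROUND_X2;     /* 9 rounds common to all key lengths */
    AESCE_ENCRYPT_ROUND_X2;
    AESCE_ENCRYPT_ROUND_X2;
    AESCE_ENCRYPT_ROUND_X2;
    AESCE_ENCRYPT_ROUND;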

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/aesce.c b/library/aesce.c
index 6b493a2..600326a 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -101,59 +101,36 @@
 #endif
 }
 
+/* Single round of AESCE encryption: AddRoundKey, SubBytes and ShiftRows
+ * (vaeseq_u8), then MixColumns (vaesmcq_u8), then advance the key pointer. */
+#define AESCE_ENCRYPT_ROUND                   \
+    block = vaeseq_u8(block, vld1q_u8(keys)); \
+    block = vaesmcq_u8(block);                \
+    keys += 16
+/* Two rounds of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND_X2        AESCE_ENCRYPT_ROUND; AESCE_ENCRYPT_ROUND
+
 MBEDTLS_OPTIMIZE_ALWAYS
 static uint8x16_t aesce_encrypt_block(uint8x16_t block,
                                       unsigned char *keys,
                                       int rounds)
 {
-    /* Assume either 10, 12 or 14 rounds */
+    /* Assume either 10, 12 or 14 rounds (AES-128, AES-192 or AES-256 keys).
+     * Skip 4 or 2 of the unrolled rounds below when doing 10 or 12 rounds. */
     if (rounds == 10) {
         goto rounds_10;
     }
     if (rounds == 12) {
         goto rounds_12;
     }
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
 rounds_12:
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
 rounds_10:
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND;
 
     /* AES AddRoundKey for the previous round.
      * SubBytes, ShiftRows for the final round.  */
@@ -168,74 +145,56 @@
     return block;
 }
 
+/* Single round of AESCE decryption
+ *
+ * AES AddRoundKey and the inverses of SubBytes and ShiftRows
+ *
+ *      block = vaesdq_u8(block, vld1q_u8(keys));
+ *
+ * AES inverse MixColumns for the next round.
+ *
+ * This means that we switch the order of the inverse AddRoundKey and
+ * inverse MixColumns operations. We have to do this as AddRoundKey is
+ * done in an atomic instruction together with the inverses of SubBytes
+ * and ShiftRows.
+ *
+ * It works because MixColumns is a linear operation over GF(2^8) and
+ * AddRoundKey is an exclusive or, which is equivalent to addition over
+ * GF(2^8). (The inverse of MixColumns needs to be applied to the
+ * affected round keys separately, which has been done when the
+ * decryption round keys were calculated.)
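+ *
+ * Concretely, for a state s and round key k, linearity gives
+ *
+ *      InvMixColumns(s ^ k) == InvMixColumns(s) ^ InvMixColumns(k)
+ *
+ * so a key schedule that stores InvMixColumns(k) lets vaesimcq_u8 run
+ * after vaesdq_u8 (i.e. after the key addition) with the same result.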
+ *
+ *      block = vaesimcq_u8(block);
+ */
+#define AESCE_DECRYPT_ROUND                   \
+    block = vaesdq_u8(block, vld1q_u8(keys)); \
+    block = vaesimcq_u8(block);               \
+    keys += 16
+/* Two rounds of AESCE decryption */
+#define AESCE_DECRYPT_ROUND_X2        AESCE_DECRYPT_ROUND; AESCE_DECRYPT_ROUND
+
 MBEDTLS_OPTIMIZE_ALWAYS
 static uint8x16_t aesce_decrypt_block(uint8x16_t block,
                                       unsigned char *keys,
                                       int rounds)
 {
-    /* Assume either 10, 12 or 14 rounds */
+    /* Assume either 10, 12 or 14 rounds (AES-128, AES-192 or AES-256 keys).
+     * Skip 4 or 2 of the unrolled rounds below when doing 10 or 12 rounds. */
     if (rounds == 10) {
         goto rounds_10;
     }
     if (rounds == 12) {
         goto rounds_12;
     }
-
-    /* AES AddRoundKey, SubBytes, ShiftRows */
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    /* AES inverse MixColumns for the next round.
-     *
-     * This means that we switch the order of the inverse AddRoundKey and
-     * inverse MixColumns operations. We have to do this as AddRoundKey is
-     * done in an atomic instruction together with the inverses of SubBytes
-     * and ShiftRows.
-     *
-     * It works because MixColumns is a linear operation over GF(2^8) and
-     * AddRoundKey is an exclusive or, which is equivalent to addition over
-     * GF(2^8). (The inverse of MixColumns needs to be applied to the
-     * affected round keys separately which has been done when the
-     * decryption round keys were calculated.) */
-    block = vaesimcq_u8(block);
-    keys += 16;
-
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
 rounds_12:
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
 rounds_10:
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND;
 
     /* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the
      * last full round. */