Update AES SCA countermeasures

-Add dummy rounds to the start and/or end of the AES calculation
 rounds.
diff --git a/library/aes.c b/library/aes.c
index c96f29e..ece7a82 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -95,7 +95,7 @@
 
 #if defined(MBEDTLS_AES_SCA_COUNTERMEASURES)
 /* Number of additional AES calculation rounds added for SCA CM */
-#define AES_SCA_CM_ROUNDS  3
+#define AES_SCA_CM_ROUNDS  4
 #else /* MBEDTLS_AES_SCA_COUNTERMEASURES */
 #define AES_SCA_CM_ROUNDS  0
 #endif /* MBEDTLS_AES_SCA_COUNTERMEASURES */
@@ -513,97 +513,99 @@
 #endif /* MBEDTLS_AES_ROM_TABLES */
 
 /**
- * Randomize positions when to use AES SCA countermeasures.
- * Each byte indicates one AES round as follows:
- * first ( tbl_len - 4 ) bytes are reserved for middle AES rounds:
- *  -4 high bit = table to use 0x10 for SCA CM data, 0 otherwise
- *  -4 low bits = offset based on order, 4 for even position, 0 otherwise
- * Last 4 bytes for first(2) and final(2) round calculation
- *  -4 high bit = table to use, 0x10 for SCA CM data, otherwise real data
- *  -4 low bits = not used
+ * Randomize positions for AES SCA countermeasures if AES countermeasures are
+ * enabled. If countermeasures are not enabled then fill given table with real
+ * data values.
+ *
+ * Dummy rounds are added as follows:
+ * 1. One dummy round added to the initial round key addition (executed in
+ *    random order).
+ * 2. Random number of dummy rounds added as first and/or last AES calculation
+ *    round. Total number of dummy rounds is AES_SCA_CM_ROUNDS.
+ *
+ * Description of the bytes in the table are as follows:
+ * - 2 bytes for initial round key addition
+ * - remaining bytes for AES calculation with real or dummy data
+ *
+ * Each byte indicates one AES calculation round:
+ *  -4 high bit = table to use 0x10 for dummy data, 0x00 real data
+ *  -bit 2 = offset for even/odd rounds
+ *  -bit 0-1: stop mark (0x03) to indicate calculation end
  *
  *  Return  Number of additional AES rounds
  *
  * Example of the control bytes:
- *  Control data when only real data (R) is used:
- *  | R  | R  | R  | R  | R  | R  | R  | R  | Start   | Final   |
- *  |0x04|0x00|0x00|0x04|0x00|0x04|0x00|0x04|0x00|0x00|0x00|0x00|
+ *  1. No countermeasures enabled and AES-128, only real data (R) used:
+ *  | Ri  | R  | R  | R  | R  | R  | R  | R  | R  | R  | R  |
+ *  |0x03|0x04|0x00|0x04|0x00|0x04|0x00|0x04|0x00|0x07|0x03|
  *
- *  Control data with 5 (F) dummy rounds and randomized start and final round:
- *  | R  | F  | R  | F  | F  | R  | R  | R  | R  | R  | R  | START RF| FINAL FR|
- *  |0x04|0x10|0x04|0x10|0x10|0x00|0x04|0x00|0x04|0x00|0x04|0x00|0x10|0x10|0x00|
+ *  2. Countermeasures enabled, 3 (F) dummy rounds in start and 1 at end:
+ *  | Fi | Ri  | F  | F  | F  | R  | R  | ... | R  | R  | R  | R  | F  |
+ *  |0x10|0x03 |0x10|0x10|0x10|0x04|0x00| ... |0x04|0x00|0x04|0x03|0x07|
  */
 static int aes_sca_cm_data_randomize( uint8_t *tbl, uint8_t tbl_len )
 {
-    int i, is_even_pos;
+    int i = 0, j = 0, is_even_pos;
 #if AES_SCA_CM_ROUNDS != 0
-    int is_unique_number;
     int num;
 #endif
 
     mbedtls_platform_memset( tbl, 0, tbl_len );
 
 #if AES_SCA_CM_ROUNDS != 0
-    // Randomize SCA CM positions to tbl
-    for( i = 0; i < AES_SCA_CM_ROUNDS; i++ )
+    num = mbedtls_platform_random_in_range( 0x1f );
+
+    // Randomize execution order of initial round key addition
+    if ( ( num & 0x10 ) == 0 )
     {
-        is_unique_number = 0;
-        do
-        {
-            is_unique_number++;
-            num = mbedtls_platform_random_in_range( tbl_len - 4 );
-
-            if( is_unique_number > 10 )
-            {
-                // prevent forever loop if random returns constant
-                is_unique_number = 0;
-                tbl[i] = 0x10;    // fake data
-            }
-
-            if( tbl[num] == 0 )
-            {
-                is_unique_number = 0;
-                tbl[num] = 0x10;    // fake data
-            }
-        } while( is_unique_number != 0 );
+        tbl[i++] = 0x10;        // dummy data
+        tbl[i++] = 0x00 | 0x03; // real data + stop marker
+    } else {
+        tbl[i++] = 0x00;        // real data
+        tbl[i++] = 0x10 | 0x03; // dummy data + stop marker
     }
 
-    // randomize control data for start and final round
-    for( i = 1; i <= 2; i++ )
+    // Randomize AES rounds
+    num = num % ( AES_SCA_CM_ROUNDS + 1 );
+
+    // add dummy rounds to the start (if needed)
+    for ( ; i < num + 2; i++ )
     {
-        num = mbedtls_platform_random_in_range( 0xff );
-        if( ( num % 2 ) == 0 )
-        {
-            tbl[tbl_len - ( i * 2 - 0 )] = 0x10;    // fake data
-            tbl[tbl_len - ( i * 2 - 1 )] = 0x00;    // real data
-        }
-        else
-        {
-            tbl[tbl_len - ( i * 2 - 0 )] = 0x00;    // real data
-            tbl[tbl_len - ( i * 2 - 1 )] = 0x10;    // fake data
-        }
+        tbl[i] = 0x10;  // dummy data
     }
+
+    // add dummy rounds to the last, (AES_SCA_CM_ROUNDS - num) rounds if needed
+    for ( j = tbl_len - AES_SCA_CM_ROUNDS + num; j < tbl_len; j++ )
+    {
+        tbl[j] = 0x10;  // dummy data
+    }
+#else /* AES_SCA_CM_ROUNDS != 0 */
+    tbl[i++] = 0x03; // real data + stop marker for the round key addition
 #endif /* AES_SCA_CM_ROUNDS != 0 */
 
-    // Fill real AES round data to the remaining places
+    // Fill real AES data to the remaining places
     is_even_pos = 1;
-    for( i = 0; i < tbl_len - 4; i++ )
+    for( ; i < tbl_len; i++ )
     {
         if( tbl[i] == 0 )
         {
             if( is_even_pos == 1 )
             {
-                tbl[i] = 0x04;  // real data, offset 4
+                tbl[i] = 0x04;  // real data, offset for rounds 1,3,5, etc...
                 is_even_pos = 0;
             }
             else
             {
-                tbl[i] = 0x00;  // real data, offset 0
+                tbl[i] = 0x00;  // real data, offset for rounds 2,4,6,...
                 is_even_pos = 1;
             }
+            j = i;  // remember the final round position in table
         }
     }
 
+    tbl[( tbl_len - 1)] |= 0x03;    // Stop marker for the last item in tbl
+    tbl[( j - 1 )] |= 0x03;         // stop marker for final - 1 real data
+
     return( AES_SCA_CM_ROUNDS );
 }
 
@@ -1051,30 +1053,31 @@
                                   const unsigned char input[16],
                                   unsigned char output[16] )
 {
-    int i, j, offset, start_fin_loops = 1;
+    int i, tindex, offset, stop_mark;
     aes_r_data_t aes_data_real;         // real data
 #if AES_SCA_CM_ROUNDS != 0
     aes_r_data_t aes_data_fake;         // fake data
 #endif /* AES_SCA_CM_ROUNDS != 0 */
-    aes_r_data_t *aes_data_ptr;         // pointer to aes_data_real or aes_data_fake
+    aes_r_data_t *aes_data_ptr;         // pointer to real or fake data
     aes_r_data_t *aes_data_table[2];    // pointers to real and fake data
-    int round_ctrl_table_len = ctx->nr - 1 + AES_SCA_CM_ROUNDS + 2 + 2;
+    int round_ctrl_table_len = ctx->nr + 1;
     volatile int flow_control;
-    // control bytes for AES rounds, reserve based on max ctx->nr
-    uint8_t round_ctrl_table[ 14 - 1 + AES_SCA_CM_ROUNDS + 2 + 2];
+    // control bytes for AES calculation rounds,
+    // reserve based on max rounds + dummy rounds + 2 (for initial key addition)
+    uint8_t round_ctrl_table[( 14 + AES_SCA_CM_ROUNDS + 2 )];
 
     aes_data_real.rk_ptr = ctx->rk;
     aes_data_table[0] = &aes_data_real;
 
 #if AES_SCA_CM_ROUNDS != 0
+    round_ctrl_table_len += ( AES_SCA_CM_ROUNDS + 1 );
     aes_data_table[1] = &aes_data_fake;
     aes_data_fake.rk_ptr = ctx->rk;
-    start_fin_loops = 2;
     for( i = 0; i < 4; i++ )
         aes_data_fake.xy_values[i] = mbedtls_platform_random_in_range( 0xffffffff );
 #endif
 
-    // Get randomized AES calculation control bytes
+    // Get AES calculation control bytes
     flow_control = aes_sca_cm_data_randomize( round_ctrl_table,
         round_ctrl_table_len );
 
@@ -1091,22 +1094,29 @@
         GET_UINT32_LE( aes_data_real.xy_values[i], input,  ( i * 4 ) );
     }
 
-    for( i = 0; i < 4; i++ )
+    tindex = 0;
+    do
     {
-        for( j = 0; j < start_fin_loops; j++ )
-        {
-            aes_data_ptr =
-                aes_data_table[round_ctrl_table[ round_ctrl_table_len - 2 + j ] >> 4];
-            aes_data_ptr->xy_values[i] ^= *aes_data_ptr->rk_ptr++;
-            flow_control++;
-        }
-    }
+        // Get pointer to the real or fake data
+        aes_data_ptr = aes_data_table[round_ctrl_table[tindex] >> 4];
+        stop_mark = round_ctrl_table[tindex] & 0x03;
 
-    for( i = 0; i < ( ctx->nr - 1 + AES_SCA_CM_ROUNDS ); i++ )
+        // initial round key addition
+        for( i = 0; i < 4; i++ )
+        {
+            aes_data_ptr->xy_values[i] ^= *aes_data_ptr->rk_ptr++;
+        }
+        tindex++;
+        flow_control++;
+    } while( stop_mark == 0 );
+
+    // Calculate AES rounds (9, 11 or 13 rounds) + dummy rounds
+    do
     {
-        // Read AES control data
-        aes_data_ptr = aes_data_table[round_ctrl_table[i] >> 4];
-        offset = round_ctrl_table[i] & 0x0f;
+        // Get pointer to the real or fake data
+        aes_data_ptr = aes_data_table[round_ctrl_table[tindex] >> 4];
+        offset = round_ctrl_table[tindex] & 0x04;
+        stop_mark = round_ctrl_table[tindex] & 0x03;
 
         aes_data_ptr->rk_ptr = aes_fround( aes_data_ptr->rk_ptr,
             &aes_data_ptr->xy_values[0 + offset],
@@ -1117,12 +1127,15 @@
             aes_data_ptr->xy_values[5 - offset],
             aes_data_ptr->xy_values[6 - offset],
             aes_data_ptr->xy_values[7 - offset] );
+        tindex++;
         flow_control++;
-    }
+    } while( stop_mark == 0 );
 
-    for( j = 0; j < start_fin_loops; j++ )
+    // Calculate final AES round + dummy rounds
+    do
     {
-        aes_data_ptr = aes_data_table[round_ctrl_table[ i + j ] >> 4];
+        aes_data_ptr = aes_data_table[round_ctrl_table[tindex] >> 4];
+        stop_mark = round_ctrl_table[tindex] & 0x03;
         aes_fround_final( aes_data_ptr->rk_ptr,
             &aes_data_ptr->xy_values[0],
             &aes_data_ptr->xy_values[1],
@@ -1133,7 +1146,8 @@
             aes_data_ptr->xy_values[6],
             aes_data_ptr->xy_values[7] );
         flow_control++;
-    }
+        tindex++;
+    } while( stop_mark == 0 );
 
     mbedtls_platform_memset( output, 0, 16 );
     offset = mbedtls_platform_random_in_range( 4 );
@@ -1150,8 +1164,7 @@
         flow_control++;
     }
 
-    if( flow_control == ( AES_SCA_CM_ROUNDS + ( 4 * start_fin_loops ) +
-        ctx->nr - 1 + AES_SCA_CM_ROUNDS + start_fin_loops + 4 )  )
+    if( flow_control == round_ctrl_table_len + AES_SCA_CM_ROUNDS + 4 )
     {
         /* Validate control path due possible fault injection */
         return 0;
@@ -1232,50 +1245,70 @@
                                   const unsigned char input[16],
                                   unsigned char output[16] )
 {
-    int i, j, offset, start_fin_loops = 1;
+    int i, tindex, offset, stop_mark;
     aes_r_data_t aes_data_real;         // real data
 #if AES_SCA_CM_ROUNDS != 0
     aes_r_data_t aes_data_fake;         // fake data
 #endif /* AES_SCA_CM_ROUNDS != 0 */
-    aes_r_data_t *aes_data_ptr;         // pointer to aes_data_real or aes_data_fake
+    aes_r_data_t *aes_data_ptr;         // pointer to real or fake data
     aes_r_data_t *aes_data_table[2];    // pointers to real and fake data
-    int round_ctrl_table_len = ctx->nr - 1 + AES_SCA_CM_ROUNDS + 2 + 2;
-    // control bytes for AES rounds, reserve based on max ctx->nr
+    int round_ctrl_table_len = ctx->nr + 1;
     volatile int flow_control;
-    uint8_t round_ctrl_table[ 14 - 1 + AES_SCA_CM_ROUNDS + 2 + 2 ];
+    // control bytes for AES calculation rounds,
+    // reserve based on max rounds + dummy rounds + 2 (for initial key addition)
+    uint8_t round_ctrl_table[( 14 + AES_SCA_CM_ROUNDS + 2 )];
 
     aes_data_real.rk_ptr = ctx->rk;
     aes_data_table[0] = &aes_data_real;
 
 #if AES_SCA_CM_ROUNDS != 0
+    round_ctrl_table_len += ( AES_SCA_CM_ROUNDS + 1 );
     aes_data_table[1] = &aes_data_fake;
     aes_data_fake.rk_ptr = ctx->rk;
-    start_fin_loops = 2;
     for( i = 0; i < 4; i++ )
         aes_data_fake.xy_values[i] = mbedtls_platform_random_in_range( 0xffffffff );
 #endif
 
-    // Get randomized AES calculation control bytes
+    // Get AES calculation control bytes
     flow_control = aes_sca_cm_data_randomize( round_ctrl_table,
         round_ctrl_table_len );
 
-    for( i = 0; i < 4; i++ )
+    mbedtls_platform_memset( aes_data_real.xy_values, 0, 16 );
+    offset = mbedtls_platform_random_in_range( 4 );
+
+    for( i = offset; i < 4; i++ )
     {
         GET_UINT32_LE( aes_data_real.xy_values[i], input,  ( i * 4 ) );
-        for( j = 0; j < start_fin_loops; j++ )
-        {
-            aes_data_ptr =
-                aes_data_table[round_ctrl_table[ round_ctrl_table_len - 4 + j ] >> 4];
-            aes_data_ptr->xy_values[i] ^= *aes_data_ptr->rk_ptr++;
-            flow_control++;
-        }
     }
 
-    for( i = 0; i < ( ctx->nr - 1 + AES_SCA_CM_ROUNDS ); i++ )
+    for( i = 0; i < offset; i++ )
     {
-        // Read AES control data
-        aes_data_ptr = aes_data_table[round_ctrl_table[i] >> 4];
-        offset = round_ctrl_table[i] & 0x0f;
+        GET_UINT32_LE( aes_data_real.xy_values[i], input,  ( i * 4 ) );
+    }
+
+    tindex = 0;
+    do
+    {
+        // Get pointer to the real or fake data
+        aes_data_ptr = aes_data_table[round_ctrl_table[tindex] >> 4];
+        stop_mark = round_ctrl_table[tindex] & 0x03;
+
+        // initial round key addition
+        for( i = 0; i < 4; i++ )
+        {
+            aes_data_ptr->xy_values[i] ^= *aes_data_ptr->rk_ptr++;
+        }
+        tindex++;
+        flow_control++;
+    } while( stop_mark == 0 );
+
+    // Calculate AES rounds (9, 11 or 13 rounds) + dummy rounds
+    do
+    {
+        // Get pointer to the real or fake data
+        aes_data_ptr = aes_data_table[round_ctrl_table[tindex] >> 4];
+        offset = round_ctrl_table[tindex] & 0x04;
+        stop_mark = round_ctrl_table[tindex] & 0x03;
 
         aes_data_ptr->rk_ptr = aes_rround( aes_data_ptr->rk_ptr,
             &aes_data_ptr->xy_values[0 + offset],
@@ -1286,12 +1319,15 @@
             aes_data_ptr->xy_values[5 - offset],
             aes_data_ptr->xy_values[6 - offset],
             aes_data_ptr->xy_values[7 - offset] );
+        tindex++;
         flow_control++;
-    }
+    } while( stop_mark == 0 );
 
-    for( j = 0; j < start_fin_loops; j++ )
+    // Calculate final AES round + dummy rounds
+    do
     {
-        aes_data_ptr = aes_data_table[round_ctrl_table[ i + j ] >> 4];
+        aes_data_ptr = aes_data_table[round_ctrl_table[tindex] >> 4];
+        stop_mark = round_ctrl_table[tindex] & 0x03;
         aes_rround_final( aes_data_ptr->rk_ptr,
             &aes_data_ptr->xy_values[0],
             &aes_data_ptr->xy_values[1],
@@ -1302,16 +1338,25 @@
             aes_data_ptr->xy_values[6],
             aes_data_ptr->xy_values[7] );
         flow_control++;
-    }
+        tindex++;
+    } while( stop_mark == 0 );
 
-    for( i = 0; i < 4; i++ )
+    mbedtls_platform_memset( output, 0, 16 );
+    offset = mbedtls_platform_random_in_range( 4 );
+
+    for( i = offset; i < 4; i++ )
     {
         PUT_UINT32_LE( aes_data_real.xy_values[i], output,  ( i * 4 ) );
         flow_control++;
     }
 
-    if( flow_control == ( AES_SCA_CM_ROUNDS + ( 4 * start_fin_loops ) +
-        ctx->nr - 1 + AES_SCA_CM_ROUNDS + start_fin_loops + 4 )  )
+    for( i = 0; i < offset; i++ )
+    {
+        PUT_UINT32_LE( aes_data_real.xy_values[i], output,  ( i * 4 ) );
+        flow_control++;
+    }
+
+    if( flow_control == round_ctrl_table_len + AES_SCA_CM_ROUNDS + 4 )
     {
         /* Validate control path due possible fault injection */
         return 0;