mpi_exp_mod: move X next to the precomputed values

With small exponents (for example, when doing RSA-1024 with CRT, each
prime is 512 bits and we'll use wsize = 5, which may be smaller than the
maximum - or, even worse, when doing public RSA operations, which
typically have a 16-bit exponent, so we'll use wsize = 1), the layout of
W will be: pre-computed values, then empty space, then the accumulator
at the very end.

Move X next to the precomputed values to make accesses more efficient
and intuitive.

Signed-off-by: Janos Follath <janos.follath@arm.com>
diff --git a/library/bignum.c b/library/bignum.c
index 44d1aca..986a9e9 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2006,7 +2006,6 @@
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
-    const size_t w_table_size = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2037,6 +2036,7 @@
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+    const size_t w_table_used_size = ( 1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
@@ -2055,7 +2055,7 @@
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_table_size - 1;
+    const size_t x_index = w_table_used_size - 1;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2177,7 +2177,7 @@
             /*
              * out of window, square W[x_index]
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, x_index ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
@@ -2197,14 +2197,15 @@
              */
             for( i = 0; i < window_bitsize; i++ )
             {
-                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size,
+                                             x_index ) );
                 mpi_montmul( &W[x_index], &WW, N, mm, &T );
             }
 
             /*
              * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size,
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size,
                                          exponent_bits_in_window ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
@@ -2219,14 +2220,14 @@
      */
     for( i = 0; i < nbits; i++ )
     {
-        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, x_index ) );
         mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
         exponent_bits_in_window <<= 1;
 
         if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
         {
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, 1 ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
         }
     }