Merge pull request #178 from mpg/sha512-smaller

New config.h option to make SHA-512 smaller
diff --git a/include/mbedtls/config.h b/include/mbedtls/config.h
index 95ab1f2..3c1430c 100644
--- a/include/mbedtls/config.h
+++ b/include/mbedtls/config.h
@@ -1007,6 +1007,16 @@
 //#define MBEDTLS_SHA256_SMALLER
 
 /**
+ * \def MBEDTLS_SHA512_SMALLER
+ *
+ * Enable an implementation of SHA-512 that has lower ROM footprint but also
+ * lower performance.
+ *
+ * Uncomment to enable the smaller implementation of SHA512.
+ */
+//#define MBEDTLS_SHA512_SMALLER
+
+/**
  * \def MBEDTLS_THREADING_ALT
  *
  * Provide your own alternate threading implementation.
diff --git a/library/sha512.c b/library/sha512.c
index bdd20b2..2e2b797 100644
--- a/library/sha512.c
+++ b/library/sha512.c
@@ -92,6 +92,15 @@
 }
 #endif /* PUT_UINT64_BE */
 
+#if defined(MBEDTLS_SHA512_SMALLER)
+static void sha512_put_uint64_be( uint64_t n, unsigned char *b, uint8_t i )
+{
+    PUT_UINT64_BE(n, b, i);
+}
+#else
+#define sha512_put_uint64_be    PUT_UINT64_BE
+#endif /* MBEDTLS_SHA512_SMALLER */
+
 void mbedtls_sha512_init( mbedtls_sha512_context *ctx )
 {
     SHA512_VALIDATE( ctx != NULL );
@@ -219,7 +228,7 @@
 {
     int i;
     uint64_t temp1, temp2, W[80];
-    uint64_t A, B, C, D, E, F, G, H;
+    uint64_t A[8];
 
     SHA512_VALIDATE_RET( ctx != NULL );
     SHA512_VALIDATE_RET( (const unsigned char *)data != NULL );
@@ -244,6 +253,28 @@
         (d) += temp1; (h) = temp1 + temp2;                      \
     } while( 0 )
 
+    for( i = 0; i < 8; i++ )
+        A[i] = ctx->state[i];
+
+#if defined(MBEDTLS_SHA512_SMALLER)
+    for( i = 0; i < 80; i++ )
+    {
+        if( i < 16 )
+        {
+            GET_UINT64_BE( W[i], data, i << 3 );
+        }
+        else
+        {
+            W[i] = S1(W[i -  2]) + W[i -  7] +
+                   S0(W[i - 15]) + W[i - 16];
+        }
+
+        P( A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[i], K[i] );
+
+        temp1 = A[7]; A[7] = A[6]; A[6] = A[5]; A[5] = A[4]; A[4] = A[3];
+        A[3] = A[2]; A[2] = A[1]; A[1] = A[0]; A[0] = temp1;
+    }
+#else /* MBEDTLS_SHA512_SMALLER */
     for( i = 0; i < 16; i++ )
     {
         GET_UINT64_BE( W[i], data, i << 3 );
@@ -255,37 +286,23 @@
                S0(W[i - 15]) + W[i - 16];
     }
 
-    A = ctx->state[0];
-    B = ctx->state[1];
-    C = ctx->state[2];
-    D = ctx->state[3];
-    E = ctx->state[4];
-    F = ctx->state[5];
-    G = ctx->state[6];
-    H = ctx->state[7];
     i = 0;
-
     do
     {
-        P( A, B, C, D, E, F, G, H, W[i], K[i] ); i++;
-        P( H, A, B, C, D, E, F, G, W[i], K[i] ); i++;
-        P( G, H, A, B, C, D, E, F, W[i], K[i] ); i++;
-        P( F, G, H, A, B, C, D, E, W[i], K[i] ); i++;
-        P( E, F, G, H, A, B, C, D, W[i], K[i] ); i++;
-        P( D, E, F, G, H, A, B, C, W[i], K[i] ); i++;
-        P( C, D, E, F, G, H, A, B, W[i], K[i] ); i++;
-        P( B, C, D, E, F, G, H, A, W[i], K[i] ); i++;
+        P( A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[i], K[i] ); i++;
+        P( A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[i], K[i] ); i++;
+        P( A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[i], K[i] ); i++;
+        P( A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[i], K[i] ); i++;
+        P( A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[i], K[i] ); i++;
+        P( A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[i], K[i] ); i++;
+        P( A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[i], K[i] ); i++;
+        P( A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], W[i], K[i] ); i++;
     }
     while( i < 80 );
+#endif /* MBEDTLS_SHA512_SMALLER */
 
-    ctx->state[0] += A;
-    ctx->state[1] += B;
-    ctx->state[2] += C;
-    ctx->state[3] += D;
-    ctx->state[4] += E;
-    ctx->state[5] += F;
-    ctx->state[6] += G;
-    ctx->state[7] += H;
+    for( i = 0; i < 8; i++ )
+        ctx->state[i] += A[i];
 
     return( 0 );
 }
@@ -403,8 +420,8 @@
          | ( ctx->total[1] <<  3 );
     low  = ( ctx->total[0] <<  3 );
 
-    PUT_UINT64_BE( high, ctx->buffer, 112 );
-    PUT_UINT64_BE( low,  ctx->buffer, 120 );
+    sha512_put_uint64_be( high, ctx->buffer, 112 );
+    sha512_put_uint64_be( low,  ctx->buffer, 120 );
 
     if( ( ret = mbedtls_internal_sha512_process( ctx, ctx->buffer ) ) != 0 )
         return( ret );
@@ -412,17 +429,17 @@
     /*
      * Output final state
      */
-    PUT_UINT64_BE( ctx->state[0], output,  0 );
-    PUT_UINT64_BE( ctx->state[1], output,  8 );
-    PUT_UINT64_BE( ctx->state[2], output, 16 );
-    PUT_UINT64_BE( ctx->state[3], output, 24 );
-    PUT_UINT64_BE( ctx->state[4], output, 32 );
-    PUT_UINT64_BE( ctx->state[5], output, 40 );
+    sha512_put_uint64_be( ctx->state[0], output,  0 );
+    sha512_put_uint64_be( ctx->state[1], output,  8 );
+    sha512_put_uint64_be( ctx->state[2], output, 16 );
+    sha512_put_uint64_be( ctx->state[3], output, 24 );
+    sha512_put_uint64_be( ctx->state[4], output, 32 );
+    sha512_put_uint64_be( ctx->state[5], output, 40 );
 
     if( ctx->is384 == 0 )
     {
-        PUT_UINT64_BE( ctx->state[6], output, 48 );
-        PUT_UINT64_BE( ctx->state[7], output, 56 );
+        sha512_put_uint64_be( ctx->state[6], output, 48 );
+        sha512_put_uint64_be( ctx->state[7], output, 56 );
     }
 
     return( 0 );
diff --git a/library/version_features.c b/library/version_features.c
index cd75cc0..5404d79 100644
--- a/library/version_features.c
+++ b/library/version_features.c
@@ -408,6 +408,9 @@
 #if defined(MBEDTLS_SHA256_SMALLER)
     "MBEDTLS_SHA256_SMALLER",
 #endif /* MBEDTLS_SHA256_SMALLER */
+#if defined(MBEDTLS_SHA512_SMALLER)
+    "MBEDTLS_SHA512_SMALLER",
+#endif /* MBEDTLS_SHA512_SMALLER */
 #if defined(MBEDTLS_THREADING_ALT)
     "MBEDTLS_THREADING_ALT",
 #endif /* MBEDTLS_THREADING_ALT */
diff --git a/programs/test/query_config.c b/programs/test/query_config.c
index 4f28009..da3dfb0 100644
--- a/programs/test/query_config.c
+++ b/programs/test/query_config.c
@@ -1116,6 +1116,14 @@
     }
 #endif /* MBEDTLS_SHA256_SMALLER */
 
+#if defined(MBEDTLS_SHA512_SMALLER)
+    if( strcmp( "MBEDTLS_SHA512_SMALLER", config ) == 0 )
+    {
+        MACRO_EXPANSION_TO_STR( MBEDTLS_SHA512_SMALLER );
+        return( 0 );
+    }
+#endif /* MBEDTLS_SHA512_SMALLER */
+
 #if defined(MBEDTLS_THREADING_ALT)
     if( strcmp( "MBEDTLS_THREADING_ALT", config ) == 0 )
     {