Use bit operations for constant-flow padding check

The previous code used comparison operators >= and == that are quite likely to
be compiled to branches by some compilers on some architectures (with some
optimisation levels).

For example, take the following function:

void old_update( size_t data_len, size_t *padlen )
{
    *padlen  *= ( data_len >= *padlen + 1 );
}

With Clang 3.8, let's compile it for the Arm v6-M architecture:

% clang --target=arm-none-eabi -march=armv6-m -Os foo.c -S -o - |
    sed -n '/^old_update:$/,/\.size/p'

old_update:
        .fnstart
@ BB#0:
        .save   {r4, lr}
        push    {r4, lr}
        ldr     r2, [r1]
        adds    r4, r2, #1
        movs    r3, #0
        cmp     r4, r0
        bls     .LBB0_2
@ BB#1:
        mov     r2, r3
.LBB0_2:
        str     r2, [r1]
        pop     {r4, pc}
.Lfunc_end0:
        .size   old_update, .Lfunc_end0-old_update

We can see an unbalanced secret-dependant branch, resulting in a total
execution time depends on the value of the secret (here padlen) in a
straightforward way.

The new version, based on bit operations, doesn't have this issue:

new_update:
        .fnstart
@ BB#0:
        ldr     r2, [r1]
        subs    r0, r0, #1
        subs    r0, r0, r2
        asrs    r0, r0, #31
        bics    r2, r0
        str     r2, [r1]
        bx      lr
.Lfunc_end1:
        .size   new_update, .Lfunc_end1-new_update

(As a bonus, it's smaller and uses less stack.)

While there's no formal guarantee that the version based on bit operations in
C won't be translated using branches by the compiler, experiments tend to show
that's the case [1], and it is commonly accepted knowledge in the practical
crypto community that if we want to sick to C, bit operations are the safest
bet [2].

[1] https://github.com/mpg/ct/blob/master/results
[2] https://github.com/veorq/cryptocoding

Signed-off-by: Manuel Pégourié-Gonnard <manuel.pegourie-gonnard@arm.com>
diff --git a/library/ssl_msg.c b/library/ssl_msg.c
index 2ea3580..b4e4aea 100644
--- a/library/ssl_msg.c
+++ b/library/ssl_msg.c
@@ -1045,6 +1045,82 @@
 
 #if defined(MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC)
 /*
+ * Constant-flow mask generation for "less than" comparison:
+ * - if x < y,  return all bits 1, that is (size_t) -1
+ * - otherwise, return all bits 0, that is 0
+ *
+ * Use only bit operations to avoid branches that could be used by some
+ * compilers on some platforms to translate comparison operators.
+ */
+static size_t mbedtls_ssl_cf_mask_lt(size_t x, size_t y)
+{
+    /* This has the msb set if and only if x < y */
+    const size_t sub = x - y;
+
+    /* sub1 = (x < y) in {0, 1} */
+    const size_t sub1 = sub >> ( sizeof( sub ) * 8 - 1 );
+
+    /* MSVC has a warning about unary minus on unsigned integer types,
+     * but this is well-defined and precisely what we want to do here. */
+#if defined(_MSC_VER)
+#pragma warning( push )
+#pragma warning( disable : 4146 )
+#endif
+    /* mask = (x < y) ? 0xff... : 0x00... */
+    const size_t mask = -sub1;
+#if defined(_MSC_VER)
+#pragma warning( pop )
+#endif
+
+    return( mask );
+}
+
+/*
+ * Constant-flow mask generation for "greater or equal" comparison:
+ * - if x >= y, return all bits 1, that is (size_t) -1
+ * - otherwise, return all bits 0, that is 0
+ *
+ * Use only bit operations to avoid branches that could be used by some
+ * compilers on some platforms to translate comparison operators.
+ */
+static size_t mbedtls_ssl_cf_mask_ge(size_t x, size_t y)
+{
+    return( ~mbedtls_ssl_cf_mask_lt(x, y) );
+}
+
+/*
+ * Constant-flow boolean "equal" comparison:
+ * return x == y
+ *
+ * Use only bit operations to avoid branches that could be used by some
+ * compilers on some platforms to translate comparison operators.
+ */
+static size_t mbedtls_ssl_cf_bool_eq(size_t x, size_t y)
+{
+    /* diff = 0 if x == y, non-zero otherwise */
+    const size_t diff = x ^ y;
+
+    /* MSVC has a warning about unary minus on unsigned integer types,
+     * but this is well-defined and precisely what we want to do here. */
+#if defined(_MSC_VER)
+#pragma warning( push )
+#pragma warning( disable : 4146 )
+#endif
+
+    /* diff_msb's most significant bit is equal to x != y */
+    const size_t diff_msb = ( diff | -diff );
+
+#if defined(_MSC_VER)
+#pragma warning( pop )
+#endif
+
+    /* diff1 = (x != y) in {0, 1} */
+    const size_t diff1 = diff_msb >> ( sizeof( diff_msb ) * 8 - 1 );
+
+    return( 1 ^ diff1 );
+}
+
+/*
  * Constant-flow conditional memcpy:
  *  - if c1 == c2, equivalent to memcpy(dst, src, len),
  *  - otherwise, a no-op,
@@ -1071,7 +1147,7 @@
     /* diff_msb's most significant bit is equal to c1 != c2 */
     const size_t diff_msb = ( diff | -diff );
 
-    /* diff1 = c1 != c2 */
+    /* diff1 = (c1 != c2) in {0, 1} */
     const size_t diff1 = diff_msb >> ( sizeof( diff_msb ) * 8 - 1 );
 
     /* mask = c1 != c2 ? 0xff : 0x00 */
@@ -1528,8 +1604,11 @@
 
         if( auth_done == 1 )
         {
-            correct *= ( rec->data_len >= padlen + 1 );
-            padlen  *= ( rec->data_len >= padlen + 1 );
+            const size_t mask = mbedtls_ssl_cf_mask_ge(
+                                rec->data_len,
+                                padlen + 1 );
+            correct &= mask;
+            padlen  &= mask;
         }
         else
         {
@@ -1543,8 +1622,11 @@
             }
 #endif
 
-            correct *= ( rec->data_len >= transform->maclen + padlen + 1 );
-            padlen  *= ( rec->data_len >= transform->maclen + padlen + 1 );
+            const size_t mask = mbedtls_ssl_cf_mask_ge(
+                                rec->data_len,
+                                transform->maclen + padlen + 1 );
+            correct &= mask;
+            padlen  &= mask;
         }
 
         padlen++;
@@ -1555,6 +1637,9 @@
 #if defined(MBEDTLS_SSL_PROTO_SSL3)
         if( transform->minor_ver == MBEDTLS_SSL_MINOR_VERSION_0 )
         {
+            /* This is the SSL 3.0 path, we don't have to worry about Lucky
+             * 13, because there's a strictly worse padding attack built in
+             * the protocol (known as part of POODLE), so branches are OK. */
             if( padlen > transform->ivlen )
             {
 #if defined(MBEDTLS_SSL_DEBUG_ALL)
@@ -1578,7 +1663,6 @@
              * `min(256,plaintext_len)` reads (but take into account
              * only the last `padlen` bytes for the padding check). */
             size_t pad_count = 0;
-            size_t real_count = 0;
             volatile unsigned char* const check = data;
 
             /* Index of first padding byte; it has been ensured above
@@ -1590,10 +1674,15 @@
 
             for( idx = start_idx; idx < rec->data_len; idx++ )
             {
-                real_count |= ( idx >= padding_idx );
-                pad_count += real_count * ( check[idx] == padlen - 1 );
+                /* pad_count += (idx >= padding_idx) &&
+                 *              (chech[idx] == padlen - 1);
+                 */
+                const size_t mask = mbedtls_ssl_cf_mask_ge( idx, padding_idx );
+                const size_t equal = mbedtls_ssl_cf_bool_eq( check[idx],
+                                                             padlen - 1 );
+                pad_count += mask & equal;
             }
-            correct &= ( pad_count == padlen );
+            correct &= mbedtls_ssl_cf_bool_eq( pad_count, padlen );
 
 #if defined(MBEDTLS_SSL_DEBUG_ALL)
             if( padlen > 0 && correct == 0 )