Add optionally unsafe variant of exp_mod for perf
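Attempt to partially solve the performance regression in 3.6.0 without
adding too much code size. When the exponent is flagged as public,
exp_mod skips its leading zero bits and selects table entries with a
plain memcpy() instead of the constant-time lookup; the existing
mbedtls_mpi_exp_mod() and mbedtls_mpi_core_exp_mod() entry points keep
treating the exponent as secret.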
Signed-off-by: Manuel Pégourié-Gonnard <manuel.pegourie-gonnard@arm.com>
diff --git a/library/bignum.c b/library/bignum.c
index c45fd5b..4db2b10 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1610,9 +1610,9 @@
return 0;
}
-int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
- const mbedtls_mpi *E, const mbedtls_mpi *N,
- mbedtls_mpi *prec_RR)
+int mbedtls_mpi_exp_mod_optionally_safe(mbedtls_mpi *X, const mbedtls_mpi *A,
+ const mbedtls_mpi *E, const mbedtls_mpi *N,
+ mbedtls_mpi *prec_RR, int E_public)
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
@@ -1695,7 +1695,15 @@
{
mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N->p);
mbedtls_mpi_core_to_mont_rep(X->p, X->p, N->p, N->n, mm, RR.p, T);
- mbedtls_mpi_core_exp_mod(X->p, X->p, N->p, N->n, E->p, E->n, RR.p, T);
+ mbedtls_mpi_core_exp_mod_optionally_safe(X->p,
+ X->p,
+ N->p,
+ N->n,
+ E->p,
+ E->n,
+ RR.p,
+ T,
+ E_public);
mbedtls_mpi_core_from_mont_rep(X->p, X->p, N->p, N->n, mm, T);
}
@@ -1720,6 +1728,13 @@
return ret;
}
+/* Wrapper that always treats the exponent E as secret (MBEDTLS_MPI_IS_SECRET). */
+int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *E,
+                        const mbedtls_mpi *N, mbedtls_mpi *prec_RR)
+{
+    return mbedtls_mpi_exp_mod_optionally_safe(X, A, E, N, prec_RR, MBEDTLS_MPI_IS_SECRET);
+}
+
/*
* Greatest common divisor: G = gcd(A, B) (HAC 14.54)
*/
diff --git a/library/bignum_core.c b/library/bignum_core.c
index 1a3e0b9..518b1bd 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -758,14 +758,15 @@
* (The difference is that the body in our loop processes a single bit instead
* of a full window.)
*/
-void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
- const mbedtls_mpi_uint *A,
- const mbedtls_mpi_uint *N,
- size_t AN_limbs,
- const mbedtls_mpi_uint *E,
- size_t E_limbs,
- const mbedtls_mpi_uint *RR,
- mbedtls_mpi_uint *T)
+void mbedtls_mpi_core_exp_mod_optionally_safe(mbedtls_mpi_uint *X,
+ const mbedtls_mpi_uint *A,
+ const mbedtls_mpi_uint *N,
+ size_t AN_limbs,
+ const mbedtls_mpi_uint *E,
+ size_t E_limbs,
+ const mbedtls_mpi_uint *RR,
+ mbedtls_mpi_uint *T,
+ int E_public)
{
const size_t wsize = exp_mod_get_window_size(E_limbs * biL);
const size_t welem = ((size_t) 1) << wsize;
@@ -803,6 +804,14 @@
* (limb_index=0, E_bit_index=0). */
size_t E_limb_index = E_limbs;
size_t E_bit_index = 0;
+ if (E_public == MBEDTLS_MPI_IS_PUBLIC) {
+ size_t E_bits = mbedtls_mpi_core_bitlen(E, E_limbs);
+ if (E_bits != 0) {
+ E_limb_index = E_bits / biL;
+ E_bit_index = E_bits % biL;
+ }
+ }
+
/* At any given time, window contains window_bits bits from E.
* window_bits can go up to wsize. */
size_t window_bits = 0;
@@ -828,10 +837,14 @@
* when we've finished processing the exponent. */
if (window_bits == wsize ||
(E_bit_index == 0 && E_limb_index == 0)) {
- /* Select Wtable[window] without leaking window through
- * memory access patterns. */
- mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable,
- AN_limbs, welem, window);
+ if (E_public == MBEDTLS_MPI_IS_PUBLIC) {
+ memcpy(Wselect, Wtable + window * AN_limbs, AN_limbs * ciL);
+ } else {
+ /* Select Wtable[window] without leaking window through
+ * memory access patterns. */
+ mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable,
+ AN_limbs, welem, window);
+ }
/* Multiply X by the selected element. */
mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm,
temp);
@@ -841,6 +854,24 @@
} while (!(E_bit_index == 0 && E_limb_index == 0));
}
+void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X, const mbedtls_mpi_uint *A,
+                              const mbedtls_mpi_uint *N, size_t AN_limbs,
+                              const mbedtls_mpi_uint *E, size_t E_limbs,
+                              const mbedtls_mpi_uint *RR, mbedtls_mpi_uint *T)
+{
+    /*
+     * Secret-exponent variant: forward every argument unchanged and pass
+     * MBEDTLS_MPI_IS_SECRET, so the shared implementation does not take
+     * its MBEDTLS_MPI_IS_PUBLIC shortcuts (skipping the leading zero bits
+     * of E, copying the window table entry with memcpy()) and selects the
+     * entry with mbedtls_mpi_core_ct_uint_table_lookup() instead.
+     */
+    mbedtls_mpi_core_exp_mod_optionally_safe(X, A,
+                                             N, AN_limbs,
+                                             E, E_limbs,
+                                             RR, T, MBEDTLS_MPI_IS_SECRET);
+}
+
mbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X,
const mbedtls_mpi_uint *A,
mbedtls_mpi_uint c, /* doubles as carry */
diff --git a/library/bignum_core.h b/library/bignum_core.h
index 92c8d47..c63cdee 100644
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@@ -605,6 +605,44 @@
size_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs);
/**
+ * \brief Perform a modular exponentiation with public or secret exponent:
+ * X = A^E mod N, where \p A is already in Montgomery form.
+ *
+ * \p X may be aliased to \p A, but not to \p RR or \p E, even if \p E_limbs ==
+ * \p AN_limbs.
+ *
+ * \param[out] X The destination MPI, as a little endian array of length
+ * \p AN_limbs.
+ * \param[in] A The base MPI, as a little endian array of length \p AN_limbs.
+ * Must be in Montgomery form.
+ * \param[in] N The modulus, as a little endian array of length \p AN_limbs.
+ * \param AN_limbs The number of limbs in \p X, \p A, \p N, \p RR.
+ * \param[in] E The exponent, as a little endian array of length \p E_limbs.
+ * \param E_limbs The number of limbs in \p E.
+ * \param[in] RR The precomputed residue of 2^{2*biL} modulo N, as a little
+ * endian array of length \p AN_limbs.
+ * \param[in,out] T Temporary storage of at least the number of limbs returned
+ * by `mbedtls_mpi_core_exp_mod_working_limbs()`.
+ * Its initial content is unused and its final content is
+ * indeterminate.
+ * It must not alias or otherwise overlap any of the other
+ * parameters.
+ * It is up to the caller to zeroize \p T when it is no
+ * longer needed, and before freeing it if it was dynamically
+ * allocated.
+ * \param[in] E_public Set to MBEDTLS_MPI_IS_PUBLIC to gain some performance
+ * when the value of E is public; \p E is then not handled in constant time.
+ * Set to MBEDTLS_MPI_IS_SECRET when the value of E is secret.
+ */
+void mbedtls_mpi_core_exp_mod_optionally_safe(mbedtls_mpi_uint *X,
+ const mbedtls_mpi_uint *A,
+ const mbedtls_mpi_uint *N, size_t AN_limbs,
+ const mbedtls_mpi_uint *E, size_t E_limbs,
+ const mbedtls_mpi_uint *RR,
+ mbedtls_mpi_uint *T,
+ int E_public);
+
+/**
* \brief Perform a modular exponentiation with secret exponent:
* X = A^E mod N, where \p A is already in Montgomery form.
*