Merge pull request #7051 from gabor-mezei-arm/6376_Secp521r1_fast_reduction
Add a raw entry point to Secp521r1 fast reduction
diff --git a/library/ecp_curves.c b/library/ecp_curves.c
index 2a97b8c..1a027d6 100644
--- a/library/ecp_curves.c
+++ b/library/ecp_curves.c
@@ -4584,6 +4584,8 @@
#endif
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
static int ecp_mod_p521(mbedtls_mpi *);
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p521_raw(mbedtls_mpi_uint *N_p, size_t N_n);
#endif
#define NIST_MODP(P) grp->modp = ecp_mod_ ## P;
@@ -5189,11 +5191,6 @@
MBEDTLS_ECP_DP_SECP384R1_ENABLED */
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
-/*
- * Here we have an actual Mersenne prime, so things are more straightforward.
- * However, chunks are aligned on a 'weird' boundary (521 bits).
- */
-
/* Size of p521 in terms of mbedtls_mpi_uint */
#define P521_WIDTH (521 / 8 / sizeof(mbedtls_mpi_uint) + 1)
@@ -5201,48 +5198,81 @@
#define P521_MASK 0x01FF
/*
- * Fast quasi-reduction modulo p521 (FIPS 186-3 D.2.5)
- * Write N as A1 + 2^521 A0, return A0 + A1
+ * Fast quasi-reduction modulo p521 = 2^521 - 1 (FIPS 186-3 D.2.5)
*/
static int ecp_mod_p521(mbedtls_mpi *N)
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
- size_t i;
- mbedtls_mpi M;
- mbedtls_mpi_uint Mp[P521_WIDTH + 1];
- /* Worst case for the size of M is when mbedtls_mpi_uint is 16 bits:
- * we need to hold bits 513 to 1056, which is 34 limbs, that is
- * P521_WIDTH + 1. Otherwise P521_WIDTH is enough. */
-
- if (N->n < P521_WIDTH) {
- return 0;
- }
-
- /* M = A1 */
- M.s = 1;
- M.n = N->n - (P521_WIDTH - 1);
- if (M.n > P521_WIDTH + 1) {
- M.n = P521_WIDTH + 1;
- }
- M.p = Mp;
- memcpy(Mp, N->p + P521_WIDTH - 1, M.n * sizeof(mbedtls_mpi_uint));
- MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&M, 521 % (8 * sizeof(mbedtls_mpi_uint))));
-
- /* N = A0 */
- N->p[P521_WIDTH - 1] &= P521_MASK;
- for (i = P521_WIDTH; i < N->n; i++) {
- N->p[i] = 0;
- }
-
- /* N = A0 + A1 */
- MBEDTLS_MPI_CHK(mbedtls_mpi_add_abs(N, N, &M));
-
+ size_t expected_width = 2 * P521_WIDTH;
+ MBEDTLS_MPI_CHK(mbedtls_mpi_grow(N, expected_width));
+ ret = mbedtls_ecp_mod_p521_raw(N->p, expected_width);
cleanup:
return ret;
}
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p521_raw(mbedtls_mpi_uint *X, size_t X_limbs)
+{
+ mbedtls_mpi_uint carry = 0;
+
+ if (X_limbs != 2 * P521_WIDTH || X[2 * P521_WIDTH - 1] != 0) {
+ return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+ }
+
+ /* Step 1: Reduction to P521_WIDTH limbs */
+ /* Helper references for bottom part of X */
+ mbedtls_mpi_uint *X0 = X;
+ size_t X0_limbs = P521_WIDTH;
+ /* Helper references for top part of X */
+ mbedtls_mpi_uint *X1 = X + X0_limbs;
+ size_t X1_limbs = X_limbs - X0_limbs;
+ /* Split X as X0 + 2^P521_WIDTH X1 and compute X0 + 2^(biL - 9) X1.
+ * (We are using that 2^P521_WIDTH = 2^(512 + biL) and that
+ * 2^(512 + biL) X1 = 2^(biL - 9) X1 mod P521.)
+ * The high order limb of the result will be held in carry and the rest
+ * in X0 (that is the result will be represented as
+ * 2^P521_WIDTH carry + X0).
+ *
+ * Also, note that the resulting carry is either 0 or 1:
+ * X0 < 2^P521_WIDTH = 2^(512 + biL) and X1 < 2^(P521_WIDTH-biL) = 2^512
+ * therefore
+ * X0 + 2^(biL - 9) X1 < 2^(512 + biL) + 2^(512 + biL - 9)
+ * which in turn is less than 2 * 2^(512 + biL).
+ */
+ mbedtls_mpi_uint shift = ((mbedtls_mpi_uint) 1u) << (biL - 9);
+ carry = mbedtls_mpi_core_mla(X0, X0_limbs, X1, X1_limbs, shift);
+ /* Set X to X0 (by clearing the top part). */
+ memset(X1, 0, X1_limbs * sizeof(mbedtls_mpi_uint));
+
+ /* Step 2: Reduction modulo P521
+ *
+ * At this point X is reduced to P521_WIDTH limbs. What remains is to add
+ * the carry (that is 2^P521_WIDTH carry) and to reduce mod P521. */
+
+ /* 2^P521_WIDTH carry = 2^(512 + biL) carry = 2^(biL - 9) carry mod P521.
+ * Also, recall that carry is either 0 or 1. */
+ mbedtls_mpi_uint addend = carry << (biL - 9);
+ /* Keep the top 9 bits and reduce the rest, using 2^521 = 1 mod P521. */
+ addend += (X[P521_WIDTH - 1] >> 9);
+ X[P521_WIDTH - 1] &= P521_MASK;
+
+ /* Resuse the top part of X (already zeroed) as a helper array for
+ * carrying out the addition. */
+ mbedtls_mpi_uint *addend_arr = X + P521_WIDTH;
+ addend_arr[0] = addend;
+ (void) mbedtls_mpi_core_add(X, X, addend_arr, P521_WIDTH);
+ /* Both addends were less than P521 therefore X < 2 * P521. (This also means
+ * that the result fit in P521_WIDTH limbs and there won't be any carry.) */
+
+ /* Clear the reused part of X. */
+ addend_arr[0] = 0;
+
+ return 0;
+}
+
#undef P521_WIDTH
#undef P521_MASK
+
#endif /* MBEDTLS_ECP_DP_SECP521R1_ENABLED */
#endif /* MBEDTLS_ECP_NIST_OPTIM */
diff --git a/library/ecp_invasive.h b/library/ecp_invasive.h
index 3ee238e..3d1321c 100644
--- a/library/ecp_invasive.h
+++ b/library/ecp_invasive.h
@@ -95,6 +95,28 @@
#endif /* MBEDTLS_ECP_DP_SECP192R1_ENABLED */
+#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
+
+/** Fast quasi-reduction modulo p521 = 2^521 - 1 (FIPS 186-3 D.2.5)
+ *
+ * \param[in,out] X The address of the MPI to be converted.
+ * Must have twice as many limbs as the modulus
+ * (the modulus is 521 bits long). Upon return this
+ * holds the reduced value. The reduced value is
+ * in range `0 <= X < 2 * N` (where N is the modulus).
+ * and its the bitlength is one plus the bitlength
+ * of the modulus.
+ * \param[in] X_limbs The length of \p X in limbs.
+ *
+ * \return \c 0 on success.
+ * \return #MBEDTLS_ERR_ECP_BAD_INPUT_DATA if \p X_limbs does not have
+ * twice as many limbs as the modulus.
+ */
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p521_raw(mbedtls_mpi_uint *X, size_t X_limbs);
+
+#endif /* MBEDTLS_ECP_DP_SECP521R1_ENABLED */
+
#endif /* MBEDTLS_TEST_HOOKS && MBEDTLS_ECP_C */
#endif /* MBEDTLS_ECP_INVASIVE_H */
diff --git a/scripts/mbedtls_dev/ecp.py b/scripts/mbedtls_dev/ecp.py
index 93cd212..6370d25 100644
--- a/scripts/mbedtls_dev/ecp.py
+++ b/scripts/mbedtls_dev/ecp.py
@@ -75,3 +75,94 @@
@property
def is_valid(self) -> bool:
return True
+
+class EcpP521R1Raw(bignum_common.ModOperationCommon,
+ EcpTarget):
+ """Test cases for ecp quasi_reduction()."""
+ test_function = "ecp_mod_p521_raw"
+ test_name = "ecp_mod_p521_raw"
+ input_style = "arch_split"
+ arity = 1
+
+ moduli = [("01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
+ ] # type: List[str]
+
+ input_values = [
+ "0", "1",
+
+ # Corner case: maximum canonical P521 multiplication result
+ ("0003ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "fffff800"
+ "0000000000000000000000000000000000000000000000000000000000000000"
+ "0000000000000000000000000000000000000000000000000000000000000004"),
+
+ # Test case for overflow during addition
+ ("0001efffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "000001ef"
+ "0000000000000000000000000000000000000000000000000000000000000000"
+ "000000000000000000000000000000000000000000000000000000000f000000"),
+
+ # First 8 number generated by random.getrandbits(1042) - seed(2,2)
+ ("0003cc2e82523e86feac7eb7dc38f519b91751dacdbd47d364be8049a372db8f"
+ "6e405d93ffed9235288bc781ae66267594c9c9500925e4749b575bd13653f8dd"
+ "9b1f282e"
+ "4067c3584ee207f8da94e3e8ab73738fcf1822ffbc6887782b491044d5e34124"
+ "5c6e433715ba2bdd177219d30e7a269fd95bafc8f2a4d27bdcf4bb99f4bea973"),
+ ("00017052829e07b0829a48d422fe99a22c70501e533c91352d3d854e061b9030"
+ "3b08c6e33c7295782d6c797f8f7d9b782a1be9cd8697bbd0e2520e33e44c5055"
+ "6c71c4a6"
+ "6148a86fe8624fab5186ee32ee8d7ee9770348a05d300cb90706a045defc044a"
+ "09325626e6b58de744ab6cce80877b6f71e1f6d2ef8acd128b4f2fc15f3f57eb"),
+ ("00021f15a7a83ee0761ebfd2bd143fa9b714210c665d7435c1066932f4767f26"
+ "294365b2721dea3bf63f23d0dbe53fcafb2147df5ca495fa5a91c89b97eeab64"
+ "ca2ce6bc"
+ "5d3fd983c34c769fe89204e2e8168561867e5e15bc01bfce6a27e0dfcbf87544"
+ "72154e76e4c11ab2fec3f6b32e8d4b8a8f54f8ceacaab39e83844b40ffa9b9f1"),
+ ("000381bc2a838af8d5c44a4eb3172062d08f1bb2531d6460f0caeef038c89b38"
+ "a8acb5137c9260dc74e088a9b9492f258ebdbfe3eb9ac688b9d39cca91551e82"
+ "59cc60b1"
+ "7604e4b4e73695c3e652c71a74667bffe202849da9643a295a9ac6decbd4d3e2"
+ "d4dec9ef83f0be4e80371eb97f81375eecc1cb6347733e847d718d733ff98ff3"),
+ ("00034816c8c69069134bccd3e1cf4f589f8e4ce0af29d115ef24bd625dd961e6"
+ "830b54fa7d28f93435339774bb1e386c4fd5079e681b8f5896838b769da59b74"
+ "a6c3181c"
+ "81e220df848b1df78feb994a81167346d4c0dca8b4c9e755cc9c3adcf515a823"
+ "4da4daeb4f3f87777ad1f45ae9500ec9c5e2486c44a4a8f69dc8db48e86ec9c6"),
+ ("000397846c4454b90f756132e16dce72f18e859835e1f291d322a7353ead4efe"
+ "440e2b4fda9c025a22f1a83185b98f5fc11e60de1b343f52ea748db9e020307a"
+ "aeb6db2c"
+ "3a038a709779ac1f45e9dd320c855fdfa7251af0930cdbd30f0ad2a81b2d19a2"
+ "beaa14a7ff3fe32a30ffc4eed0a7bd04e85bfcdd0227eeb7b9d7d01f5769da05"),
+ ("00002c3296e6bc4d62b47204007ee4fab105d83e85e951862f0981aebc1b00d9"
+ "2838e766ef9b6bf2d037fe2e20b6a8464174e75a5f834da70569c018eb2b5693"
+ "babb7fbb"
+ "0a76c196067cfdcb11457d9cf45e2fa01d7f4275153924800600571fac3a5b26"
+ "3fdf57cd2c0064975c3747465cc36c270e8a35b10828d569c268a20eb78ac332"),
+ ("00009d23b4917fc09f20dbb0dcc93f0e66dfe717c17313394391b6e2e6eacb0f"
+ "0bb7be72bd6d25009aeb7fa0c4169b148d2f527e72daf0a54ef25c0707e33868"
+ "7d1f7157"
+ "5653a45c49390aa51cf5192bbf67da14be11d56ba0b4a2969d8055a9f03f2d71"
+ "581d8e830112ff0f0948eccaf8877acf26c377c13f719726fd70bddacb4deeec"),
+
+ # Next 2 number generated by random.getrandbits(521)
+ ("12b84ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238da1a1fe"
+ "3f9d6a179fa50f96cd4aff9261aa92c0e6f17ec940639bc2ccdf572df00790813e3"),
+ ("166049dd332a73fa0b26b75196cf87eb8a09b27ec714307c68c425424a1574f1"
+ "eedf5b0f16cdfdb839424d201e653f53d6883ca1c107ca6e706649889c0c7f38608")
+ ]
+
+ @property
+ def arg_a(self) -> str:
+ # Number of limbs: 2 * N
+ return super().format_arg('{:x}'.format(self.int_a)).zfill(2 * self.hex_digits)
+
+ def result(self) -> List[str]:
+ result = self.int_a % self.int_n
+ return [self.format_result(result)]
+
+ @property
+ def is_valid(self) -> bool:
+ return True
diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function
index b9d2b5e..4e74d9b 100644
--- a/tests/suites/test_suite_ecp.function
+++ b/tests/suites/test_suite_ecp.function
@@ -4,8 +4,8 @@
#include "mbedtls/ecdh.h"
#include "bignum_core.h"
-#include "bignum_mod_raw_invasive.h"
#include "ecp_invasive.h"
+#include "bignum_mod_raw_invasive.h"
#if defined(MBEDTLS_TEST_HOOKS) && \
(defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) || \
@@ -1344,3 +1344,46 @@
mbedtls_free(N);
}
/* END_CASE */
+
+/* BEGIN_CASE depends_on:MBEDTLS_TEST_HOOKS */
+void ecp_mod_p521_raw(char *input_N,
+ char *input_X,
+ char *result)
+{
+ mbedtls_mpi_uint *X = NULL;
+ mbedtls_mpi_uint *N = NULL;
+ mbedtls_mpi_uint *res = NULL;
+ size_t limbs_X;
+ size_t limbs_N;
+ size_t limbs_res;
+
+ mbedtls_mpi_mod_modulus m;
+ mbedtls_mpi_mod_modulus_init(&m);
+
+ TEST_EQUAL(mbedtls_test_read_mpi_core(&X, &limbs_X, input_X), 0);
+ TEST_EQUAL(mbedtls_test_read_mpi_core(&N, &limbs_N, input_N), 0);
+ TEST_EQUAL(mbedtls_test_read_mpi_core(&res, &limbs_res, result), 0);
+
+ size_t limbs = limbs_N;
+ size_t bytes = limbs * sizeof(mbedtls_mpi_uint);
+
+ TEST_EQUAL(limbs_X, 2 * limbs);
+ TEST_EQUAL(limbs_res, limbs);
+
+ TEST_EQUAL(mbedtls_mpi_mod_modulus_setup(
+ &m, N, limbs,
+ MBEDTLS_MPI_MOD_REP_MONTGOMERY), 0);
+
+ TEST_EQUAL(mbedtls_ecp_mod_p521_raw(X, limbs_X), 0);
+ TEST_LE_U(mbedtls_mpi_core_bitlen(X, limbs_X), 522);
+ mbedtls_mpi_mod_raw_fix_quasi_reduction(X, &m);
+ ASSERT_COMPARE(X, bytes, res, bytes);
+
+exit:
+ mbedtls_free(X);
+ mbedtls_free(res);
+
+ mbedtls_mpi_mod_modulus_free(&m);
+ mbedtls_free(N);
+}
+/* END_CASE */