Merge pull request #8741 from Ryan-Everett-arm/add-locking-macros

Add macros for locking/unlocking the key slot mutex
diff --git a/BRANCHES.md b/BRANCHES.md
index c085b16..b71247f 100644
--- a/BRANCHES.md
+++ b/BRANCHES.md
@@ -106,6 +106,6 @@
 - [`development`](https://github.com/Mbed-TLS/mbedtls/)
 - [`mbedtls-2.28`](https://github.com/Mbed-TLS/mbedtls/tree/mbedtls-2.28)
  maintained until at least the end of 2024, see
-  <https://github.com/Mbed-TLS/mbedtls/releases/tag/v2.28.6>.
+  <https://github.com/Mbed-TLS/mbedtls/releases/tag/v2.28.7>.
 
 Users are urged to always use the latest version of a maintained branch.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad05646..78599d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -381,7 +381,7 @@
     write_basic_package_version_file(
         "cmake/MbedTLSConfigVersion.cmake"
             COMPATIBILITY SameMajorVersion
-            VERSION 3.5.1)
+            VERSION 3.5.2)
 
     install(
         FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake/MbedTLSConfig.cmake"
diff --git a/ChangeLog b/ChangeLog
index 28c45f7..28f2654 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,20 @@
 Mbed TLS ChangeLog (Sorted per branch, date)
 
+= Mbed TLS 3.5.2 branch released 2024-01-26
+
+Security
+   * Fix a timing side channel in private key RSA operations. This side channel
+     could be sufficient for an attacker to recover the plaintext. A local
+     attacker or a remote attacker who is close to the victim on the network
+     might have precise enough timing measurements to exploit this. It requires
+     the attacker to send a large number of messages for decryption. For
+     details, see "Everlasting ROBOT: the Marvin Attack", Hubert Kario. Reported
+     by Hubert Kario, Red Hat.
+   * Fix a failure to validate input when writing x509 extensions lengths which
+     could result in an integer overflow, causing a zero-length buffer to be
+     allocated to hold the extension. The extension would then be copied into
+     the buffer, causing a heap buffer overflow.
+
 = Mbed TLS 3.5.1 branch released 2023-11-06
 
 Changes
diff --git a/ChangeLog.d/iar-gcc-perf.txt b/ChangeLog.d/iar-gcc-perf.txt
new file mode 100644
index 0000000..fb0fbb1
--- /dev/null
+++ b/ChangeLog.d/iar-gcc-perf.txt
@@ -0,0 +1,2 @@
+Features
+   * Improve performance for gcc (versions older than 9.3.0) and IAR.
diff --git a/doxygen/input/doc_mainpage.h b/doxygen/input/doc_mainpage.h
index c391c59..17762d7 100644
--- a/doxygen/input/doc_mainpage.h
+++ b/doxygen/input/doc_mainpage.h
@@ -10,7 +10,7 @@
  */
 
 /**
- * @mainpage Mbed TLS v3.5.1 API Documentation
+ * @mainpage Mbed TLS v3.5.2 API Documentation
  *
  * This documentation describes the internal structure of Mbed TLS.  It was
  * automatically generated from specially formatted comment blocks in
diff --git a/doxygen/mbedtls.doxyfile b/doxygen/mbedtls.doxyfile
index 89048f2..cbbb759 100644
--- a/doxygen/mbedtls.doxyfile
+++ b/doxygen/mbedtls.doxyfile
@@ -1,4 +1,4 @@
-PROJECT_NAME           = "Mbed TLS v3.5.1"
+PROJECT_NAME           = "Mbed TLS v3.5.2"
 OUTPUT_DIRECTORY       = ../apidoc/
 FULL_PATH_NAMES        = NO
 OPTIMIZE_OUTPUT_FOR_C  = YES
diff --git a/include/mbedtls/build_info.h b/include/mbedtls/build_info.h
index 7a70e25..2f336ba 100644
--- a/include/mbedtls/build_info.h
+++ b/include/mbedtls/build_info.h
@@ -26,16 +26,16 @@
  */
 #define MBEDTLS_VERSION_MAJOR  3
 #define MBEDTLS_VERSION_MINOR  5
-#define MBEDTLS_VERSION_PATCH  1
+#define MBEDTLS_VERSION_PATCH  2
 
 /**
  * The single version number has the following structure:
  *    MMNNPP00
  *    Major version | Minor version | Patch version
  */
-#define MBEDTLS_VERSION_NUMBER         0x03050100
-#define MBEDTLS_VERSION_STRING         "3.5.1"
-#define MBEDTLS_VERSION_STRING_FULL    "Mbed TLS 3.5.1"
+#define MBEDTLS_VERSION_NUMBER         0x03050200
+#define MBEDTLS_VERSION_STRING         "3.5.2"
+#define MBEDTLS_VERSION_STRING_FULL    "Mbed TLS 3.5.2"
 
 /* Macros for build-time platform detection */
 
@@ -83,6 +83,14 @@
 #endif
 #endif
 
+#if defined(__GNUC__) && !defined(__ARMCC_VERSION) && !defined(__clang__) \
+    && !defined(__llvm__) && !defined(__INTEL_COMPILER)
+/* Defined if the compiler really is gcc and not clang, etc */
+#define MBEDTLS_COMPILER_IS_GCC
+#define MBEDTLS_GCC_VERSION \
+    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif
+
 #if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
 #define _CRT_SECURE_NO_DEPRECATE 1
 #endif
diff --git a/include/mbedtls/rsa.h b/include/mbedtls/rsa.h
index e5e172f..9136375 100644
--- a/include/mbedtls/rsa.h
+++ b/include/mbedtls/rsa.h
@@ -684,6 +684,10 @@
  *                 It is the generic wrapper for performing a PKCS#1 decryption
  *                 operation.
  *
+ * \warning        When \p ctx->padding is set to #MBEDTLS_RSA_PKCS_V15,
+ *                 mbedtls_rsa_rsaes_pkcs1_v15_decrypt() is called, which is an
+ *                 inherently dangerous function (CWE-242).
+ *
  * \note           The output buffer length \c output_max_len should be
  *                 as large as the size \p ctx->len of \p ctx->N (for example,
  *                 128 Bytes if RSA-1024 is used) to be able to hold an
@@ -720,6 +724,11 @@
  * \brief          This function performs a PKCS#1 v1.5 decryption
  *                 operation (RSAES-PKCS1-v1_5-DECRYPT).
  *
+ * \warning        This is an inherently dangerous function (CWE-242). Unless
+ *                 it is used in a side channel free and safe way (eg.
+ *                 implementing the TLS protocol as per 7.4.7.1 of RFC 5246),
+ *                 the calling code is vulnerable.
+ *
  * \note           The output buffer length \c output_max_len should be
  *                 as large as the size \p ctx->len of \p ctx->N, for example,
  *                 128 Bytes if RSA-1024 is used, to be able to hold an
diff --git a/include/psa/crypto_values.h b/include/psa/crypto_values.h
index 90d98fd..f311acb 100644
--- a/include/psa/crypto_values.h
+++ b/include/psa/crypto_values.h
@@ -1756,6 +1756,13 @@
      0)
 
 /** RSA PKCS#1 v1.5 encryption.
+ *
+ * \warning     Calling psa_asymmetric_decrypt() with this algorithm as a
+ *              parameter is considered an inherently dangerous function
+ *              (CWE-242). Unless it is used in a side channel free and safe
+ *              way (eg. implementing the TLS protocol as per 7.4.7.1 of
+ *              RFC 5246), the calling code is vulnerable.
+ *
  */
 #define PSA_ALG_RSA_PKCS1V15_CRYPT              ((psa_algorithm_t) 0x07000200)
 
diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt
index 5c297e0..b6ea73e 100644
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
@@ -297,7 +297,7 @@
 if(USE_SHARED_MBEDTLS_LIBRARY)
     set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR})
     add_library(${mbedcrypto_target} SHARED ${src_crypto})
-    set_target_properties(${mbedcrypto_target} PROPERTIES VERSION 3.5.1 SOVERSION 15)
+    set_target_properties(${mbedcrypto_target} PROPERTIES VERSION 3.5.2 SOVERSION 15)
     target_link_libraries(${mbedcrypto_target} PUBLIC ${libs})
 
     if(TARGET ${everest_target})
@@ -309,11 +309,11 @@
     endif()
 
     add_library(${mbedx509_target} SHARED ${src_x509})
-    set_target_properties(${mbedx509_target} PROPERTIES VERSION 3.5.1 SOVERSION 6)
+    set_target_properties(${mbedx509_target} PROPERTIES VERSION 3.5.2 SOVERSION 6)
     target_link_libraries(${mbedx509_target} PUBLIC ${libs} ${mbedcrypto_target})
 
     add_library(${mbedtls_target} SHARED ${src_tls})
-    set_target_properties(${mbedtls_target} PROPERTIES VERSION 3.5.1 SOVERSION 20)
+    set_target_properties(${mbedtls_target} PROPERTIES VERSION 3.5.2 SOVERSION 20)
     target_link_libraries(${mbedtls_target} PUBLIC ${libs} ${mbedx509_target})
 endif(USE_SHARED_MBEDTLS_LIBRARY)
 
diff --git a/library/alignment.h b/library/alignment.h
index 9e1e044..248f29b 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -37,6 +37,52 @@
 #define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS
 #endif
 
+#if defined(__IAR_SYSTEMS_ICC__) && \
+    (defined(MBEDTLS_ARCH_IS_ARM64) || defined(MBEDTLS_ARCH_IS_ARM32) \
+    || defined(__ICCRX__) || defined(__ICCRL78__) || defined(__ICCRISCV__))
+#pragma language=save
+#pragma language=extended
+#define MBEDTLS_POP_IAR_LANGUAGE_PRAGMA
+/* IAR recommend this technique for accessing unaligned data in
+ * https://www.iar.com/knowledge/support/technical-notes/compiler/accessing-unaligned-data
+ * This results in a single load / store instruction (if unaligned access is supported).
+ * According to that document, this is only supported on certain architectures.
+ */
+    #define UINT_UNALIGNED
+typedef uint16_t __packed mbedtls_uint16_unaligned_t;
+typedef uint32_t __packed mbedtls_uint32_unaligned_t;
+typedef uint64_t __packed mbedtls_uint64_unaligned_t;
+#elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \
+    ((MBEDTLS_GCC_VERSION < 90300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)))
+/*
+ * Old versions of gcc, depending on how the target is specified, may generate a branch to memcpy
+ * for calls like `memcpy(dest, src, 4)` rather than generating some LDR or LDRB instructions
+ * (similar for stores).
+ * Recent versions where unaligned access is not enabled also do this.
+ *
+ * For performance (and code size, in some cases), we want to avoid the branch and just generate
+ * some inline load/store instructions since the access is small and constant-size.
+ *
+ * The manual states:
+ * "The aligned attribute specifies a minimum alignment for the variable or structure field,
+ * measured in bytes."
+ * https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html
+ *
+ * Tested with several versions of GCC from 4.5.0 up to 9.3.0
+ * We don't enable for older than 4.5.0 as this has not been tested.
+ */
+ #define UINT_UNALIGNED
+typedef uint16_t __attribute__((__aligned__(1))) mbedtls_uint16_unaligned_t;
+typedef uint32_t __attribute__((__aligned__(1))) mbedtls_uint32_unaligned_t;
+typedef uint64_t __attribute__((__aligned__(1))) mbedtls_uint64_unaligned_t;
+ #endif
+
+/*
+ * We try to force mbedtls_(get|put)_unaligned_uintXX to be always inline, because this results
+ * in code that is both smaller and faster. IAR and gcc both benefit from this when optimising
+ * for size.
+ */
+
 /**
  * Read the unsigned 16 bits integer from the given address, which need not
  * be aligned.
@@ -44,10 +90,20 @@
  * \param   p pointer to 2 bytes of data
  * \return  Data at the given address
  */
-inline uint16_t mbedtls_get_unaligned_uint16(const void *p)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline uint16_t mbedtls_get_unaligned_uint16(const void *p)
 {
     uint16_t r;
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+    r = *p16;
+#else
     memcpy(&r, p, sizeof(r));
+#endif
     return r;
 }
 
@@ -58,9 +114,19 @@
  * \param   p pointer to 2 bytes of data
  * \param   x data to write
  */
-inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x)
 {
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+    *p16 = x;
+#else
     memcpy(p, &x, sizeof(x));
+#endif
 }
 
 /**
@@ -70,10 +136,20 @@
  * \param   p pointer to 4 bytes of data
  * \return  Data at the given address
  */
-inline uint32_t mbedtls_get_unaligned_uint32(const void *p)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline uint32_t mbedtls_get_unaligned_uint32(const void *p)
 {
     uint32_t r;
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+    r = *p32;
+#else
     memcpy(&r, p, sizeof(r));
+#endif
     return r;
 }
 
@@ -84,9 +160,19 @@
  * \param   p pointer to 4 bytes of data
  * \param   x data to write
  */
-inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
 {
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+    *p32 = x;
+#else
     memcpy(p, &x, sizeof(x));
+#endif
 }
 
 /**
@@ -96,10 +182,20 @@
  * \param   p pointer to 8 bytes of data
  * \return  Data at the given address
  */
-inline uint64_t mbedtls_get_unaligned_uint64(const void *p)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline uint64_t mbedtls_get_unaligned_uint64(const void *p)
 {
     uint64_t r;
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+    r = *p64;
+#else
     memcpy(&r, p, sizeof(r));
+#endif
     return r;
 }
 
@@ -110,11 +206,25 @@
  * \param   p pointer to 8 bytes of data
  * \param   x data to write
  */
-inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x)
 {
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+    *p64 = x;
+#else
     memcpy(p, &x, sizeof(x));
+#endif
 }
 
+#if defined(MBEDTLS_POP_IAR_LANGUAGE_PRAGMA)
+#pragma language=restore
+#endif
+
 /** Byte Reading Macros
  *
  * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
diff --git a/library/common.h b/library/common.h
index e532777..3936ffd 100644
--- a/library/common.h
+++ b/library/common.h
@@ -27,15 +27,6 @@
 #define MBEDTLS_HAVE_NEON_INTRINSICS
 #endif
 
-
-#if defined(__GNUC__) && !defined(__ARMCC_VERSION) && !defined(__clang__) \
-    && !defined(__llvm__) && !defined(__INTEL_COMPILER)
-/* Defined if the compiler really is gcc and not clang, etc */
-#define MBEDTLS_COMPILER_IS_GCC
-#define MBEDTLS_GCC_VERSION \
-    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#endif
-
 /** Helper to define a function as static except when building invasive tests.
  *
  * If a function is only used inside its own source file and should be
@@ -167,6 +158,12 @@
     return p == NULL ? NULL : p + n;
 }
 
+/* Always inline mbedtls_xor() for similar reasons as mbedtls_xor_no_simd(). */
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
 /**
  * Perform a fast block XOR operation, such that
  * r[i] = a[i] ^ b[i] where 0 <= i < n
@@ -177,8 +174,19 @@
  * \param   a Pointer to input (buffer of at least \p n bytes)
  * \param   b Pointer to input (buffer of at least \p n bytes)
  * \param   n Number of bytes to process.
+ *
+ * \note      Depending on the situation, it may be faster to use either mbedtls_xor() or
+ *            mbedtls_xor_no_simd() (these are functionally equivalent).
+ *            If the result is used immediately after the xor operation in non-SIMD code (e.g, in
+ *            AES-CBC), there may be additional latency to transfer the data from SIMD to scalar
+ *            registers, and in this case, mbedtls_xor_no_simd() may be faster. In other cases where
+ *            the result is not used immediately (e.g., in AES-CTR), mbedtls_xor() may be faster.
+ *            For targets without SIMD support, they will behave the same.
  */
-inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n)
+static inline void mbedtls_xor(unsigned char *r,
+                               const unsigned char *a,
+                               const unsigned char *b,
+                               size_t n)
 {
     size_t i = 0;
 #if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
@@ -191,17 +199,36 @@
         uint8x16_t x = veorq_u8(v1, v2);
         vst1q_u8(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
+    /* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
+     * where n is a constant multiple of 16.
+     * For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
+     * constant, and is a very small perf regression if n is not a compile-time constant. */
+    if (n % 16 == 0) {
+        return;
+    }
+#endif
 #elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64)
     /* This codepath probably only makes sense on architectures with 64-bit registers */
     for (; (i + 8) <= n; i += 8) {
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
+    if (n % 8 == 0) {
+        return;
+    }
+#endif
 #else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
+    if (n % 4 == 0) {
+        return;
+    }
+#endif
 #endif
 #endif
     for (; i < n; i++) {
@@ -209,11 +236,18 @@
     }
 }
 
+/* Always inline mbedtls_xor_no_simd() as we see significant perf regressions when it does not get
+ * inlined (e.g., observed about 3x perf difference in gcm_mult_largetable with gcc 7 - 12) */
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
 /**
  * Perform a fast block XOR operation, such that
  * r[i] = a[i] ^ b[i] where 0 <= i < n
  *
- * In some situations, this can perform better than mbedtls_xor (e.g., it's about 5%
+ * In some situations, this can perform better than mbedtls_xor() (e.g., it's about 5%
  * better in AES-CBC).
  *
  * \param   r Pointer to result (buffer of at least \p n bytes). \p r
@@ -222,6 +256,14 @@
  * \param   a Pointer to input (buffer of at least \p n bytes)
  * \param   b Pointer to input (buffer of at least \p n bytes)
  * \param   n Number of bytes to process.
+ *
+ * \note      Depending on the situation, it may be faster to use either mbedtls_xor() or
+ *            mbedtls_xor_no_simd() (these are functionally equivalent).
+ *            If the result is used immediately after the xor operation in non-SIMD code (e.g, in
+ *            AES-CBC), there may be additional latency to transfer the data from SIMD to scalar
+ *            registers, and in this case, mbedtls_xor_no_simd() may be faster. In other cases where
+ *            the result is not used immediately (e.g., in AES-CTR), mbedtls_xor() may be faster.
+ *            For targets without SIMD support, they will behave the same.
  */
 static inline void mbedtls_xor_no_simd(unsigned char *r,
                                        const unsigned char *a,
@@ -236,11 +278,25 @@
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
+    /* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
+     * where n is a constant multiple of 8.
+     * For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
+     * constant, and is a very small perf regression if n is not a compile-time constant. */
+    if (n % 8 == 0) {
+        return;
+    }
+#endif
 #else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
+    if (n % 4 == 0) {
+        return;
+    }
+#endif
 #endif
 #endif
     for (; i < n; i++) {
diff --git a/library/platform_util.c b/library/platform_util.c
index 63643d2..9f5dcb8 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -217,27 +217,6 @@
 void (*mbedtls_test_hook_test_fail)(const char *, int, const char *);
 #endif /* MBEDTLS_TEST_HOOKS */
 
-/*
- * Provide external definitions of some inline functions so that the compiler
- * has the option to not inline them
- */
-extern inline void mbedtls_xor(unsigned char *r,
-                               const unsigned char *a,
-                               const unsigned char *b,
-                               size_t n);
-
-extern inline uint16_t mbedtls_get_unaligned_uint16(const void *p);
-
-extern inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x);
-
-extern inline uint32_t mbedtls_get_unaligned_uint32(const void *p);
-
-extern inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x);
-
-extern inline uint64_t mbedtls_get_unaligned_uint64(const void *p);
-
-extern inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x);
-
 #if defined(MBEDTLS_HAVE_TIME) && !defined(MBEDTLS_PLATFORM_MS_TIME_ALT)
 
 #include <time.h>
diff --git a/library/rsa.c b/library/rsa.c
index 2b9f85b..a90b83a 100644
--- a/library/rsa.c
+++ b/library/rsa.c
@@ -28,6 +28,7 @@
 #if defined(MBEDTLS_RSA_C)
 
 #include "mbedtls/rsa.h"
+#include "bignum_core.h"
 #include "rsa_alt_helpers.h"
 #include "rsa_internal.h"
 #include "mbedtls/oid.h"
@@ -971,6 +972,45 @@
 }
 
 /*
+ * Unblind
+ * T = T * Vf mod N
+ */
+static int rsa_unblind(mbedtls_mpi *T, mbedtls_mpi *Vf, const mbedtls_mpi *N)
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+    const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N->p);
+    const size_t nlimbs = N->n;
+    const size_t tlimbs = mbedtls_mpi_core_montmul_working_limbs(nlimbs);
+    mbedtls_mpi RR, M_T;
+
+    mbedtls_mpi_init(&RR);
+    mbedtls_mpi_init(&M_T);
+
+    MBEDTLS_MPI_CHK(mbedtls_mpi_core_get_mont_r2_unsafe(&RR, N));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&M_T, tlimbs));
+
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(T, nlimbs));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(Vf, nlimbs));
+
+    /* T = T * Vf mod N
+     * Reminder: montmul(A, B, N) = A * B * R^-1 mod N
+     * Usually both operands are multiplied by R mod N beforehand (by calling
+     * `to_mont_rep()` on them), yielding a result that's also * R mod N (aka
+     * "in the Montgomery domain"). Here we only multiply one operand by R mod
+     * N, so the result is directly what we want - no need to call
+     * `from_mont_rep()` on it. */
+    mbedtls_mpi_core_to_mont_rep(T->p, T->p, N->p, nlimbs, mm, RR.p, M_T.p);
+    mbedtls_mpi_core_montmul(T->p, T->p, Vf->p, nlimbs, N->p, nlimbs, mm, M_T.p);
+
+cleanup:
+
+    mbedtls_mpi_free(&RR);
+    mbedtls_mpi_free(&M_T);
+
+    return ret;
+}
+
+/*
  * Exponent blinding supposed to prevent side-channel attacks using multiple
  * traces of measurements to recover the RSA key. The more collisions are there,
  * the more bits of the key can be recovered. See [3].
@@ -1017,23 +1057,14 @@
     /* Temporaries holding the blinded exponents for
      * the mod p resp. mod q computation (if used). */
     mbedtls_mpi DP_blind, DQ_blind;
-
-    /* Pointers to actual exponents to be used - either the unblinded
-     * or the blinded ones, depending on the presence of a PRNG. */
-    mbedtls_mpi *DP = &ctx->DP;
-    mbedtls_mpi *DQ = &ctx->DQ;
 #else
     /* Temporary holding the blinded exponent (if used). */
     mbedtls_mpi D_blind;
-
-    /* Pointer to actual exponent to be used - either the unblinded
-     * or the blinded one, depending on the presence of a PRNG. */
-    mbedtls_mpi *D = &ctx->D;
 #endif /* MBEDTLS_RSA_NO_CRT */
 
     /* Temporaries holding the initial input and the double
      * checked result; should be the same in the end. */
-    mbedtls_mpi I, C;
+    mbedtls_mpi input_blinded, check_result_blinded;
 
     if (f_rng == NULL) {
         return MBEDTLS_ERR_RSA_BAD_INPUT_DATA;
@@ -1068,8 +1099,8 @@
     mbedtls_mpi_init(&TP); mbedtls_mpi_init(&TQ);
 #endif
 
-    mbedtls_mpi_init(&I);
-    mbedtls_mpi_init(&C);
+    mbedtls_mpi_init(&input_blinded);
+    mbedtls_mpi_init(&check_result_blinded);
 
     /* End of MPI initialization */
 
@@ -1079,8 +1110,6 @@
         goto cleanup;
     }
 
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&I, &T));
-
     /*
      * Blinding
      * T = T * Vi mod N
@@ -1089,6 +1118,8 @@
     MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&T, &T, &ctx->Vi));
     MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&T, &T, &ctx->N));
 
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&input_blinded, &T));
+
     /*
      * Exponent blinding
      */
@@ -1104,8 +1135,6 @@
     MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&D_blind, &P1, &Q1));
     MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&D_blind, &D_blind, &R));
     MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&D_blind, &D_blind, &ctx->D));
-
-    D = &D_blind;
 #else
     /*
      * DP_blind = ( P - 1 ) * R + DP
@@ -1116,8 +1145,6 @@
     MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&DP_blind, &DP_blind,
                                         &ctx->DP));
 
-    DP = &DP_blind;
-
     /*
      * DQ_blind = ( Q - 1 ) * R + DQ
      */
@@ -1126,12 +1153,10 @@
     MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&DQ_blind, &Q1, &R));
     MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&DQ_blind, &DQ_blind,
                                         &ctx->DQ));
-
-    DQ = &DQ_blind;
 #endif /* MBEDTLS_RSA_NO_CRT */
 
 #if defined(MBEDTLS_RSA_NO_CRT)
-    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&T, &T, D, &ctx->N, &ctx->RN));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&T, &T, &D_blind, &ctx->N, &ctx->RN));
 #else
     /*
      * Faster decryption using the CRT
@@ -1140,8 +1165,8 @@
      * TQ = input ^ dQ mod Q
      */
 
-    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&TP, &T, DP, &ctx->P, &ctx->RP));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&TQ, &T, DQ, &ctx->Q, &ctx->RQ));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&TP, &T, &DP_blind, &ctx->P, &ctx->RP));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&TQ, &T, &DQ_blind, &ctx->Q, &ctx->RQ));
 
     /*
      * T = (TP - TQ) * (Q^-1 mod P) mod P
@@ -1157,20 +1182,19 @@
     MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&T, &TQ, &TP));
 #endif /* MBEDTLS_RSA_NO_CRT */
 
+    /* Verify the result to prevent glitching attacks. */
+    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&check_result_blinded, &T, &ctx->E,
+                                        &ctx->N, &ctx->RN));
+    if (mbedtls_mpi_cmp_mpi(&check_result_blinded, &input_blinded) != 0) {
+        ret = MBEDTLS_ERR_RSA_VERIFY_FAILED;
+        goto cleanup;
+    }
+
     /*
      * Unblind
      * T = T * Vf mod N
      */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&T, &T, &ctx->Vf));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&T, &T, &ctx->N));
-
-    /* Verify the result to prevent glitching attacks. */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&C, &T, &ctx->E,
-                                        &ctx->N, &ctx->RN));
-    if (mbedtls_mpi_cmp_mpi(&C, &I) != 0) {
-        ret = MBEDTLS_ERR_RSA_VERIFY_FAILED;
-        goto cleanup;
-    }
+    MBEDTLS_MPI_CHK(rsa_unblind(&T, &ctx->Vf, &ctx->N));
 
     olen = ctx->len;
     MBEDTLS_MPI_CHK(mbedtls_mpi_write_binary(&T, output, olen));
@@ -1199,8 +1223,8 @@
     mbedtls_mpi_free(&TP); mbedtls_mpi_free(&TQ);
 #endif
 
-    mbedtls_mpi_free(&C);
-    mbedtls_mpi_free(&I);
+    mbedtls_mpi_free(&check_result_blinded);
+    mbedtls_mpi_free(&input_blinded);
 
     if (ret != 0 && ret >= -0x007f) {
         return MBEDTLS_ERROR_ADD(MBEDTLS_ERR_RSA_PRIVATE_FAILED, ret);
diff --git a/programs/pkey/dh_client.c b/programs/pkey/dh_client.c
index 0cb1562..165cee2 100644
--- a/programs/pkey/dh_client.c
+++ b/programs/pkey/dh_client.c
@@ -13,14 +13,13 @@
 
 #if defined(MBEDTLS_AES_C) && defined(MBEDTLS_DHM_C) && \
     defined(MBEDTLS_ENTROPY_C) && defined(MBEDTLS_NET_C) && \
-    defined(MBEDTLS_RSA_C) && defined(MBEDTLS_MD_CAN_SHA256) && \
-    defined(MBEDTLS_FS_IO) && defined(MBEDTLS_CTR_DRBG_C) && \
-    defined(MBEDTLS_MD_CAN_SHA1)
+    defined(MBEDTLS_RSA_C) && defined(MBEDTLS_SHA256_C) && \
+    defined(MBEDTLS_FS_IO) && defined(MBEDTLS_CTR_DRBG_C)
 #include "mbedtls/net_sockets.h"
 #include "mbedtls/aes.h"
 #include "mbedtls/dhm.h"
 #include "mbedtls/rsa.h"
-#include "mbedtls/sha1.h"
+#include "mbedtls/sha256.h"
 #include "mbedtls/entropy.h"
 #include "mbedtls/ctr_drbg.h"
 
@@ -33,9 +32,8 @@
 
 #if !defined(MBEDTLS_AES_C) || !defined(MBEDTLS_DHM_C) ||     \
     !defined(MBEDTLS_ENTROPY_C) || !defined(MBEDTLS_NET_C) ||  \
-    !defined(MBEDTLS_RSA_C) || !defined(MBEDTLS_MD_CAN_SHA256) ||    \
-    !defined(MBEDTLS_FS_IO) || !defined(MBEDTLS_CTR_DRBG_C) || \
-    !defined(MBEDTLS_SHA1_C)
+    !defined(MBEDTLS_RSA_C) || !defined(MBEDTLS_SHA256_C) ||    \
+    !defined(MBEDTLS_FS_IO) || !defined(MBEDTLS_CTR_DRBG_C)
 int main(void)
 {
     mbedtls_printf("MBEDTLS_AES_C and/or MBEDTLS_DHM_C and/or MBEDTLS_ENTROPY_C "
@@ -60,12 +58,14 @@
 
     int ret = 1;
     int exit_code = MBEDTLS_EXIT_FAILURE;
+    unsigned int mdlen;
     size_t n, buflen;
     mbedtls_net_context server_fd;
 
     unsigned char *p, *end;
     unsigned char buf[2048];
-    unsigned char hash[32];
+    unsigned char hash[MBEDTLS_MD_MAX_SIZE];
+    mbedtls_mpi N, E;
     const char *pers = "dh_client";
 
     mbedtls_entropy_context entropy;
@@ -78,6 +78,8 @@
     mbedtls_dhm_init(&dhm);
     mbedtls_aes_init(&aes);
     mbedtls_ctr_drbg_init(&ctr_drbg);
+    mbedtls_mpi_init(&N);
+    mbedtls_mpi_init(&E);
 
     /*
      * 1. Setup the RNG
@@ -106,16 +108,13 @@
     }
 
     mbedtls_rsa_init(&rsa);
-
-    if ((ret = mbedtls_mpi_read_file(&rsa.MBEDTLS_PRIVATE(N), 16, f)) != 0 ||
-        (ret = mbedtls_mpi_read_file(&rsa.MBEDTLS_PRIVATE(E), 16, f)) != 0) {
+    if ((ret = mbedtls_mpi_read_file(&N, 16, f)) != 0 ||
+        (ret = mbedtls_mpi_read_file(&E, 16, f)) != 0 ||
+        (ret = mbedtls_rsa_import(&rsa, &N, NULL, NULL, NULL, &E) != 0)) {
         mbedtls_printf(" failed\n  ! mbedtls_mpi_read_file returned %d\n\n", ret);
         fclose(f);
         goto exit;
     }
-
-    rsa.MBEDTLS_PRIVATE(len) = (mbedtls_mpi_bitlen(&rsa.MBEDTLS_PRIVATE(N)) + 7) >> 3;
-
     fclose(f);
 
     /*
@@ -182,18 +181,24 @@
 
     p += 2;
 
-    if ((n = (size_t) (end - p)) != rsa.MBEDTLS_PRIVATE(len)) {
+    if ((n = (size_t) (end - p)) != mbedtls_rsa_get_len(&rsa)) {
         mbedtls_printf(" failed\n  ! Invalid RSA signature size\n\n");
         goto exit;
     }
 
-    if ((ret = mbedtls_sha1(buf, (int) (p - 2 - buf), hash)) != 0) {
-        mbedtls_printf(" failed\n  ! mbedtls_sha1 returned %d\n\n", ret);
+    mdlen = (unsigned int) mbedtls_md_get_size(mbedtls_md_info_from_type(MBEDTLS_MD_SHA256));
+    if (mdlen == 0) {
+        mbedtls_printf(" failed\n  ! Invalid digest type\n\n");
+        goto exit;
+    }
+
+    if ((ret = mbedtls_sha256(buf, (int) (p - 2 - buf), hash, 0)) != 0) {
+        mbedtls_printf(" failed\n  ! mbedtls_sha256 returned %d\n\n", ret);
         goto exit;
     }
 
     if ((ret = mbedtls_rsa_pkcs1_verify(&rsa, MBEDTLS_MD_SHA256,
-                                        32, hash, p)) != 0) {
+                                        mdlen, hash, p)) != 0) {
         mbedtls_printf(" failed\n  ! mbedtls_rsa_pkcs1_verify returned %d\n\n", ret);
         goto exit;
     }
@@ -273,6 +278,8 @@
     mbedtls_dhm_free(&dhm);
     mbedtls_ctr_drbg_free(&ctr_drbg);
     mbedtls_entropy_free(&entropy);
+    mbedtls_mpi_free(&N);
+    mbedtls_mpi_free(&E);
 
     mbedtls_exit(exit_code);
 }
diff --git a/programs/pkey/dh_server.c b/programs/pkey/dh_server.c
index adddbf2..91bac0e 100644
--- a/programs/pkey/dh_server.c
+++ b/programs/pkey/dh_server.c
@@ -13,14 +13,13 @@
 
 #if defined(MBEDTLS_AES_C) && defined(MBEDTLS_DHM_C) && \
     defined(MBEDTLS_ENTROPY_C) && defined(MBEDTLS_NET_C) && \
-    defined(MBEDTLS_RSA_C) && defined(MBEDTLS_MD_CAN_SHA256) && \
-    defined(MBEDTLS_FS_IO) && defined(MBEDTLS_CTR_DRBG_C) && \
-    defined(MBEDTLS_MD_CAN_SHA1)
+    defined(MBEDTLS_RSA_C) && defined(MBEDTLS_SHA256_C) && \
+    defined(MBEDTLS_FS_IO) && defined(MBEDTLS_CTR_DRBG_C)
 #include "mbedtls/net_sockets.h"
 #include "mbedtls/aes.h"
 #include "mbedtls/dhm.h"
 #include "mbedtls/rsa.h"
-#include "mbedtls/sha1.h"
+#include "mbedtls/sha256.h"
 #include "mbedtls/entropy.h"
 #include "mbedtls/ctr_drbg.h"
 
@@ -33,9 +32,8 @@
 
 #if !defined(MBEDTLS_AES_C) || !defined(MBEDTLS_DHM_C) ||     \
     !defined(MBEDTLS_ENTROPY_C) || !defined(MBEDTLS_NET_C) ||  \
-    !defined(MBEDTLS_RSA_C) || !defined(MBEDTLS_MD_CAN_SHA256) ||    \
-    !defined(MBEDTLS_FS_IO) || !defined(MBEDTLS_CTR_DRBG_C) || \
-    !defined(MBEDTLS_SHA1_C)
+    !defined(MBEDTLS_RSA_C) || !defined(MBEDTLS_SHA256_C) ||    \
+    !defined(MBEDTLS_FS_IO) || !defined(MBEDTLS_CTR_DRBG_C)
 int main(void)
 {
     mbedtls_printf("MBEDTLS_AES_C and/or MBEDTLS_DHM_C and/or MBEDTLS_ENTROPY_C "
@@ -53,11 +51,12 @@
 
     int ret = 1;
     int exit_code = MBEDTLS_EXIT_FAILURE;
+    unsigned int mdlen;
     size_t n, buflen;
     mbedtls_net_context listen_fd, client_fd;
 
     unsigned char buf[2048];
-    unsigned char hash[32];
+    unsigned char hash[MBEDTLS_MD_MAX_SIZE];
     unsigned char buf2[2];
     const char *pers = "dh_server";
 
@@ -186,21 +185,30 @@
     /*
      * 5. Sign the parameters and send them
      */
-    if ((ret = mbedtls_sha1(buf, n, hash)) != 0) {
-        mbedtls_printf(" failed\n  ! mbedtls_sha1 returned %d\n\n", ret);
+
+    mdlen = (unsigned int) mbedtls_md_get_size(mbedtls_md_info_from_type(MBEDTLS_MD_SHA256));
+    if (mdlen == 0) {
+        mbedtls_printf(" failed\n  ! Invalid digest type\n\n");
         goto exit;
     }
 
-    buf[n] = (unsigned char) (rsa.MBEDTLS_PRIVATE(len) >> 8);
-    buf[n + 1] = (unsigned char) (rsa.MBEDTLS_PRIVATE(len));
+    if ((ret = mbedtls_sha256(buf, n, hash, 0)) != 0) {
+        mbedtls_printf(" failed\n  ! mbedtls_sha256 returned %d\n\n", ret);
+        goto exit;
+    }
 
-    if ((ret = mbedtls_rsa_pkcs1_sign(&rsa, NULL, NULL, MBEDTLS_MD_SHA256,
-                                      32, hash, buf + n + 2)) != 0) {
+    const size_t rsa_key_len = mbedtls_rsa_get_len(&rsa);
+    buf[n] = (unsigned char) (rsa_key_len >> 8);
+    buf[n + 1] = (unsigned char) (rsa_key_len);
+
+    if ((ret = mbedtls_rsa_pkcs1_sign(&rsa, mbedtls_ctr_drbg_random, &ctr_drbg,
+                                      MBEDTLS_MD_SHA256, mdlen,
+                                      hash, buf + n + 2)) != 0) {
         mbedtls_printf(" failed\n  ! mbedtls_rsa_pkcs1_sign returned %d\n\n", ret);
         goto exit;
     }
 
-    buflen = n + 2 + rsa.MBEDTLS_PRIVATE(len);
+    buflen = n + 2 + rsa_key_len;
     buf2[0] = (unsigned char) (buflen >> 8);
     buf2[1] = (unsigned char) (buflen);
 
diff --git a/programs/pkey/rsa_decrypt.c b/programs/pkey/rsa_decrypt.c
index 76bfddf..a84af50 100644
--- a/programs/pkey/rsa_decrypt.c
+++ b/programs/pkey/rsa_decrypt.c
@@ -133,7 +133,7 @@
 
     fclose(f);
 
-    if (i != rsa.MBEDTLS_PRIVATE(len)) {
+    if (i != mbedtls_rsa_get_len(&rsa)) {
         mbedtls_printf("\n  ! Invalid RSA signature format\n\n");
         goto exit;
     }
diff --git a/programs/pkey/rsa_encrypt.c b/programs/pkey/rsa_encrypt.c
index 4bbb54e..6538f8a 100644
--- a/programs/pkey/rsa_encrypt.c
+++ b/programs/pkey/rsa_encrypt.c
@@ -126,7 +126,7 @@
         goto exit;
     }
 
-    for (i = 0; i < rsa.MBEDTLS_PRIVATE(len); i++) {
+    for (i = 0; i < mbedtls_rsa_get_len(&rsa); i++) {
         mbedtls_fprintf(f, "%02X%s", buf[i],
                         (i + 1) % 16 == 0 ? "\r\n" : " ");
     }
diff --git a/programs/pkey/rsa_sign.c b/programs/pkey/rsa_sign.c
index 9d8ebe3..e14953b 100644
--- a/programs/pkey/rsa_sign.c
+++ b/programs/pkey/rsa_sign.c
@@ -131,7 +131,7 @@
         goto exit;
     }
 
-    for (i = 0; i < rsa.MBEDTLS_PRIVATE(len); i++) {
+    for (i = 0; i < mbedtls_rsa_get_len(&rsa); i++) {
         mbedtls_fprintf(f, "%02X%s", buf[i],
                         (i + 1) % 16 == 0 ? "\r\n" : " ");
     }
diff --git a/programs/pkey/rsa_verify.c b/programs/pkey/rsa_verify.c
index e7d72fd..4a9af77 100644
--- a/programs/pkey/rsa_verify.c
+++ b/programs/pkey/rsa_verify.c
@@ -37,11 +37,14 @@
     int exit_code = MBEDTLS_EXIT_FAILURE;
     size_t i;
     mbedtls_rsa_context rsa;
+    mbedtls_mpi N, E;
     unsigned char hash[32];
     unsigned char buf[MBEDTLS_MPI_MAX_SIZE];
     char filename[512];
 
     mbedtls_rsa_init(&rsa);
+    mbedtls_mpi_init(&N);
+    mbedtls_mpi_init(&E);
 
     if (argc != 2) {
         mbedtls_printf("usage: rsa_verify <filename>\n");
@@ -62,15 +65,13 @@
         goto exit;
     }
 
-    if ((ret = mbedtls_mpi_read_file(&rsa.MBEDTLS_PRIVATE(N), 16, f)) != 0 ||
-        (ret = mbedtls_mpi_read_file(&rsa.MBEDTLS_PRIVATE(E), 16, f)) != 0) {
+    if ((ret = mbedtls_mpi_read_file(&N, 16, f)) != 0 ||
+        (ret = mbedtls_mpi_read_file(&E, 16, f)) != 0 ||
+        (ret = mbedtls_rsa_import(&rsa, &N, NULL, NULL, NULL, &E) != 0)) {
         mbedtls_printf(" failed\n  ! mbedtls_mpi_read_file returned %d\n\n", ret);
         fclose(f);
         goto exit;
     }
-
-    rsa.MBEDTLS_PRIVATE(len) = (mbedtls_mpi_bitlen(&rsa.MBEDTLS_PRIVATE(N)) + 7) >> 3;
-
     fclose(f);
 
     /*
@@ -91,7 +92,7 @@
 
     fclose(f);
 
-    if (i != rsa.MBEDTLS_PRIVATE(len)) {
+    if (i != mbedtls_rsa_get_len(&rsa)) {
         mbedtls_printf("\n  ! Invalid RSA signature format\n\n");
         goto exit;
     }
@@ -124,6 +125,8 @@
 exit:
 
     mbedtls_rsa_free(&rsa);
+    mbedtls_mpi_free(&N);
+    mbedtls_mpi_free(&E);
 
     mbedtls_exit(exit_code);
 }
diff --git a/tests/include/test/macros.h b/tests/include/test/macros.h
index 8de9c4d..a73e06f 100644
--- a/tests/include/test/macros.h
+++ b/tests/include/test/macros.h
@@ -125,8 +125,8 @@
     do {                                                    \
         TEST_ASSERT((pointer) == NULL);                     \
         if ((item_count) != 0) {                            \
-            (pointer) = mbedtls_calloc(sizeof(*(pointer)),  \
-                                       (item_count));       \
+            (pointer) = mbedtls_calloc((item_count),        \
+                                       sizeof(*(pointer))); \
             TEST_ASSERT((pointer) != NULL);                 \
         }                                                   \
     } while (0)
@@ -155,8 +155,8 @@
 #define TEST_CALLOC_NONNULL(pointer, item_count)            \
     do {                                                    \
         TEST_ASSERT((pointer) == NULL);                     \
-        (pointer) = mbedtls_calloc(sizeof(*(pointer)),      \
-                                   (item_count));           \
+        (pointer) = mbedtls_calloc((item_count),            \
+                                   sizeof(*(pointer)));     \
         if (((pointer) == NULL) && ((item_count) == 0)) {   \
             (pointer) = mbedtls_calloc(1, 1);               \
         }                                                   \
@@ -175,8 +175,8 @@
     do {                                                    \
         TEST_ASSERT((pointer) == NULL);                     \
         if ((item_count) != 0) {                            \
-            (pointer) = mbedtls_calloc(sizeof(*(pointer)),  \
-                                       (item_count));       \
+            (pointer) = mbedtls_calloc((item_count),        \
+                                       sizeof(*(pointer))); \
             TEST_ASSUME((pointer) != NULL);                 \
         }                                                   \
     } while (0)
diff --git a/tests/suites/test_suite_version.data b/tests/suites/test_suite_version.data
index faa3166..6290331 100644
--- a/tests/suites/test_suite_version.data
+++ b/tests/suites/test_suite_version.data
@@ -1,8 +1,8 @@
 Check compile time library version
-check_compiletime_version:"3.5.1"
+check_compiletime_version:"3.5.2"
 
 Check runtime library version
-check_runtime_version:"3.5.1"
+check_runtime_version:"3.5.2"
 
 Check for MBEDTLS_VERSION_C
 check_feature:"MBEDTLS_VERSION_C":0