Merge pull request #5600 from yuhaoth/pr/refactor-cookie-members-of-handshake

Refactor cookie members of handshake
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 94fb020..50a4901 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -14,6 +14,6 @@
 
 ## Notes for the submitter
 
-Please refer to the [contributing guidelines](../CONTRIBUTING.md), especially the
+Please refer to the [contributing guidelines](https://github.com/Mbed-TLS/mbedtls/blob/development/CONTRIBUTING.md), especially the
 checklist for PR contributors.
 
diff --git a/.uncrustify.cfg b/.uncrustify.cfg
new file mode 100644
index 0000000..ac9173e
--- /dev/null
+++ b/.uncrustify.cfg
@@ -0,0 +1,254 @@
+# Configuration options for Uncrustify specifying the Mbed TLS code style.
+#
+# Note: The code style represented by this file has not yet been introduced
+# to Mbed TLS.
+#
+# Copyright The Mbed TLS Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Line length options
+
+# Wrap lines at 100 characters
+code_width = 100
+
+# Allow splitting long for statements between the condition statements
+ls_for_split_full = true
+
+# Allow splitting function calls between arguments
+ls_func_split_full = true
+
+input_tab_size = 4
+
+# Spaces-only indentation
+indent_with_tabs = 0
+
+indent_columns = 4
+
+# Indent 'case' 1 level from 'switch'
+indent_switch_case = indent_columns
+
+# Line-up strings broken by '\'
+indent_align_string = true
+
+# Braces on the same line (Egyptian-style braces)
+nl_enum_brace = remove
+nl_union_brace = remove
+nl_struct_brace = remove
+nl_do_brace = remove
+nl_if_brace = remove
+nl_for_brace = remove
+nl_else_brace = remove
+nl_while_brace = remove
+nl_switch_brace = remove
+
+# Braces on same line as keywords that follow them - 'else' and the 'while' in 'do {} while ()'
+nl_brace_else = remove
+nl_brace_while = remove
+# Space before else on the same line
+sp_brace_else = add
+# If else is on the same line as '{', force exactly 1 space between them
+sp_else_brace = force
+
+# Functions are the exception and have braces on the next line
+nl_fcall_brace = add
+nl_fdef_brace = add
+
+# Force exactly one space between ')' and '{' in statements
+sp_sparen_brace = force
+
+# At least 1 space around assignment
+sp_assign = add
+
+# Leave spaces around the preprocessor '##' token-concatenate operator unchanged
+sp_pp_concat = ignore
+
+# At least 1 space around '||' and '&&'
+sp_bool = add
+
+# But no space after the '!' operator
+sp_not = remove
+
+# No space after the bitwise-not '~' operator
+sp_inv = remove
+
+# No space after the addressof '&' operator
+sp_addr = remove
+
+# No space around the member '.' and '->' operators
+sp_member = remove
+
+# No space after the dereference '*' operator
+sp_deref = remove
+
+# No space after a unary negation '-'
+sp_sign = remove
+
+# No space between the '++'/'--' operator and its operand
+sp_incdec = remove
+
+# At least 1 space around comparison operators
+sp_compare = add
+
+# Remove spaces inside all kinds of parentheses:
+
+# Remove spaces inside parentheses
+sp_inside_paren = remove
+
+# No spaces inside statement parentheses
+sp_inside_sparen = remove
+
+# No spaces inside cast parentheses '( char )x' -> '(char)x'
+sp_inside_paren_cast = remove
+
+# No spaces inside function parentheses
+sp_inside_fparen = remove
+# (The case where the function has no parameters/arguments)
+sp_inside_fparens = remove
+
+# No spaces inside the first parentheses in a function type
+sp_inside_tparen = remove
+
+# (Uncrustify >= 0.74.0) No spaces inside parens in for statements
+sp_inside_for = remove
+
+# Remove spaces between nested parentheses '( (' -> '(('
+sp_paren_paren = remove
+# (Uncrustify >= 0.74.0)
+sp_sparen_paren = remove
+
+# Remove spaces between ')' and adjacent '('
+sp_cparen_oparen = remove
+
+# (Uncrustify >= 0.73.0) space between 'do' and '{'
+sp_do_brace_open = force
+
+# (Uncrustify >= 0.73.0) space between '}' and 'while'
+sp_brace_close_while = force
+
+# At least 1 space before a '*' pointer star
+sp_before_ptr_star = add
+
+# Remove spaces between pointer stars
+sp_between_ptr_star = remove
+
+# No space after a pointer star
+sp_after_ptr_star = remove
+
+# But allow a space in the case of e.g. char * const x;
+sp_after_ptr_star_qualifier = ignore
+
+# Remove space after star in a function return type
+sp_after_ptr_star_func = remove
+
+# At least 1 space after a type in variable definition etc
+sp_after_type = add
+
+# Force exactly 1 space between a statement keyword (e.g. 'if') and an opening parenthesis
+sp_before_sparen = force
+
+# Remove a space before a ';'
+sp_before_semi = remove
+# (Uncrustify >= 0.73.0) Remove space before a semi in a non-empty for
+sp_before_semi_for = remove
+# (Uncrustify >= 0.73.0) Remove space in empty first statement of a for
+sp_before_semi_for_empty = remove
+# (Uncrustify >= 0.74.0) Remove space in empty middle statement of a for
+sp_between_semi_for_empty = remove
+
+# Add a space after a ';' (unless a comment follows)
+sp_after_semi = add
+# (Uncrustify >= 0.73.0) Add a space after a semi in non-empty for statements
+sp_after_semi_for = add
+# (Uncrustify >= 0.73.0) No space after final semi in empty for statements
+sp_after_semi_for_empty = remove
+
+# Remove spaces on the inside of square brackets '[]'
+sp_inside_square = remove
+
+# Must have at least 1 space after a comma
+sp_after_comma = add
+
+# Must not have a space before a comma
+sp_before_comma = remove
+
+# No space before the ':' in a case statement
+sp_before_case_colon = remove
+
+# No space after a cast - '(char) x' -> '(char)x'
+sp_after_cast = remove
+
+# No space between 'sizeof' and '('
+sp_sizeof_paren = remove
+
+# At least 1 space inside '{ }'
+sp_inside_braces = add
+
+# At least 1 space inside '{ }' in an enum
+sp_inside_braces_enum = add
+
+# At least 1 space inside '{ }' in a struct
+sp_inside_braces_struct = add
+
+# At least 1 space between a function return type and the function name
+sp_type_func = add
+
+# No space between a function name and its arguments/parameters
+sp_func_proto_paren = remove
+sp_func_def_paren = remove
+sp_func_call_paren = remove
+
+# No space between '__attribute__' and '('
+sp_attribute_paren = remove
+
+# No space between 'defined' and '(' in preprocessor conditions
+sp_defined_paren = remove
+
+# At least 1 space between a macro's name and its definition
+sp_macro = add
+sp_macro_func = add
+
+# Force exactly 1 space between a '}' and the name of a typedef if on the same line
+sp_brace_typedef = force
+
+# At least 1 space before a '\' line continuation
+sp_before_nl_cont = add
+
+# At least 1 space around '?' and ':' in ternary statements
+sp_cond_colon = add
+sp_cond_question = add
+
+# Space between #else/#endif and comment afterwards
+sp_endif_cmt = add
+
+# Remove newlines at the start of a file
+nl_start_of_file = remove
+
+# At least 1 newline at the end of a file
+nl_end_of_file = add
+nl_end_of_file_min = 1
+
+# Add braces in single-line statements
+mod_full_brace_do = add
+mod_full_brace_for = add
+mod_full_brace_if = add
+mod_full_brace_while = add
+
+# Remove parentheses from return statements
+mod_paren_on_return = remove
+
+# Disable removal of leading spaces in a multi-line comment if the first and
+# last lines are the same length
+cmt_multi_check_last = false
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59a960a..f72de64 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -355,7 +355,7 @@
     install(
         FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake/MbedTLSConfig.cmake"
               "${CMAKE_CURRENT_BINARY_DIR}/cmake/MbedTLSConfigVersion.cmake"
-        DESTINATION "cmake")
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/MbedTLS")
 
     export(
         EXPORT MbedTLSTargets
@@ -365,7 +365,7 @@
     install(
         EXPORT MbedTLSTargets
         NAMESPACE MbedTLS::
-        DESTINATION "cmake"
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/MbedTLS"
         FILE "MbedTLSTargets.cmake")
 
     if(CMAKE_VERSION VERSION_GREATER 3.15 OR CMAKE_VERSION VERSION_EQUAL 3.15)
diff --git a/ChangeLog b/ChangeLog
index 80b8617..cb277dc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,7 +3,7 @@
 = Mbed TLS 3.2.1 branch released 2022-07-12
 
 Bugfix
-   *  Re-add missing generated file library/ssl_debug_helpers_generated.c
+   *  Re-add missing generated file library/psa_crypto_driver_wrappers.c
 
 = Mbed TLS 3.2.0 branch released 2022-07-11
 
diff --git a/ChangeLog.d/LMS.txt b/ChangeLog.d/LMS.txt
index 6de374f..785bfcf 100644
--- a/ChangeLog.d/LMS.txt
+++ b/ChangeLog.d/LMS.txt
@@ -3,9 +3,9 @@
       Signature verification is production-ready, but generation is for testing
       purposes only. This currently only supports one parameter set
       (LMS_SHA256_M32_H10), meaning that each private key can be used to sign
-      1024 messages. As such, it is not intended for use in TLS, but instead for
-      verification of assets transmitted over an insecure channel, particularly
-      firmware images.
+      1024 messages. As such, it is not intended for use in TLS, but instead
+      for verification of assets transmitted over an insecure channel,
+      particularly firmware images.
     * Add the LM-OTS post-quantum-safe one-time signature scheme, which is
-      required for LMS. This can be used independently, but each key can only be
-      used to sign one message so is impractical for most circumstances.
+      required for LMS. This can be used independently, but each key can only
+      be used to sign one message so is impractical for most circumstances.
diff --git a/ChangeLog.d/add-rsa-pss-rsae-support-for-tls12.txt b/ChangeLog.d/add-rsa-pss-rsae-support-for-tls12.txt
index f88eb9e..0d40968 100644
--- a/ChangeLog.d/add-rsa-pss-rsae-support-for-tls12.txt
+++ b/ChangeLog.d/add-rsa-pss-rsae-support-for-tls12.txt
@@ -1,8 +1,8 @@
 Features
-   * When GnuTLS/Openssl server is configured in TLS 1.2 mode with a certificate
-     declaring an RSA public key and Mbed TLS is configured in hybrid mode, if
-     `rsa_pss_rsae_*` algorithms are before `rsa_pkcs1_*` ones in this list then
-     the GnuTLS/Openssl server chooses an `rsa_pss_rsae_*` signature algorithm
-     for its signature in the key exchange message. As Mbed TLS 1.2 does not
-     support them, the handshake fails. Add `rsa_pss_rsae_*` support for TLS 1.2
-     to resolve the compitablity issue.
+   * Support rsa_pss_rsae_* signature algorithms in TLS 1.2.
+Bugfix
+   * Fix an interoperability failure between an Mbed TLS client with both
+     TLS 1.2 and TLS 1.3 support, and a TLS 1.2 server that supports
+     rsa_pss_rsae_* signature algorithms. This failed because Mbed TLS
+     advertised support for PSS in both TLS 1.2 and 1.3, but only
+     actually supported PSS in TLS 1.3.
diff --git a/ChangeLog.d/alignment-perf.txt b/ChangeLog.d/alignment-perf.txt
new file mode 100644
index 0000000..7a8e6fb
--- /dev/null
+++ b/ChangeLog.d/alignment-perf.txt
@@ -0,0 +1,8 @@
+Features
+   * General performance improvements by accessing multiple bytes at a time.
+     Fixes #1666.
+   * Improvements to use of unaligned and byte-swapped memory, reducing code
+     size and improving performance (depending on compiler and target
+     architecture).
+Changes
+   * Mixed-endian systems are explicitly not supported any more.
diff --git a/ChangeLog.d/cmake-install.txt b/ChangeLog.d/cmake-install.txt
new file mode 100644
index 0000000..d8eb72e
--- /dev/null
+++ b/ChangeLog.d/cmake-install.txt
@@ -0,0 +1,3 @@
+Changes
+  * Install the .cmake files into CMAKE_INSTALL_LIBDIR/cmake/MbedTLS,
+    typically /usr/lib/cmake/MbedTLS.
diff --git a/ChangeLog.d/dtls-connection-id.txt b/ChangeLog.d/dtls-connection-id.txt
index eb9e216..840f837 100644
--- a/ChangeLog.d/dtls-connection-id.txt
+++ b/ChangeLog.d/dtls-connection-id.txt
@@ -3,14 +3,15 @@
      MBEDTLS_SSL_DTLS_CONNECTION_ID (enabled by default) and configured with
      mbedtls_ssl_set_cid().
 
-Changes
+Default behavior changes
    * Previously the macro MBEDTLS_SSL_DTLS_CONNECTION_ID implemented version 05
-     of the draft, and was marked experimental and disabled by default. It is
-     now no longer experimental, and implements the final version from RFC 9146,
-     which is not interoperable with the draft-05 version. If you need to
-     communicate with peers that use earlier versions of Mbed TLS, you
-     need to define MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT to 1, but then you
-     won't be able to communicate with peers that use the standard (non-draft)
-     version. If you need to interoperate with both classes of peers with the
+     of the IETF draft, and was marked experimental and disabled by default.
+     It is now no longer experimental, and implements the final version from
+     RFC 9146, which is not interoperable with the draft-05 version.
+     If you need to communicate with peers that use earlier versions of
+     Mbed TLS, then you need to define MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT
+     to 1, but then you won't be able to communicate with peers that use the
+     standard (non-draft) version.
+     If you need to interoperate with both classes of peers with the
      same build of Mbed TLS, please let us know about your situation on the
      mailing list or GitHub.
diff --git a/ChangeLog.d/extend-query_compile_time_config-to-psa_want.txt b/ChangeLog.d/extend-query_compile_time_config-to-psa_want.txt
index b268fd4..99b2ec4 100644
--- a/ChangeLog.d/extend-query_compile_time_config-to-psa_want.txt
+++ b/ChangeLog.d/extend-query_compile_time_config-to-psa_want.txt
@@ -1,2 +1,2 @@
 Changes
-   * Add the ability to query PSA_WANT_xxx macros to query_compile_time_config
+   * Add the ability to query PSA_WANT_xxx macros to query_compile_time_config.
diff --git a/ChangeLog.d/fix-tls12server-sent-sigalgs.txt b/ChangeLog.d/fix-tls12server-sent-sigalgs.txt
index 9abde2b..b74c6ec 100644
--- a/ChangeLog.d/fix-tls12server-sent-sigalgs.txt
+++ b/ChangeLog.d/fix-tls12server-sent-sigalgs.txt
@@ -1,5 +1,5 @@
 Bugfix
-    * Fix a bug whereby the the list of signature algorithms sent as part of the
-      TLS 1.2 server certificate request would get corrupted, meaning the first
-      algorithm would not get sent and an entry consisting of two random bytes
-      would be sent instead. Found by Serban Bejan and Dudek Sebastian.
+    * Fix a bug whereby the list of signature algorithms sent as part of
+      the TLS 1.2 server certificate request would get corrupted, meaning the
+      first algorithm would not get sent and an entry consisting of two random
+      bytes would be sent instead. Found by Serban Bejan and Dudek Sebastian.
diff --git a/ChangeLog.d/fix_arm_compile_erorr.txt b/ChangeLog.d/fix_arm_compile_erorr.txt
new file mode 100644
index 0000000..28c1d45
--- /dev/null
+++ b/ChangeLog.d/fix_arm_compile_erorr.txt
@@ -0,0 +1,3 @@
+Bugfix
+    * Fix a build error when compiling the bignum module for some Arm platforms.
+      Fixes #6089, #6124, #6217.
diff --git a/ChangeLog.d/fix_build_error_for_mbedtls_deprecated_removed.txt b/ChangeLog.d/fix_build_error_for_mbedtls_deprecated_removed.txt
index a70521a..f0fa000 100644
--- a/ChangeLog.d/fix_build_error_for_mbedtls_deprecated_removed.txt
+++ b/ChangeLog.d/fix_build_error_for_mbedtls_deprecated_removed.txt
@@ -1,3 +1,3 @@
 Bugfix
-    * Fix build error due to missing prototype
-      warning when MBEDTLS_DEPRECATED_REMOVED is enabled
+    * Fix a build error due to a missing prototype warning when
+      MBEDTLS_DEPRECATED_REMOVED is enabled.
diff --git a/ChangeLog.d/fix_build_tls1_2_with_single_encryption_type.txt b/ChangeLog.d/fix_build_tls1_2_with_single_encryption_type.txt
index bac4910..c7d2691 100644
--- a/ChangeLog.d/fix_build_tls1_2_with_single_encryption_type.txt
+++ b/ChangeLog.d/fix_build_tls1_2_with_single_encryption_type.txt
@@ -1,4 +1,3 @@
 Bugfix
-    * Fix bugs and missing dependencies when
-      building and testing configurations with
-      only one encryption type enabled in TLS 1.2.
+    * Fix bugs and missing dependencies when building and testing
+      configurations with only one encryption type enabled in TLS 1.2.
diff --git a/ChangeLog.d/fix_cmake_gen_files b/ChangeLog.d/fix_cmake_gen_files
deleted file mode 100644
index 3b2c099..0000000
--- a/ChangeLog.d/fix_cmake_gen_files
+++ /dev/null
@@ -1,3 +0,0 @@
-Bugfix
-   * Fix an issue in releases with GEN_FILES turned off whereby missing
-     generated files could be turned into symlinks to themselves.
diff --git a/ChangeLog.d/fix_cmake_gen_files.txt b/ChangeLog.d/fix_cmake_gen_files.txt
new file mode 100644
index 0000000..cdec6e8
--- /dev/null
+++ b/ChangeLog.d/fix_cmake_gen_files.txt
@@ -0,0 +1,4 @@
+Bugfix
+   * Fix an issue with in-tree CMake builds in releases with GEN_FILES
+     turned off: if a shipped file was missing from the working directory,
+     it could be turned into a symbolic link to itself.
diff --git a/ChangeLog.d/fix_cmake_using_iar_toolchain.txt b/ChangeLog.d/fix_cmake_using_iar_toolchain.txt
index ecc09c2..9ec6e0d 100644
--- a/ChangeLog.d/fix_cmake_using_iar_toolchain.txt
+++ b/ChangeLog.d/fix_cmake_using_iar_toolchain.txt
@@ -1,3 +1,3 @@
 Bugfix
-   * Fixed an issue that cause compile error using CMake IAR toolchain.
+   * Fix a compilation error when using CMake with an IAR toolchain.
      Fixes #5964.
diff --git a/ChangeLog.d/fix_hard_link_across_drives b/ChangeLog.d/fix_hard_link_across_drives
deleted file mode 100644
index 0c55c30..0000000
--- a/ChangeLog.d/fix_hard_link_across_drives
+++ /dev/null
@@ -1,3 +0,0 @@
-Bugfix
-   * Fix a build issue on Windows where the source and build directory could not be on
-     different drives (#5751).
diff --git a/ChangeLog.d/fix_hard_link_across_drives.txt b/ChangeLog.d/fix_hard_link_across_drives.txt
new file mode 100644
index 0000000..46d05c0
--- /dev/null
+++ b/ChangeLog.d/fix_hard_link_across_drives.txt
@@ -0,0 +1,3 @@
+Bugfix
+   * Fix a build issue on Windows using CMake where the source and build
+     directories could not be on different drives. Fixes #5751.
diff --git a/ChangeLog.d/fix_tls13_session_resumption_fail_when_hostname_is_not_localhost.txt b/ChangeLog.d/fix_tls13_session_resumption_fail_when_hostname_is_not_localhost.txt
index 5797f48..9f5c649 100644
--- a/ChangeLog.d/fix_tls13_session_resumption_fail_when_hostname_is_not_localhost.txt
+++ b/ChangeLog.d/fix_tls13_session_resumption_fail_when_hostname_is_not_localhost.txt
@@ -1,4 +1,4 @@
 Bugfix
-    * Fix TLS 1.3 session resumption fail. Fixes #6488.
-    * Add configuration check to exclude TLS 1.3 optional authentication of
-      client.
+    * Fix TLS 1.3 session resumption. Fixes #6488.
+    * Add a configuration check to exclude optional client authentication
+      in TLS 1.3 (where it is forbidden).
diff --git a/ChangeLog.d/fix_zeroization.txt b/ChangeLog.d/fix_zeroization.txt
index ad74d9c..8b00dcc 100644
--- a/ChangeLog.d/fix_zeroization.txt
+++ b/ChangeLog.d/fix_zeroization.txt
@@ -1,3 +1,3 @@
 Bugfix
-   * Fix possible crash in TLS PRF code, if a failure to allocate memory occurs.
-     Reported by Michael Madsen in #6516.
+   * Fix a possible null pointer dereference if a memory allocation fails
+     in TLS PRF code. Reported by Michael Madsen in #6516.
diff --git a/ChangeLog.d/mbedtls_asn1_type_free.txt b/ChangeLog.d/mbedtls_asn1_type_free.txt
index 81f3a20..3459bbe 100644
--- a/ChangeLog.d/mbedtls_asn1_type_free.txt
+++ b/ChangeLog.d/mbedtls_asn1_type_free.txt
@@ -1,6 +1,8 @@
 Features
-   * Shared code to free x509 structs like mbedtls_x509_named_data
+   * The new functions mbedtls_asn1_free_named_data_list() and
+     mbedtls_asn1_free_named_data_list_shallow() simplify the management
+     of memory in named data lists in X.509 structures.
 New deprecations
    * Deprecate mbedtls_asn1_free_named_data().
      Use mbedtls_asn1_free_named_data_list()
-     or mbedtls_asn1_free_named_data_list_shallow()
+     or mbedtls_asn1_free_named_data_list_shallow().
diff --git a/ChangeLog.d/pkcs7-parser.txt b/ChangeLog.d/pkcs7-parser.txt
deleted file mode 100644
index 7f85f0c..0000000
--- a/ChangeLog.d/pkcs7-parser.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Features
-   * Added partial support for parsing the PKCS7 cryptographic message syntax,
-     as defined in RFC 2315. Currently, support is limited to the following:
-     - Only the signed data content type, version 1 is supported.
-     - Only DER encoding is supported.
-     - Only a single digest algorithm per message is supported.
-     - Only 0 or 1, certificate is supported per message, which must be in
-       X509 format.
-     - There is no support for certificate-revocation lists.
-     - The authenticated and unauthenticated attribute fields of SignerInfo
-       must be empty.
-     Many thanks to Daniel Axtens, Nayna Jain, and Nick Child from IBM for
-     contributing this feature.
diff --git a/ChangeLog.d/psa_crypto_code_gen_1_1.txt b/ChangeLog.d/psa_crypto_code_gen_1_1.txt
index 2c18e6f..e10a81c 100644
--- a/ChangeLog.d/psa_crypto_code_gen_1_1.txt
+++ b/ChangeLog.d/psa_crypto_code_gen_1_1.txt
@@ -1,6 +1,13 @@
 Features
-    * Brought in PSA code geneneration JSON driver list.
-      Added auto generated templating support for key management.
-      Added Support for transparent and opaque keys (import/export/copy).
-      Included some general JSON validation for the given entry points.
-      Addresses version 1.1 of #5137.
+    * The PSA driver wrapper generator generate_driver_wrappers.py now
+      supports a subset of the driver description language, including
+      the following entry points: import_key, export_key, export_public_key,
+      get_builtin_key, copy_key.
+
+Requirement changes
+   * When building with PSA drivers using generate_driver_wrappers.py, or
+     when building the library from the development branch rather than
+     from a release, the Python module jsonschema is now necessary, in
+     addition to jinja2. The official list of required Python modules is
+     maintained in scripts/basic.requirements.txt and may change again
+     in the future.
diff --git a/ChangeLog.d/remove_ssl_session_compression.txt b/ChangeLog.d/remove_ssl_session_compression.txt
deleted file mode 100644
index dc59f1c..0000000
--- a/ChangeLog.d/remove_ssl_session_compression.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-Removals
-   * Remove compression property from SSL session struct.
-     MBEDTLS_SSL_COMPRESS_NULL is now the only supported
-     compression option and can be used for compatibility
-     reasons. Changes requested in #4223.
diff --git a/ChangeLog.d/tls13-misc.txt b/ChangeLog.d/tls13-misc.txt
index 497ed38..6733173 100644
--- a/ChangeLog.d/tls13-misc.txt
+++ b/ChangeLog.d/tls13-misc.txt
@@ -1,9 +1,8 @@
 Features
-   * Mbed TLS supports TLS 1.3 key establishment via pre-shared keys,
-     pre-shared keys provisioned externally or via the ticket mechanism
-     (session resumption).
-     The MBEDTLS_SSL_SESSION_TICKETS configuration option controls the support
-     for the ticket mechanism.
-     MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_xxx_ENABLED configuration options
-     have been introduced to control the support for the three possible
-     TLS 1.3 key exchange modes.
+   * Mbed TLS now supports TLS 1.3 key establishment via pre-shared keys.
+     The pre-shared keys can be provisioned externally or via the ticket
+     mechanism (session resumption).
+     The ticket mechanism is supported when the configuration option
+     MBEDTLS_SSL_SESSION_TICKETS is enabled.
+     New options MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_xxx_ENABLED
+     control the support for the three possible TLS 1.3 key exchange modes.
diff --git a/README.md b/README.md
index 1a4edb0..8a23bd2 100644
--- a/README.md
+++ b/README.md
@@ -261,6 +261,7 @@
 - Signed integers must be represented using two's complement.
 - `int` and `size_t` must be at least 32 bits wide.
 - The types `uint8_t`, `uint16_t`, `uint32_t` and their signed equivalents must be available.
+- Mixed-endian platforms are not supported.
 
 PSA cryptography API
 --------------------
diff --git a/doxygen/input/doc_encdec.h b/doxygen/input/doc_encdec.h
index 96734bd..ec149ae 100644
--- a/doxygen/input/doc_encdec.h
+++ b/doxygen/input/doc_encdec.h
@@ -38,7 +38,7 @@
  * All symmetric encryption algorithms are accessible via the generic cipher layer
  * (see \c mbedtls_cipher_setup()).
  *
- * The asymmetric encryptrion algorithms are accessible via the generic public
+ * The asymmetric encryption algorithms are accessible via the generic public
  * key layer (see \c mbedtls_pk_init()).
  *
  * The following algorithms are provided:
diff --git a/include/mbedtls/asn1write.h b/include/mbedtls/asn1write.h
index 5554720..e6f0479 100644
--- a/include/mbedtls/asn1write.h
+++ b/include/mbedtls/asn1write.h
@@ -352,7 +352,7 @@
  *                  the existing buffer to fit \p val_len.
  *
  * \return          A pointer to the new / existing entry on success.
- * \return          \c NULL if if there was a memory allocation error.
+ * \return          \c NULL if there was a memory allocation error.
  */
 mbedtls_asn1_named_data *mbedtls_asn1_store_named_data( mbedtls_asn1_named_data **list,
                                         const char *oid, size_t oid_len,
diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
index 7f55580..b791344 100644
--- a/include/mbedtls/check_config.h
+++ b/include/mbedtls/check_config.h
@@ -23,6 +23,7 @@
 #ifndef MBEDTLS_CHECK_CONFIG_H
 #define MBEDTLS_CHECK_CONFIG_H
 
+/* *INDENT-OFF* */
 /*
  * We assume CHAR_BIT is 8 in many places. In practice, this is true on our
  * target platforms, so not an issue, but let's just be extra sure.
@@ -32,6 +33,8 @@
 #error "mbed TLS requires a platform with 8-bit chars"
 #endif
 
+#include <stdint.h>
+
 #if defined(_WIN32)
 #if !defined(MBEDTLS_PLATFORM_C)
 #error "MBEDTLS_PLATFORM_C is required on Windows"
@@ -327,7 +330,7 @@
 
 /* Use of EC J-PAKE in TLS requires SHA-256.
  * This will be taken from MD if it is present, or from PSA if MD is absent.
- * Note: ECJPAKE_C depends on MD_C || PSA_CRYPTO_C. */
+ * Note: MBEDTLS_ECJPAKE_C depends on MBEDTLS_MD_C || MBEDTLS_PSA_CRYPTO_C. */
 #if defined(MBEDTLS_KEY_EXCHANGE_ECJPAKE_ENABLED) &&                    \
     !( defined(MBEDTLS_MD_C) && defined(MBEDTLS_SHA256_C) ) &&          \
     !( !defined(MBEDTLS_MD_C) && defined(PSA_WANT_ALG_SHA_256) )
@@ -849,6 +852,13 @@
 #error "MBEDTLS_SSL_EARLY_DATA  defined, but not all prerequisites"
 #endif
 
+#if defined(MBEDTLS_SSL_EARLY_DATA) && defined(MBEDTLS_SSL_SRV_C) && \
+    ( !defined(MBEDTLS_SSL_MAX_EARLY_DATA_SIZE)     || \
+      ( MBEDTLS_SSL_MAX_EARLY_DATA_SIZE < 0 )       || \
+      ( MBEDTLS_SSL_MAX_EARLY_DATA_SIZE > UINT32_MAX ) )
+#error "MBEDTLS_SSL_MAX_EARLY_DATA_SIZE MUST be defined and in range(0..UINT32_MAX)"
+#endif
+
 #if defined(MBEDTLS_SSL_PROTO_DTLS)     && \
     !defined(MBEDTLS_SSL_PROTO_TLS1_2)
 #error "MBEDTLS_SSL_PROTO_DTLS defined, but not all prerequisites"
@@ -905,7 +915,7 @@
 
 #if defined(MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT)     &&                 \
     !defined(MBEDTLS_SSL_DTLS_CONNECTION_ID)
-#error "MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT defined, but not all prerequsites"
+#error "MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT defined, but not all prerequisites"
 #endif
 
 #if defined(MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT) && MBEDTLS_SSL_DTLS_CONNECTION_ID_COMPAT != 0
@@ -1090,4 +1100,5 @@
  */
 typedef int mbedtls_iso_c_forbids_empty_translation_units;
 
+/* *INDENT-ON* */
 #endif /* MBEDTLS_CHECK_CONFIG_H */
diff --git a/include/mbedtls/config_psa.h b/include/mbedtls/config_psa.h
index 5b27dda..5727c5e 100644
--- a/include/mbedtls/config_psa.h
+++ b/include/mbedtls/config_psa.h
@@ -7,7 +7,7 @@
  *  those definitions to define symbols used in the library code.
  *
  *  Users and integrators should not edit this file, please edit
- *  include/mbedtls/mbedtls_config.h for MBETLS_XXX settings or
+ *  include/mbedtls/mbedtls_config.h for MBEDTLS_XXX settings or
  *  include/psa/crypto_config.h for PSA_WANT_XXX settings.
  */
 /*
diff --git a/include/mbedtls/legacy_or_psa.h b/include/mbedtls/legacy_or_psa.h
index f872dda..35798a5 100644
--- a/include/mbedtls/legacy_or_psa.h
+++ b/include/mbedtls/legacy_or_psa.h
@@ -64,7 +64,7 @@
  * The naming scheme for these macros is:
  *      MBEDTLS_HAS_feature_VIA_legacy_OR_PSA(_condition)
  * where:
- * - feature is expressed the same way as in PSA_WANT macros, for example:
+ * - feature is expressed the same way as in PSA_WANT_xxx macros, for example:
  *   KEY_TYPE_AES, ALG_SHA_256, ECC_SECP_R1_256;
  * - legacy is either LOWLEVEL or the name of the layer: MD, CIPHER;
  * - condition is omitted if it's based on availability, else it's
diff --git a/include/mbedtls/lms.h b/include/mbedtls/lms.h
index fe87d40..1179cd1 100644
--- a/include/mbedtls/lms.h
+++ b/include/mbedtls/lms.h
@@ -58,7 +58,7 @@
 #define MBEDTLS_LMS_TYPE_LEN            (4)
 #define MBEDTLS_LMS_H_TREE_HEIGHT(type) ((type) == MBEDTLS_LMS_SHA256_M32_H10 ? 10u : 0)
 
-/* The length of a hash output, Currently only imlemented for SHA256.
+/* The length of a hash output. Currently only implemented for SHA256.
  * Max is 32 bytes.
  */
 #define MBEDTLS_LMS_M_NODE_BYTES(type) ((type) == MBEDTLS_LMS_SHA256_M32_H10 ? 32 : 0)
@@ -82,7 +82,7 @@
 
 /** The Identifier of the LMS parameter set, as per
  * https://www.iana.org/assignments/leighton-micali-signatures/leighton-micali-signatures.xhtml
- * We are only implementing a subset of the types, particularly H10, for the sake of simplicty.
+ * We are only implementing a subset of the types, particularly H10, for the sake of simplicity.
  */
 typedef enum {
     MBEDTLS_LMS_SHA256_M32_H10 = 0x6,
@@ -90,7 +90,7 @@
 
 /** The Identifier of the LMOTS parameter set, as per
  *  https://www.iana.org/assignments/leighton-micali-signatures/leighton-micali-signatures.xhtml.
- *  We are only implementing a subset of the types, particularly N32_W8, for the sake of simplicty.
+ *  We are only implementing a subset of the types, particularly N32_W8, for the sake of simplicity.
  */
 typedef enum {
     MBEDTLS_LMOTS_SHA256_N32_W8 = 4
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index c719073..78c3635 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -1543,7 +1543,7 @@
  * Requires: MBEDTLS_SSL_KEEP_PEER_CERTIFICATE
  * Requires: MBEDTLS_PSA_CRYPTO_C
  *
- * Note: even though TLS 1.3 depends on PSA Crypto, and uses it unconditonally
+ * Note: even though TLS 1.3 depends on PSA Crypto, and uses it unconditionally
  * for most operations, if you want it to only use PSA for all crypto
  * operations, you need to also enable MBEDTLS_USE_PSA_CRYPTO; otherwise X.509
  * operations, and functions that are common with TLS 1.2 (record protection,
@@ -1674,6 +1674,23 @@
 //#define MBEDTLS_SSL_EARLY_DATA
 
 /**
+ * \def MBEDTLS_SSL_MAX_EARLY_DATA_SIZE
+ *
+ * The default maximum amount of 0-RTT data. See the documentation of
+ * \c mbedtls_ssl_tls13_conf_max_early_data_size() for more information.
+ *
+ * It must be in the range 0 to UINT32_MAX (inclusive).
+ *
+ * If MBEDTLS_SSL_EARLY_DATA is not defined, this default value does not
+ * have any impact on the build.
+ *
+ * This feature is experimental, not completed and thus not ready for
+ * production.
+ *
+ */
+#define MBEDTLS_SSL_MAX_EARLY_DATA_SIZE        1024
+
+/**
  * \def MBEDTLS_SSL_PROTO_DTLS
  *
  * Enable support for DTLS (all available versions).
@@ -2817,6 +2834,10 @@
 /**
  * \def MBEDTLS_PKCS7_C
  *
+ * This feature is a work in progress and not ready for production. Testing and
+ * validation is incomplete, and handling of malformed inputs may not be robust.
+ * The API may change.
+ *
  * Enable PKCS7 core for using PKCS7 formatted signatures.
  * RFC Link - https://tools.ietf.org/html/rfc2315
  *
@@ -2828,7 +2849,7 @@
  *
  * This module is required for the PKCS7 parsing modules.
  */
-#define MBEDTLS_PKCS7_C
+//#define MBEDTLS_PKCS7_C
 
 /**
  * \def MBEDTLS_PKCS12_C
diff --git a/include/mbedtls/pkcs12.h b/include/mbedtls/pkcs12.h
index 1b87aea..327996a 100644
--- a/include/mbedtls/pkcs12.h
+++ b/include/mbedtls/pkcs12.h
@@ -94,7 +94,7 @@
  *                   no byte order mark and with a null terminator (i.e. the
  *                   last two bytes should be 0x00 0x00).
  * \param pwdlen     length of the password (may be 0).
- * \param salt       Salt buffer to use This may only be \c NULL when
+ * \param salt       Salt buffer to use. This may only be \c NULL when
  *                   \p saltlen is 0.
  * \param saltlen    length of the salt (may be zero)
  * \param mbedtls_md mbedtls_md type to use during the derivation
diff --git a/include/mbedtls/pkcs7.h b/include/mbedtls/pkcs7.h
index 52895ac..bf61a63 100644
--- a/include/mbedtls/pkcs7.h
+++ b/include/mbedtls/pkcs7.h
@@ -22,6 +22,11 @@
  */
 
 /**
+ * This feature is a work in progress and not ready for production. The API may
+ * change. Furthermore, please note that the implementation has only been
+ * validated with well-formed inputs, not yet with untrusted inputs (and in
+ * practice, inputs are almost always untrusted).
+ *
  * Note: For the time being, this implementation of the PKCS7 cryptographic
  * message syntax is a partial implementation of RFC 2315.
  * Differences include:
@@ -179,7 +184,7 @@
  *
  * \param pkcs7    The pkcs7 structure to be filled by parser for the output.
  * \param buf      The buffer holding the DER encoded pkcs7.
- * \param buflen   The size in Bytes of \p buf.
+ * \param buflen   The size in bytes of \p buf.
  *
  * \note           This function makes an internal copy of the PKCS7 buffer
  *                 \p buf. In particular, \p buf may be destroyed or reused
@@ -192,7 +197,18 @@
                              const size_t buflen );
 
 /**
- * \brief          Verification of PKCS7 signature.
+ * \brief          Verification of PKCS7 signature against a caller-supplied
+ *                 certificate.
+ *
+ *                 For each signer in the PKCS7 structure, this function
+ *                 computes a signature over the supplied data, using the
+ *                 supplied certificate and the same digest algorithm as
+ *                 specified by the signer. It then compares this signature
+ *                 against the signer's signature; verification succeeds if
+ *                 any comparison matches.
+ *
+ *                 This function does not use the certificates held within the
+ *                 PKCS7 structure itself.
  *
  * \param pkcs7    PKCS7 structure containing signature.
  * \param cert     Certificate containing key to verify signature.
@@ -202,7 +218,7 @@
  * \note           This function internally calculates the hash on the supplied
  *                 plain data for signature verification.
  *
- * \return         A negative error code on failure.
+ * \return         0 if the signature verifies, or a negative error code on failure.
  */
 int mbedtls_pkcs7_signed_data_verify( mbedtls_pkcs7 *pkcs7,
                                       const mbedtls_x509_crt *cert,
@@ -210,7 +226,18 @@
                                       size_t datalen );
 
 /**
- * \brief          Verification of PKCS7 signature.
+ * \brief          Verification of PKCS7 signature against a caller-supplied
+ *                 certificate.
+ *
+ *                 For each signer in the PKCS7 structure, this function
+ *                 computes a signature over the supplied hash, using the
+ *                 supplied certificate and the same digest algorithm as
+ *                 specified by the signer. It then compares this signature
+ *                 against the signer's signature; verification succeeds if
+ *                 any comparison matches.
+ *
+ *                 This function does not use the certificates held within the
+ *                 PKCS7 structure itself.
  *
  * \param pkcs7    PKCS7 structure containing signature.
  * \param cert     Certificate containing key to verify signature.
@@ -218,9 +245,9 @@
  * \param hashlen  Length of the hash.
  *
  * \note           This function is different from mbedtls_pkcs7_signed_data_verify()
- *                 in a way that it directly recieves the hash of the data.
+ *                 in a way that it directly receives the hash of the data.
  *
- * \return         A negative error code on failure.
+ * \return         0 if the signature verifies, or a negative error code on failure.
  */
 int mbedtls_pkcs7_signed_hash_verify( mbedtls_pkcs7 *pkcs7,
                                       const mbedtls_x509_crt *cert,
diff --git a/include/mbedtls/rsa.h b/include/mbedtls/rsa.h
index 002551f..2bfaf8f 100644
--- a/include/mbedtls/rsa.h
+++ b/include/mbedtls/rsa.h
@@ -239,7 +239,7 @@
  * \param N        The RSA modulus. This may be \c NULL.
  * \param N_len    The Byte length of \p N; it is ignored if \p N == NULL.
  * \param P        The first prime factor of \p N. This may be \c NULL.
- * \param P_len    The Byte length of \p P; it ns ignored if \p P == NULL.
+ * \param P_len    The Byte length of \p P; it is ignored if \p P == NULL.
  * \param Q        The second prime factor of \p N. This may be \c NULL.
  * \param Q_len    The Byte length of \p Q; it is ignored if \p Q == NULL.
  * \param D        The private exponent. This may be \c NULL.
diff --git a/include/mbedtls/ssl.h b/include/mbedtls/ssl.h
index 3165cd5..3f48377 100644
--- a/include/mbedtls/ssl.h
+++ b/include/mbedtls/ssl.h
@@ -1527,6 +1527,12 @@
     int MBEDTLS_PRIVATE(early_data_enabled);     /*!< Early data enablement:
                                                   *   - MBEDTLS_SSL_EARLY_DATA_DISABLED,
                                                   *   - MBEDTLS_SSL_EARLY_DATA_ENABLED */
+
+#if defined(MBEDTLS_SSL_SRV_C)
+    /* The maximum amount of 0-RTT data. RFC 8446 section 4.6.1 */
+    uint32_t MBEDTLS_PRIVATE(max_early_data_size);
+#endif /* MBEDTLS_SSL_SRV_C */
+
 #endif /* MBEDTLS_SSL_EARLY_DATA */
 
 #if defined(MBEDTLS_SSL_ALPN)
@@ -1964,6 +1970,35 @@
 */
 void mbedtls_ssl_tls13_conf_early_data( mbedtls_ssl_config *conf,
                                         int early_data_enabled );
+
+#if defined(MBEDTLS_SSL_SRV_C)
+/**
+ * \brief Set the maximum amount of 0-RTT data in bytes.
+ *        Default: #MBEDTLS_SSL_MAX_EARLY_DATA_SIZE
+ *
+ *        This function sets the value of the max_early_data_size
+ *        field of the early data indication extension included in
+ *        the NewSessionTicket messages that the server may send.
+ *
+ *        The value defines the maximum amount of 0-RTT data
+ *        in bytes that a client will be allowed to send when using
+ *        one of the tickets defined by the NewSessionTicket messages.
+ *
+ * \note When resuming a session using a ticket, if the server receives more
+ *       early data than allowed for the ticket, it terminates the connection.
+ *       The maximum amount of 0-RTT data should thus be large enough
+ *       to allow a minimum of early data to be exchanged.
+ *
+ * \param[in] conf                  The SSL configuration to use.
+ * \param[in] max_early_data_size   The maximum amount of 0-RTT data, in bytes.
+ *
+ * \warning This interface is experimental and may change without notice.
+ *
+ */
+void mbedtls_ssl_tls13_conf_max_early_data_size(
+         mbedtls_ssl_config *conf, uint32_t max_early_data_size );
+#endif /* MBEDTLS_SSL_SRV_C */
+
 #endif /* MBEDTLS_SSL_PROTO_TLS1_3 && MBEDTLS_SSL_EARLY_DATA */
 
 #if defined(MBEDTLS_X509_CRT_PARSE_C)
@@ -2136,7 +2171,7 @@
  *                      the `ServerHello` contains the CID extension, too,
  *                      the CID extension will actually be put to use.
  *                    - On the Server, enabling the use of the CID through
- *                      this call implies that that the server will look for
+ *                      this call implies that the server will look for
  *                      the CID extension in a `ClientHello` from the client,
  *                      and, if present, reply with a CID extension in its
  *                      `ServerHello`.
@@ -2582,7 +2617,7 @@
  * \note The library stores \c p without accessing it. It is the responsibility
  *       of the caller to ensure that the pointer remains valid.
  *
- * \param ssl            The SSL context context to modify.
+ * \param ssl            The SSL context to modify.
  * \param p              The new value of the user data.
  */
 static inline void mbedtls_ssl_set_user_data_p(
@@ -2596,7 +2631,7 @@
  *
  * You can retrieve this value later with mbedtls_ssl_get_user_data_n().
  *
- * \param ssl            The SSL context context to modify.
+ * \param ssl            The SSL context to modify.
  * \param n              The new value of the user data.
  */
 static inline void mbedtls_ssl_set_user_data_n(
@@ -2613,7 +2648,7 @@
  * called. The value is undefined if mbedtls_ssl_set_user_data_n() has
  * been called without a subsequent call to mbedtls_ssl_set_user_data_p().
  *
- * \param ssl            The SSL context context to modify.
+ * \param ssl            The SSL context to modify.
  * \return               The current value of the user data.
  */
 static inline void *mbedtls_ssl_get_user_data_p(
@@ -2629,7 +2664,7 @@
  * called. The value is undefined if mbedtls_ssl_set_user_data_p() has
  * been called without a subsequent call to mbedtls_ssl_set_user_data_n().
  *
- * \param ssl            The SSL context context to modify.
+ * \param ssl            The SSL context to modify.
  * \return               The current value of the user data.
  */
 static inline uintptr_t mbedtls_ssl_get_user_data_n(
diff --git a/include/psa/crypto.h b/include/psa/crypto.h
index a71ca3f..03181ed 100644
--- a/include/psa/crypto.h
+++ b/include/psa/crypto.h
@@ -525,7 +525,7 @@
  *
  * This function destroys a key from both volatile
  * memory and, if applicable, non-volatile storage. Implementations shall
- * make a best effort to ensure that that the key material cannot be recovered.
+ * make a best effort to ensure that the key material cannot be recovered.
  *
  * This function also erases any metadata such as policies and frees
  * resources associated with the key.
@@ -3823,7 +3823,7 @@
  * compares those bytes to an expected value, provided as key of type
  * #PSA_KEY_TYPE_PASSWORD_HASH.
  * If you view the key derivation's output as a stream of bytes, this
- * function destructively reads the number of bytes corresponding the the
+ * function destructively reads the number of bytes corresponding to the
  * length of the expected value from the stream before comparing them.
  * The operation's capacity decreases by the number of bytes read.
  *
diff --git a/include/psa/crypto_se_driver.h b/include/psa/crypto_se_driver.h
index f0252c8..225fb17 100644
--- a/include/psa/crypto_se_driver.h
+++ b/include/psa/crypto_se_driver.h
@@ -226,7 +226,7 @@
  * operation by comparing the resulting MAC against a provided value
  *
  * \param[in,out] op_context    A hardware-specific structure for the previously
- *                              started MAC operation to be fiinished
+ *                              started MAC operation to be finished
  * \param[in] p_mac             The MAC value against which the resulting MAC
  *                              will be compared against
  * \param[in] mac_length        The size in bytes of the value stored in `p_mac`
@@ -337,7 +337,7 @@
     /** Function that completes a MAC operation with a verify check
      */
     psa_drv_se_mac_finish_verify_t  MBEDTLS_PRIVATE(p_finish_verify);
-    /** Function that aborts a previoustly started MAC operation
+    /** Function that aborts a previously started MAC operation
      */
     psa_drv_se_mac_abort_t          MBEDTLS_PRIVATE(p_abort);
     /** Function that performs a MAC operation in one call
@@ -746,7 +746,7 @@
                                                   size_t ciphertext_size,
                                                   size_t *p_ciphertext_length);
 
-/** A function that peforms a secure element authenticated decryption operation
+/** A function that performs a secure element authenticated decryption operation
  *
  * \param[in,out] drv_context           The driver context structure.
  * \param[in] key_slot                  Slot containing the key to use
@@ -1157,7 +1157,7 @@
  *
  * Different key derivation algorithms require a different number of inputs.
  * Instead of having an API that takes as input variable length arrays, which
- * can be problemmatic to manage on embedded platforms, the inputs are passed
+ * can be problematic to manage on embedded platforms, the inputs are passed
  * to the driver via a function, `psa_drv_se_key_derivation_collateral`, that
  * is called multiple times with different `collateral_id`s. Thus, for a key
  * derivation algorithm that required 3 parameter inputs, the flow would look
@@ -1271,7 +1271,7 @@
     psa_drv_se_key_derivation_collateral_t MBEDTLS_PRIVATE(p_collateral);
     /** Function that performs a final key derivation step */
     psa_drv_se_key_derivation_derive_t     MBEDTLS_PRIVATE(p_derive);
-    /** Function that perforsm a final key derivation or agreement and
+    /** Function that performs a final key derivation or agreement and
      * exports the key */
     psa_drv_se_key_derivation_export_t     MBEDTLS_PRIVATE(p_export);
 } psa_drv_se_key_derivation_t;
diff --git a/include/psa/crypto_sizes.h b/include/psa/crypto_sizes.h
index 231ea62..b42b2df 100644
--- a/include/psa/crypto_sizes.h
+++ b/include/psa/crypto_sizes.h
@@ -717,7 +717,7 @@
     (PSA_KEY_EXPORT_ASN1_INTEGER_MAX_SIZE(key_bits) + 11)
 
 /* Maximum size of the export encoding of an RSA key pair.
- * Assumes thatthe public exponent is less than 2^32 and that the size
+ * Assumes that the public exponent is less than 2^32 and that the size
  * difference between the two primes is at most 1 bit.
  *
  * RSAPrivateKey ::= SEQUENCE {
diff --git a/include/psa/crypto_types.h b/include/psa/crypto_types.h
index 739062d..af61aea 100644
--- a/include/psa/crypto_types.h
+++ b/include/psa/crypto_types.h
@@ -297,7 +297,7 @@
 
 #else /* MBEDTLS_PSA_CRYPTO_KEY_ID_ENCODES_OWNER */
 /* Implementation-specific: The Mbed Cryptography library can be built as
- * part of a multi-client service that exposes the PSA Cryptograpy API in each
+ * part of a multi-client service that exposes the PSA Cryptography API in each
  * client and encodes the client identity in the key identifier argument of
  * functions such as psa_open_key().
  */
diff --git a/include/psa/crypto_values.h b/include/psa/crypto_values.h
index b465ddb..cf8a7b2 100644
--- a/include/psa/crypto_values.h
+++ b/include/psa/crypto_values.h
@@ -1760,7 +1760,7 @@
 #define PSA_ALG_HKDF_BASE                       ((psa_algorithm_t)0x08000100)
 /** Macro to build an HKDF algorithm.
  *
- * For example, `PSA_ALG_HKDF(PSA_ALG_SHA256)` is HKDF using HMAC-SHA-256.
+ * For example, `PSA_ALG_HKDF(PSA_ALG_SHA_256)` is HKDF using HMAC-SHA-256.
  *
  * This key derivation algorithm uses the following inputs:
  * - #PSA_KEY_DERIVATION_INPUT_SALT is the salt used in the "extract" step.
@@ -1805,7 +1805,7 @@
 #define PSA_ALG_HKDF_EXTRACT_BASE                       ((psa_algorithm_t)0x08000400)
 /** Macro to build an HKDF-Extract algorithm.
  *
- * For example, `PSA_ALG_HKDF_EXTRACT(PSA_ALG_SHA256)` is
+ * For example, `PSA_ALG_HKDF_EXTRACT(PSA_ALG_SHA_256)` is
  * HKDF-Extract using HMAC-SHA-256.
  *
  * This key derivation algorithm uses the following inputs:
@@ -1854,7 +1854,7 @@
 #define PSA_ALG_HKDF_EXPAND_BASE                       ((psa_algorithm_t)0x08000500)
 /** Macro to build an HKDF-Expand algorithm.
  *
- * For example, `PSA_ALG_HKDF_EXPAND(PSA_ALG_SHA256)` is
+ * For example, `PSA_ALG_HKDF_EXPAND(PSA_ALG_SHA_256)` is
  * HKDF-Expand using HMAC-SHA-256.
  *
  * This key derivation algorithm uses the following inputs:
@@ -1925,7 +1925,7 @@
  * concatenation of ServerHello.Random + ClientHello.Random,
  * and the label is "key expansion".
  *
- * For example, `PSA_ALG_TLS12_PRF(PSA_ALG_SHA256)` represents the
+ * For example, `PSA_ALG_TLS12_PRF(PSA_ALG_SHA_256)` represents the
  * TLS 1.2 PRF using HMAC-SHA-256.
  *
  * \param hash_alg      A hash algorithm (\c PSA_ALG_XXX value such that
@@ -1995,7 +1995,7 @@
  *   PSA_ALG_RSA_PKCS1V15_CRYPT, passed to the key derivation operation
  *   with `psa_key_derivation_input_bytes()`.
  *
- * For example, `PSA_ALG_TLS12_PSK_TO_MS(PSA_ALG_SHA256)` represents the
+ * For example, `PSA_ALG_TLS12_PSK_TO_MS(PSA_ALG_SHA_256)` represents the
  * TLS-1.2 PSK to MasterSecret derivation PRF using HMAC-SHA-256.
  *
  * \param hash_alg      A hash algorithm (\c PSA_ALG_XXX value such that
@@ -2050,7 +2050,7 @@
  * PBKDF2 is defined by PKCS#5, republished as RFC 8018 (section 5.2).
  * This macro specifies the PBKDF2 algorithm constructed using a PRF based on
  * HMAC with the specified hash.
- * For example, `PSA_ALG_PBKDF2_HMAC(PSA_ALG_SHA256)` specifies PBKDF2
+ * For example, `PSA_ALG_PBKDF2_HMAC(PSA_ALG_SHA_256)` specifies PBKDF2
  * using the PRF HMAC-SHA-256.
  *
  * This key derivation algorithm uses the following inputs, which must be
@@ -2577,7 +2577,7 @@
  *
  * This flag allows the key to be used for a MAC verification operation
  * or for an asymmetric signature verification operation,
- * if otherwise permitted by by the key's type and policy.
+ * if otherwise permitted by the key's type and policy.
  *
  * For a key pair, this concerns the public key.
  */
@@ -2587,7 +2587,7 @@
  * hash.
  *
  * This flag allows the key to be used for a key derivation operation or for
- * a key agreement operation, if otherwise permitted by by the key's type and
+ * a key agreement operation, if otherwise permitted by the key's type and
  * policy.
  *
  * If this flag is present on all keys used in calls to
@@ -2603,7 +2603,7 @@
  * This flag allows the key to be used:
  *
  * This flag allows the key to be used in a key derivation operation, if
- * otherwise permitted by by the key's type and policy.
+ * otherwise permitted by the key's type and policy.
  *
  * If this flag is present on all keys used in calls to
  * psa_key_derivation_input_key() for a key derivation operation, then it
diff --git a/library/aes.c b/library/aes.c
index 7d03524..56dc5cf 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -978,7 +978,6 @@
                     const unsigned char *input,
                     unsigned char *output )
 {
-    int i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char temp[16];
 
@@ -1009,8 +1008,7 @@
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 16 );
 
             memcpy( iv, temp, 16 );
 
@@ -1023,8 +1021,7 @@
     {
         while( length > 0 )
         {
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 16 );
 
             ret = mbedtls_aes_crypt_ecb( ctx, mode, output, output );
             if( ret != 0 )
@@ -1106,28 +1103,24 @@
 
     while( blocks-- )
     {
-        size_t i;
-
         if( leftover && ( mode == MBEDTLS_AES_DECRYPT ) && blocks == 0 )
         {
             /* We are on the last block in a decrypt operation that has
              * leftover bytes, so we need to use the next tweak for this block,
-             * and this tweak for the lefover bytes. Save the current tweak for
+             * and this tweak for the leftover bytes. Save the current tweak for
              * the leftovers and then update the current tweak for use on this,
              * the last full block. */
             memcpy( prev_tweak, tweak, sizeof( tweak ) );
             mbedtls_gf128mul_x_ble( tweak, tweak );
         }
 
-        for( i = 0; i < 16; i++ )
-            tmp[i] = input[i] ^ tweak[i];
+        mbedtls_xor( tmp, input, tweak, 16 );
 
         ret = mbedtls_aes_crypt_ecb( &ctx->crypt, mode, tmp, tmp );
         if( ret != 0 )
             return( ret );
 
-        for( i = 0; i < 16; i++ )
-            output[i] = tmp[i] ^ tweak[i];
+        mbedtls_xor( output, tmp, tweak, 16 );
 
         /* Update the tweak for the next block. */
         mbedtls_gf128mul_x_ble( tweak, tweak );
@@ -1148,19 +1141,18 @@
         unsigned char *prev_output = output - 16;
 
         /* Copy ciphertext bytes from the previous block to our output for each
-         * byte of ciphertext we won't steal. At the same time, copy the
-         * remainder of the input for this final round (since the loop bounds
-         * are the same). */
+         * byte of ciphertext we won't steal. */
         for( i = 0; i < leftover; i++ )
         {
             output[i] = prev_output[i];
-            tmp[i] = input[i] ^ t[i];
         }
 
+        /* XOR the leftover input bytes with t into tmp for this final round. */
+        mbedtls_xor( tmp, input, t, leftover );
+
         /* Copy ciphertext bytes from the previous block for input in this
          * round. */
-        for( ; i < 16; i++ )
-            tmp[i] = prev_output[i] ^ t[i];
+        mbedtls_xor( tmp + i, prev_output + i, t + i, 16 - i );
 
         ret = mbedtls_aes_crypt_ecb( &ctx->crypt, mode, tmp, tmp );
         if( ret != 0 )
@@ -1168,8 +1160,7 @@
 
         /* Write the result back to the previous block, overriding the previous
          * output we copied. */
-        for( i = 0; i < 16; i++ )
-            prev_output[i] = tmp[i] ^ t[i];
+        mbedtls_xor( prev_output, tmp, t, 16 );
     }
 
     return( 0 );
diff --git a/library/alignment.h b/library/alignment.h
new file mode 100644
index 0000000..3c5fa23
--- /dev/null
+++ b/library/alignment.h
@@ -0,0 +1,494 @@
+/**
+ * \file alignment.h
+ *
+ * \brief Utility code for dealing with unaligned memory accesses
+ */
+/*
+ *  Copyright The Mbed TLS Contributors
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#ifndef MBEDTLS_LIBRARY_ALIGNMENT_H
+#define MBEDTLS_LIBRARY_ALIGNMENT_H
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "mbedtls/build_info.h"
+
+/**
+ * Read the unsigned 16 bits integer from the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 2 bytes of data
+ * \return  Data at the given address
+ */
+inline uint16_t mbedtls_get_unaligned_uint16( const void *p )
+{
+    uint16_t r;
+    memcpy( &r, p, sizeof( r ) );
+    return r;
+}
+
+/**
+ * Write the unsigned 16 bits integer to the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 2 bytes of data
+ * \param   x data to write
+ */
+inline void mbedtls_put_unaligned_uint16( void *p, uint16_t x )
+{
+    memcpy( p, &x, sizeof( x ) );
+}
+
+/**
+ * Read the unsigned 32 bits integer from the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 4 bytes of data
+ * \return  Data at the given address
+ */
+inline uint32_t mbedtls_get_unaligned_uint32( const void *p )
+{
+    uint32_t r;
+    memcpy( &r, p, sizeof( r ) );
+    return r;
+}
+
+/**
+ * Write the unsigned 32 bits integer to the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 4 bytes of data
+ * \param   x data to write
+ */
+inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
+{
+    memcpy( p, &x, sizeof( x ) );
+}
+
+/**
+ * Read the unsigned 64 bits integer from the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 8 bytes of data
+ * \return  Data at the given address
+ */
+inline uint64_t mbedtls_get_unaligned_uint64( const void *p )
+{
+    uint64_t r;
+    memcpy( &r, p, sizeof( r ) );
+    return r;
+}
+
+/**
+ * Write the unsigned 64 bits integer to the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 8 bytes of data
+ * \param   x data to write
+ */
+inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x )
+{
+    memcpy( p, &x, sizeof( x ) );
+}
+
+/** Byte Reading Macros
+ *
+ * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
+ * byte from x, where byte 0 is the least significant byte.
+ */
+#define MBEDTLS_BYTE_0( x ) ( (uint8_t) (   ( x )         & 0xff ) )
+#define MBEDTLS_BYTE_1( x ) ( (uint8_t) ( ( ( x ) >> 8  ) & 0xff ) )
+#define MBEDTLS_BYTE_2( x ) ( (uint8_t) ( ( ( x ) >> 16 ) & 0xff ) )
+#define MBEDTLS_BYTE_3( x ) ( (uint8_t) ( ( ( x ) >> 24 ) & 0xff ) )
+#define MBEDTLS_BYTE_4( x ) ( (uint8_t) ( ( ( x ) >> 32 ) & 0xff ) )
+#define MBEDTLS_BYTE_5( x ) ( (uint8_t) ( ( ( x ) >> 40 ) & 0xff ) )
+#define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
+#define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
+
+/*
+ * Detect GCC built-in byteswap routines
+ */
+#if defined(__GNUC__) && defined(__GNUC_PREREQ)
+#if __GNUC_PREREQ(4,8)
+#define MBEDTLS_BSWAP16 __builtin_bswap16
+#endif /* __GNUC_PREREQ(4,8) */
+#if __GNUC_PREREQ(4,3)
+#define MBEDTLS_BSWAP32 __builtin_bswap32
+#define MBEDTLS_BSWAP64 __builtin_bswap64
+#endif /* __GNUC_PREREQ(4,3) */
+#endif /* defined(__GNUC__) && defined(__GNUC_PREREQ) */
+
+/*
+ * Detect Clang built-in byteswap routines
+ */
+#if defined(__clang__) && defined(__has_builtin)
+#if __has_builtin(__builtin_bswap16)
+#define MBEDTLS_BSWAP16 __builtin_bswap16
+#endif /* __has_builtin(__builtin_bswap16) */
+#if __has_builtin(__builtin_bswap32)
+#define MBEDTLS_BSWAP32 __builtin_bswap32
+#endif /* __has_builtin(__builtin_bswap32) */
+#if __has_builtin(__builtin_bswap64)
+#define MBEDTLS_BSWAP64 __builtin_bswap64
+#endif /* __has_builtin(__builtin_bswap64) */
+#endif /* defined(__clang__) && defined(__has_builtin) */
+
+/*
+ * Detect MSVC built-in byteswap routines
+ */
+#if defined(_MSC_VER)
+#define MBEDTLS_BSWAP16 _byteswap_ushort
+#define MBEDTLS_BSWAP32 _byteswap_ulong
+#define MBEDTLS_BSWAP64 _byteswap_uint64
+#endif /* defined(_MSC_VER) */
+
+/* Detect armcc built-in byteswap routine */
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 410000)
+#define MBEDTLS_BSWAP32 __rev
+#endif
+
+/*
+ * Where compiler built-ins are not present, fall back to C code that the
+ * compiler may be able to detect and transform into the relevant bswap or
+ * similar instruction.
+ */
+#if !defined(MBEDTLS_BSWAP16)
+static inline uint16_t mbedtls_bswap16( uint16_t x ) {
+    return
+         ( x & 0x00ff ) << 8 |
+         ( x & 0xff00 ) >> 8;
+}
+#define MBEDTLS_BSWAP16 mbedtls_bswap16
+#endif /* !defined(MBEDTLS_BSWAP16) */
+
+#if !defined(MBEDTLS_BSWAP32)
+static inline uint32_t mbedtls_bswap32( uint32_t x ) {
+    return
+         ( x & 0x000000ff ) << 24 |
+         ( x & 0x0000ff00 ) <<  8 |
+         ( x & 0x00ff0000 ) >>  8 |
+         ( x & 0xff000000 ) >> 24;
+}
+#define MBEDTLS_BSWAP32 mbedtls_bswap32
+#endif /* !defined(MBEDTLS_BSWAP32) */
+
+#if !defined(MBEDTLS_BSWAP64)
+static inline uint64_t mbedtls_bswap64( uint64_t x ) {
+    return
+         ( x & 0x00000000000000ff ) << 56 |
+         ( x & 0x000000000000ff00 ) << 40 |
+         ( x & 0x0000000000ff0000 ) << 24 |
+         ( x & 0x00000000ff000000 ) <<  8 |
+         ( x & 0x000000ff00000000 ) >>  8 |
+         ( x & 0x0000ff0000000000 ) >> 24 |
+         ( x & 0x00ff000000000000 ) >> 40 |
+         ( x & 0xff00000000000000 ) >> 56;
+}
+#define MBEDTLS_BSWAP64 mbedtls_bswap64
+#endif /* !defined(MBEDTLS_BSWAP64) */
+
+#if !defined(__BYTE_ORDER__)
+static const uint16_t mbedtls_byte_order_detector = { 0x100 };
+#define MBEDTLS_IS_BIG_ENDIAN (*((unsigned char *) (&mbedtls_byte_order_detector)) == 0x01)
+#else
+#define MBEDTLS_IS_BIG_ENDIAN ((__BYTE_ORDER__) == (__ORDER_BIG_ENDIAN__))
+#endif /* !defined(__BYTE_ORDER__) */
+
+/**
+ * Get the unsigned 32 bits integer corresponding to four bytes in
+ * big-endian order (MSB first).
+ *
+ * \param   data    Base address of the memory to get the four bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the four bytes to build the 32 bits unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT32_BE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? mbedtls_get_unaligned_uint32((data) + (offset))                  \
+        : MBEDTLS_BSWAP32(mbedtls_get_unaligned_uint32((data) + (offset))) \
+    )
+
+/**
+ * Put in memory a 32 bits unsigned integer in big-endian order.
+ *
+ * \param   n       32 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 32
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 32 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT32_BE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), (uint32_t)(n));      \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), MBEDTLS_BSWAP32((uint32_t)(n))); \
+    }                                                                        \
+}
+
+/**
+ * Get the unsigned 32 bits integer corresponding to four bytes in
+ * little-endian order (LSB first).
+ *
+ * \param   data    Base address of the memory to get the four bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the four bytes to build the 32 bits unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT32_LE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? MBEDTLS_BSWAP32(mbedtls_get_unaligned_uint32((data) + (offset))) \
+        : mbedtls_get_unaligned_uint32((data) + (offset))                  \
+    )
+
+
+/**
+ * Put in memory a 32 bits unsigned integer in little-endian order.
+ * Expands to one statement: write `MBEDTLS_PUT_UINT32_LE( ... );`.
+ * \param   n       32 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 32
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 32 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT32_LE( n, data, offset )                             \
+do {                                                                         \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), MBEDTLS_BSWAP32((uint32_t)(n))); \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), (uint32_t)(n));      \
+    }                                                                        \
+} while( 0 )
+
+/**
+ * Get the unsigned 16-bit integer corresponding to two bytes in
+ * little-endian order (LSB first). Expands to an expression; the loaded
+ * value goes through MBEDTLS_BSWAP16() only if MBEDTLS_IS_BIG_ENDIAN holds.
+ * \param   data    Base address of the memory to get the two bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the two bytes to build the 16-bit unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT16_LE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? MBEDTLS_BSWAP16(mbedtls_get_unaligned_uint16((data) + (offset))) \
+        : mbedtls_get_unaligned_uint16((data) + (offset))                  \
+    )
+
+/**
+ * Put in memory a 16 bits unsigned integer in little-endian order.
+ * Expands to one statement: write `MBEDTLS_PUT_UINT16_LE( ... );`.
+ * \param   n       16 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 16
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 16 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT16_LE( n, data, offset )                             \
+do {                                                                         \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), MBEDTLS_BSWAP16((uint16_t)(n))); \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), (uint16_t)(n));      \
+    }                                                                        \
+} while( 0 )
+
+/**
+ * Get the unsigned 16-bit integer corresponding to two bytes in
+ * big-endian order (MSB first). Expands to an expression; the loaded value
+ * goes through MBEDTLS_BSWAP16() unless MBEDTLS_IS_BIG_ENDIAN holds.
+ * \param   data    Base address of the memory to get the two bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the two bytes to build the 16-bit unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT16_BE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? mbedtls_get_unaligned_uint16((data) + (offset))                  \
+        : MBEDTLS_BSWAP16(mbedtls_get_unaligned_uint16((data) + (offset))) \
+    )
+
+/**
+ * Put in memory a 16 bits unsigned integer in big-endian order.
+ * Expands to one statement: write `MBEDTLS_PUT_UINT16_BE( ... );`.
+ * \param   n       16 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 16
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 16 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT16_BE( n, data, offset )                             \
+do {                                                                         \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), (uint16_t)(n));      \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), MBEDTLS_BSWAP16((uint16_t)(n))); \
+    }                                                                        \
+} while( 0 )
+
+/**
+ * Get the unsigned 24-bit integer corresponding to three bytes in
+ * big-endian order (MSB first). Expands to an expression in which \p data
+ * and \p offset each appear three times: pass side-effect-free arguments.
+ * \param   data    Base address of the memory to get the three bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the three bytes to build the 24-bit unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT24_BE( data , offset )                  \
+    (                                                           \
+          ( (uint32_t) ( data )[( offset )    ] << 16 )         \
+        | ( (uint32_t) ( data )[( offset ) + 1] << 8  )         \
+        | ( (uint32_t) ( data )[( offset ) + 2]       )         \
+    )
+
+/**
+ * Put in memory a 24 bits unsigned integer in big-endian order.
+ * Expands to one statement; each argument is expanded three times.
+ * \param   n       24 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 24
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 24 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT24_BE( n, data, offset )                \
+do {                                                            \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_2( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_0( n );             \
+} while( 0 )
+
+/**
+ * Get the unsigned 24-bit integer corresponding to three bytes in
+ * little-endian order (LSB first). Expands to an expression in which \p data
+ * and \p offset each appear three times: pass side-effect-free arguments.
+ * \param   data    Base address of the memory to get the three bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the three bytes to build the 24-bit unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT24_LE( data, offset )                   \
+    (                                                           \
+          ( (uint32_t) ( data )[( offset )    ]       )         \
+        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
+        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
+    )
+
+/**
+ * Put in memory a 24 bits unsigned integer in little-endian order.
+ * Expands to one statement; each argument is expanded three times.
+ * \param   n       24 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 24
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 24 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT24_LE( n, data, offset )                \
+do {                                                            \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
+} while( 0 )
+
+/**
+ * Get the unsigned 64-bit integer corresponding to eight bytes in
+ * big-endian order (MSB first). Expands to an expression; the loaded value
+ * goes through MBEDTLS_BSWAP64() unless MBEDTLS_IS_BIG_ENDIAN holds.
+ * \param   data    Base address of the memory to get the eight bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the eight bytes to build the 64-bit unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT64_BE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? mbedtls_get_unaligned_uint64((data) + (offset))                  \
+        : MBEDTLS_BSWAP64(mbedtls_get_unaligned_uint64((data) + (offset))) \
+    )
+
+/**
+ * Put in memory a 64 bits unsigned integer in big-endian order.
+ * Expands to one statement: write `MBEDTLS_PUT_UINT64_BE( ... );`.
+ * \param   n       64 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 64
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 64 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT64_BE( n, data, offset )                             \
+do {                                                                         \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), (uint64_t)(n));      \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), MBEDTLS_BSWAP64((uint64_t)(n))); \
+    }                                                                        \
+} while( 0 )
+
+/**
+ * Get the unsigned 64-bit integer corresponding to eight bytes in
+ * little-endian order (LSB first). Expands to an expression; the loaded
+ * value goes through MBEDTLS_BSWAP64() only if MBEDTLS_IS_BIG_ENDIAN holds.
+ * \param   data    Base address of the memory to get the eight bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the eight bytes to build the 64-bit unsigned
+ *                  integer from.
+ */
+#define MBEDTLS_GET_UINT64_LE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? MBEDTLS_BSWAP64(mbedtls_get_unaligned_uint64((data) + (offset))) \
+        : mbedtls_get_unaligned_uint64((data) + (offset))                  \
+    )
+
+/**
+ * Put in memory a 64 bits unsigned integer in little-endian order.
+ * Expands to one statement: write `MBEDTLS_PUT_UINT64_LE( ... );`.
+ * \param   n       64 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 64
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 64 bits unsigned integer \p n.
+ */
+#define MBEDTLS_PUT_UINT64_LE( n, data, offset )                             \
+do {                                                                         \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), MBEDTLS_BSWAP64((uint64_t)(n))); \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), (uint64_t)(n));      \
+    }                                                                        \
+} while( 0 )
+
+#endif /* MBEDTLS_LIBRARY_ALIGNMENT_H */
diff --git a/library/aria.c b/library/aria.c
index 5e52eea..517e10a 100644
--- a/library/aria.c
+++ b/library/aria.c
@@ -98,47 +98,8 @@
  * modify byte order: ( A B C D ) -> ( D C B A ), i.e. change endianness
  *
  * This is submatrix P3 in [1] Appendix B.1
- *
- * Some compilers fail to translate this to a single instruction,
- * so let's provide asm versions for common platforms with C fallback.
  */
-#if defined(MBEDTLS_HAVE_ASM)
-#if defined(__arm__) /* rev available from v6 up */
-/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
-#if defined(__GNUC__) && \
-    ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 ) && \
-    __ARM_ARCH >= 6
-static inline uint32_t aria_p3( uint32_t x )
-{
-    uint32_t r;
-    __asm( "rev %0, %1" : "=l" (r) : "l" (x) );
-    return( r );
-}
-#define ARIA_P3 aria_p3
-#elif defined(__ARMCC_VERSION) && __ARMCC_VERSION < 6000000 && \
-    ( __TARGET_ARCH_ARM >= 6 || __TARGET_ARCH_THUMB >= 3 )
-static inline uint32_t aria_p3( uint32_t x )
-{
-    uint32_t r;
-    __asm( "rev r, x" );
-    return( r );
-}
-#define ARIA_P3 aria_p3
-#endif
-#endif /* arm */
-#if defined(__GNUC__) && \
-    defined(__i386__) || defined(__amd64__) || defined( __x86_64__)
-static inline uint32_t aria_p3( uint32_t x )
-{
-    __asm( "bswap %0" : "=r" (x) : "0" (x) );
-    return( x );
-}
-#define ARIA_P3 aria_p3
-#endif /* x86 gnuc */
-#endif /* MBEDTLS_HAVE_ASM && GNUC */
-#if !defined(ARIA_P3)
-#define ARIA_P3(x) ARIA_P2( ARIA_P1 ( x ) )
-#endif
+#define ARIA_P3(x) MBEDTLS_BSWAP32(x)
 
 /*
  * ARIA Affine Transform
@@ -583,7 +544,6 @@
                             const unsigned char *input,
                             unsigned char *output )
 {
-    int i;
     unsigned char temp[MBEDTLS_ARIA_BLOCKSIZE];
 
     ARIA_VALIDATE_RET( ctx != NULL );
@@ -603,8 +563,7 @@
             memcpy( temp, input, MBEDTLS_ARIA_BLOCKSIZE );
             mbedtls_aria_crypt_ecb( ctx, input, output );
 
-            for( i = 0; i < MBEDTLS_ARIA_BLOCKSIZE; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, MBEDTLS_ARIA_BLOCKSIZE );
 
             memcpy( iv, temp, MBEDTLS_ARIA_BLOCKSIZE );
 
@@ -617,8 +576,7 @@
     {
         while( length > 0 )
         {
-            for( i = 0; i < MBEDTLS_ARIA_BLOCKSIZE; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, MBEDTLS_ARIA_BLOCKSIZE );
 
             mbedtls_aria_crypt_ecb( ctx, output, output );
             memcpy( iv, output, MBEDTLS_ARIA_BLOCKSIZE );
diff --git a/library/bignum_core.c b/library/bignum_core.c
index 6635351..1ce8457 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -83,45 +83,25 @@
 
 static mbedtls_mpi_uint mpi_bigendian_to_host( mbedtls_mpi_uint a )
 {
-#if defined(__BYTE_ORDER__)
-
-/* Nothing to do on bigendian systems. */
-#if ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
-    return( a );
-#endif /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
-
-#if ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
-
-/* For GCC and Clang, have builtins for byte swapping. */
-#if defined(__GNUC__) && defined(__GNUC_PREREQ)
-#if __GNUC_PREREQ(4,3)
-#define have_bswap
-#endif
-#endif
-
-#if defined(__clang__) && defined(__has_builtin)
-#if __has_builtin(__builtin_bswap32)  &&                 \
-    __has_builtin(__builtin_bswap64)
-#define have_bswap
-#endif
-#endif
-
-#if defined(have_bswap)
-    /* The compiler is hopefully able to statically evaluate this! */
-    switch( sizeof(mbedtls_mpi_uint) )
+    if ( MBEDTLS_IS_BIG_ENDIAN )
     {
-        case 4:
-            return( __builtin_bswap32(a) );
-        case 8:
-            return( __builtin_bswap64(a) );
+        /* Nothing to do on bigendian systems. */
+        return( a );
     }
-#endif
-#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
-#endif /* __BYTE_ORDER__ */
+    else
+    {
+        switch( sizeof(mbedtls_mpi_uint) )
+        {
+            case 4:
+                return (mbedtls_mpi_uint) MBEDTLS_BSWAP32( (uint32_t)a );
+            case 8:
+                return (mbedtls_mpi_uint) MBEDTLS_BSWAP64( (uint64_t)a );
+        }
 
-    /* Fall back to C-based reordering if we don't know the byte order
-     * or we couldn't use a compiler-specific builtin. */
-    return( mpi_bigendian_to_host_c( a ) );
+        /* Fall back to C-based reordering if mbedtls_mpi_uint is neither
+         * 4 nor 8 bytes wide. */
+        return( mpi_bigendian_to_host_c( a ) );
+    }
 }
 
 void mbedtls_mpi_core_bigendian_to_host( mbedtls_mpi_uint *A,
@@ -596,6 +576,19 @@
     return( wsize );
 }
 
+size_t mbedtls_mpi_core_exp_mod_working_limbs( size_t AN_limbs, size_t E_limbs )
+{
+    /* One table entry per possible window value, i.e. 2^(window size). */
+    const size_t window_bits   = exp_mod_get_window_size( E_limbs * biL );
+    const size_t table_entries = ( (size_t) 1 ) << window_bits;
+
+    /* Limbs needed, in pool order: the window table, one entry selected
+     * from it, and 2 * AN_limbs + 1 limbs of scratch space. */
+    const size_t pool_limbs = table_entries * AN_limbs + AN_limbs;
+
+    return( pool_limbs + ( 2 * AN_limbs + 1 ) );
+}
+
 static void exp_mod_precompute_window( const mbedtls_mpi_uint *A,
                                        const mbedtls_mpi_uint *N,
                                        size_t AN_limbs,
@@ -610,9 +603,9 @@
     Wtable[0] = 1;
     mbedtls_mpi_core_montmul( Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp );
 
-    /* W[1] = A * R^2 * R^-1 mod N = A * R mod N */
+    /* W[1] = A (already in Montgomery presentation) */
     mbedtls_mpi_uint *W1 = Wtable + AN_limbs;
-    mbedtls_mpi_core_montmul( W1, A, RR, AN_limbs, N, AN_limbs, mm, temp );
+    memcpy( W1, A, AN_limbs * ciL );
 
     /* W[i+1] = W[i] * W[1], i >= 2 */
     mbedtls_mpi_uint *Wprev = W1;
@@ -626,6 +619,8 @@
 
 /* Exponentiation: X := A^E mod N.
  *
+ * A must already be in Montgomery form.
+ *
  * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero.
  *
  * RR must contain 2^{2*biL} mod N.
@@ -634,35 +629,27 @@
  * (The difference is that the body in our loop processes a single bit instead
  * of a full window.)
  */
-int mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
-                              const mbedtls_mpi_uint *A,
-                              const mbedtls_mpi_uint *N,
-                              size_t AN_limbs,
-                              const mbedtls_mpi_uint *E,
-                              size_t E_limbs,
-                              const mbedtls_mpi_uint *RR )
+void mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
+                               const mbedtls_mpi_uint *A,
+                               const mbedtls_mpi_uint *N,
+                               size_t AN_limbs,
+                               const mbedtls_mpi_uint *E,
+                               size_t E_limbs,
+                               const mbedtls_mpi_uint *RR,
+                               mbedtls_mpi_uint *T )
 {
     const size_t wsize = exp_mod_get_window_size( E_limbs * biL );
     const size_t welem = ( (size_t) 1 ) << wsize;
 
-    /* Allocate memory pool and set pointers to parts of it */
-    const size_t table_limbs   = welem * AN_limbs;
-    const size_t temp_limbs    = 2 * AN_limbs + 1;
-    const size_t select_limbs  = AN_limbs;
-    const size_t total_limbs   = table_limbs + temp_limbs + select_limbs;
+    /* This is how we will use the temporary storage T, which must have space
+     * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */
+    const size_t table_limbs  = welem * AN_limbs;
+    const size_t select_limbs = AN_limbs;
 
-    /* heap allocated memory pool */
-    mbedtls_mpi_uint *mempool =
-        mbedtls_calloc( total_limbs, sizeof(mbedtls_mpi_uint) );
-    if( mempool == NULL )
-    {
-        return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
-    }
-
-    /* pointers to temporaries within memory pool */
-    mbedtls_mpi_uint *const Wtable  = mempool;
-    mbedtls_mpi_uint *const Wselect = Wtable    + table_limbs;
-    mbedtls_mpi_uint *const temp    = Wselect  + select_limbs;
+    /* Pointers to specific parts of the temporary working memory pool */
+    mbedtls_mpi_uint *const Wtable  = T;
+    mbedtls_mpi_uint *const Wselect = Wtable  +  table_limbs;
+    mbedtls_mpi_uint *const temp    = Wselect + select_limbs;
 
     /*
      * Window precomputation
@@ -729,14 +716,6 @@
         }
     }
     while( ! ( E_bit_index == 0 && E_limb_index == 0 ) );
-
-    /* Convert X back to normal presentation */
-    const mbedtls_mpi_uint one = 1;
-    mbedtls_mpi_core_montmul( X, X, &one, 1, N, AN_limbs, mm, temp );
-
-    mbedtls_platform_zeroize( mempool, total_limbs * sizeof(mbedtls_mpi_uint) );
-    mbedtls_free( mempool );
-    return( 0 );
 }
 
 /* END MERGE SLOT 1 */
diff --git a/library/bignum_core.h b/library/bignum_core.h
index 24559c6..b7af4d0 100644
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@@ -499,27 +499,53 @@
 /* BEGIN MERGE SLOT 1 */
 
 /**
- * \brief          Perform a modular exponentiation with secret exponent:
- *                 X = A^E mod N
+ * \brief          Returns the number of limbs of working memory required for
+ *                 a call to `mbedtls_mpi_core_exp_mod()`.
  *
- * \param[out] X   The destination MPI, as a little endian array of length
- *                 \p AN_limbs.
- * \param[in] A    The base MPI, as a little endian array of length \p AN_limbs.
- * \param[in] N    The modulus, as a little endian array of length \p AN_limbs.
- * \param AN_limbs The number of limbs in \p X, \p A, \p N, \p RR.
- * \param[in] E    The exponent, as a little endian array of length \p E_limbs.
- * \param E_limbs  The number of limbs in \p E.
- * \param[in] RR   The precomputed residue of 2^{2*biL} modulo N, as a little
- *                 endian array of length \p AN_limbs.
+ * \param AN_limbs The number of limbs in the input `A` and the modulus `N`
+ *                 (they must be the same size) that will be given to
+ *                 `mbedtls_mpi_core_exp_mod()`.
+ * \param E_limbs  The number of limbs in the exponent `E` that will be given
+ *                 to `mbedtls_mpi_core_exp_mod()`.
  *
- * \return         \c 0 if successful.
- * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
+ * \return         The number of limbs of working memory required by
+ *                 `mbedtls_mpi_core_exp_mod()`.
  */
-int mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
-                              const mbedtls_mpi_uint *A,
-                              const mbedtls_mpi_uint *N, size_t AN_limbs,
-                              const mbedtls_mpi_uint *E, size_t E_limbs,
-                              const mbedtls_mpi_uint *RR );
+size_t mbedtls_mpi_core_exp_mod_working_limbs( size_t AN_limbs, size_t E_limbs );
+
+/**
+ * \brief            Perform a modular exponentiation with secret exponent:
+ *                   X = A^E mod N, where \p A is already in Montgomery form.
+ *
+ * \p X may be aliased to \p A, but not to \p RR or \p E, even if \p E_limbs ==
+ * \p AN_limbs.
+ *
+ * \param[out] X     The destination MPI, as a little endian array of length
+ *                   \p AN_limbs.
+ * \param[in] A      The base MPI, as a little endian array of length \p AN_limbs.
+ *                   Must be in Montgomery form.
+ * \param[in] N      The modulus, as a little endian array of length \p AN_limbs.
+ * \param AN_limbs   The number of limbs in \p X, \p A, \p N, \p RR.
+ * \param[in] E      The exponent, as a little endian array of length \p E_limbs.
+ * \param E_limbs    The number of limbs in \p E.
+ * \param[in] RR     The precomputed residue of 2^{2*biL} modulo N, as a little
+ *                   endian array of length \p AN_limbs.
+ * \param[in,out] T  Temporary storage of at least the number of limbs returned
+ *                   by `mbedtls_mpi_core_exp_mod_working_limbs()`.
+ *                   Its initial content is unused and its final content is
+ *                   indeterminate.
+ *                   It must not alias or otherwise overlap any of the other
+ *                   parameters.
+ *                   It is up to the caller to zeroize \p T when it is no
+ *                   longer needed, and before freeing it if it was dynamically
+ *                   allocated.
+ */
+void mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
+                               const mbedtls_mpi_uint *A,
+                               const mbedtls_mpi_uint *N, size_t AN_limbs,
+                               const mbedtls_mpi_uint *E, size_t E_limbs,
+                               const mbedtls_mpi_uint *RR,
+                               mbedtls_mpi_uint *T );
 
 /* END MERGE SLOT 1 */
 
diff --git a/library/bignum_mod.c b/library/bignum_mod.c
index 7a5539d..7cf2fb2 100644
--- a/library/bignum_mod.c
+++ b/library/bignum_mod.c
@@ -179,7 +179,18 @@
 /* END MERGE SLOT 2 */
 
 /* BEGIN MERGE SLOT 3 */
+int mbedtls_mpi_mod_sub( mbedtls_mpi_mod_residue *X,
+                         const mbedtls_mpi_mod_residue *A,
+                         const mbedtls_mpi_mod_residue *B,
+                         const mbedtls_mpi_mod_modulus *N )
+{
+    if( X->limbs != N->limbs || A->limbs != N->limbs || B->limbs != N->limbs )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
 
+    mbedtls_mpi_mod_raw_sub( X->p, A->p, B->p, N );
+    /* No status is taken from the raw subtraction: matching sizes => success. */
+    return( 0 );
+}
 /* END MERGE SLOT 3 */
 
 /* BEGIN MERGE SLOT 4 */
diff --git a/library/bignum_mod.h b/library/bignum_mod.h
index d92f21e..0a8f4d3 100644
--- a/library/bignum_mod.h
+++ b/library/bignum_mod.h
@@ -163,7 +163,35 @@
 /* END MERGE SLOT 2 */
 
 /* BEGIN MERGE SLOT 3 */
-
+/**
+ * \brief Perform a fixed-size modular subtraction.
+ *
+ * Calculate `A - B modulo N`.
+ *
+ * \p A, \p B and \p X must all have the same number of limbs as \p N.
+ *
+ * \p X may be aliased to \p A or \p B, or even both, but may not overlap
+ * either otherwise.
+ *
+ * \note This function does not check that \p A or \p B are in canonical
+ *       form (that is, are < \p N) - that will have been done by
+ *       mbedtls_mpi_mod_residue_setup().
+ *
+ * \param[out] X    The address of the result MPI. Must be initialized.
+ *                  Must have the same number of limbs as the modulus \p N.
+ * \param[in]  A    The address of the first MPI.
+ * \param[in]  B    The address of the second MPI.
+ * \param[in]  N    The address of the modulus. Used to perform a modulo
+ *                  operation on the result of the subtraction.
+ *
+ * \return          \c 0 if successful.
+ * \return          #MBEDTLS_ERR_MPI_BAD_INPUT_DATA if the given MPIs do not
+ *                  have the correct number of limbs.
+ */
+int mbedtls_mpi_mod_sub( mbedtls_mpi_mod_residue *X,
+                         const mbedtls_mpi_mod_residue *A,
+                         const mbedtls_mpi_mod_residue *B,
+                         const mbedtls_mpi_mod_modulus *N );
 /* END MERGE SLOT 3 */
 
 /* BEGIN MERGE SLOT 4 */
diff --git a/library/bignum_mod_raw.c b/library/bignum_mod_raw.c
index 22e56b7..c98a1c1 100644
--- a/library/bignum_mod_raw.c
+++ b/library/bignum_mod_raw.c
@@ -124,6 +124,37 @@
 
 /* BEGIN MERGE SLOT 3 */
 
+size_t mbedtls_mpi_mod_raw_inv_prime_working_limbs( size_t AN_limbs )
+{
+    /* Working space for mbedtls_mpi_core_exp_mod(), called with an exponent
+     * that has as many limbs (AN_limbs) as the modulus and the input. */
+    const size_t exp_mod_limbs =
+        mbedtls_mpi_core_exp_mod_working_limbs( AN_limbs, AN_limbs );
+    return( AN_limbs + exp_mod_limbs ); /* + a temporary for the exponent */
+}
+
+void mbedtls_mpi_mod_raw_inv_prime( mbedtls_mpi_uint *X,
+                                    const mbedtls_mpi_uint *A,
+                                    const mbedtls_mpi_uint *N,
+                                    size_t AN_limbs,
+                                    const mbedtls_mpi_uint *RR,
+                                    mbedtls_mpi_uint *T )
+{
+    /* Inversion by exponentiation: the nonzero residues modulo a prime N
+     * form a group of order N - 1, so A^(N-1) = 1 and therefore
+     * A^(-1) = A^(N-2).
+     */
+
+    /* T is split in two: its first AN_limbs limbs hold the exponent N - 2,
+     * everything after that is working space for the exponentiation. */
+    mbedtls_mpi_uint *exponent = T;
+    mbedtls_mpi_uint *exp_mod_scratch = T + AN_limbs;
+
+    (void) mbedtls_mpi_core_sub_int( exponent, N, 2, AN_limbs );
+    mbedtls_mpi_core_exp_mod( X, A, N, AN_limbs, exponent, AN_limbs,
+                              RR, exp_mod_scratch );
+}
+
 /* END MERGE SLOT 3 */
 
 /* BEGIN MERGE SLOT 4 */
@@ -182,6 +213,18 @@
     mbedtls_free( T );
     return( 0 );
 }
+
+void mbedtls_mpi_mod_raw_neg( mbedtls_mpi_uint *X,
+                              const mbedtls_mpi_uint *A,
+                              const mbedtls_mpi_mod_modulus *m )
+{
+    /* X = N - A, where N denotes the modulus value held in m->p. */
+    mbedtls_mpi_core_sub( X, m->p, A, m->limbs );
+    /* If A=0 initially, then X=N now. Detect this by subtracting N and
+     * catching the borrow; N is added back only when a borrow occurred. */
+    mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub( X, X, m->p, m->limbs );
+    (void) mbedtls_mpi_core_add_if( X, m->p, m->limbs, (unsigned) borrow  );
+}
 /* END MERGE SLOT 7 */
 
 /* BEGIN MERGE SLOT 8 */
diff --git a/library/bignum_mod_raw.h b/library/bignum_mod_raw.h
index d7b6dd1..f9968ba 100644
--- a/library/bignum_mod_raw.h
+++ b/library/bignum_mod_raw.h
@@ -174,6 +174,51 @@
 
 /* BEGIN MERGE SLOT 3 */
 
+/**
+ * \brief          Returns the number of limbs of working memory required for
+ *                 a call to `mbedtls_mpi_mod_raw_inv_prime()`.
+ *
+ * \param AN_limbs The number of limbs in the input `A` and the modulus `N`
+ *                 (they must be the same size) that will be given to
+ *                 `mbedtls_mpi_mod_raw_inv_prime()`.
+ *
+ * \return         The number of limbs of working memory required by
+ *                 `mbedtls_mpi_mod_raw_inv_prime()`.
+ */
+size_t mbedtls_mpi_mod_raw_inv_prime_working_limbs( size_t AN_limbs );
+
+/**
+ * \brief Perform fixed-width modular inversion of a Montgomery-form MPI with
+ *        respect to a modulus \p N that must be prime.
+ *
+ * \p X may be aliased to \p A, but not to \p N or \p RR.
+ *
+ * \param[out] X     The modular inverse of \p A with respect to \p N.
+ *                   Will be in Montgomery form.
+ * \param[in] A      The number to calculate the modular inverse of.
+ *                   Must be in Montgomery form. Must not be 0.
+ * \param[in] N      The modulus, as a little-endian array of length \p AN_limbs.
+ *                   Must be prime.
+ * \param AN_limbs   The number of limbs in \p X, \p A, \p N and \p RR.
+ * \param[in] RR     The precomputed residue of 2^{2*biL} modulo N, as a little-
+ *                   endian array of length \p AN_limbs.
+ * \param[in,out] T  Temporary storage of at least the number of limbs returned
+ *                   by `mbedtls_mpi_mod_raw_inv_prime_working_limbs()`.
+ *                   Its initial content is unused and its final content is
+ *                   indeterminate.
+ *                   It must not alias or otherwise overlap any of the other
+ *                   parameters.
+ *                   It is up to the caller to zeroize \p T when it is no
+ *                   longer needed, and before freeing it if it was dynamically
+ *                   allocated.
+ */
+void mbedtls_mpi_mod_raw_inv_prime( mbedtls_mpi_uint *X,
+                                    const mbedtls_mpi_uint *A,
+                                    const mbedtls_mpi_uint *N,
+                                    size_t AN_limbs,
+                                    const mbedtls_mpi_uint *RR,
+                                    mbedtls_mpi_uint *T );
+
 /* END MERGE SLOT 3 */
 
 /* BEGIN MERGE SLOT 4 */
@@ -233,6 +278,23 @@
  */
 int mbedtls_mpi_mod_raw_from_mont_rep( mbedtls_mpi_uint *X,
                                        const mbedtls_mpi_mod_modulus *m );
+
+/** \brief  Perform fixed width modular negation.
+ *
+ * The size of the operation is determined by \p m. \p A must have
+ * the same number of limbs as \p m.
+ *
+ * \p X may be aliased to \p A.
+ *
+ * \param[out] X        The result of the modular negation.
+ *                      This must be initialized.
+ * \param[in] A         Little-endian presentation of the input operand. This
+ *                      must be less than or equal to \p m.
+ * \param[in] m         The modulus to use.
+ */
+void mbedtls_mpi_mod_raw_neg( mbedtls_mpi_uint *X,
+                              const mbedtls_mpi_uint *A,
+                              const mbedtls_mpi_mod_modulus *m);
 /* END MERGE SLOT 7 */
 
 /* BEGIN MERGE SLOT 8 */
diff --git a/library/camellia.c b/library/camellia.c
index 5dd6c56..6e781c7 100644
--- a/library/camellia.c
+++ b/library/camellia.c
@@ -526,7 +526,6 @@
                                 const unsigned char *input,
                                 unsigned char *output )
 {
-    int i;
     unsigned char temp[16];
     if( mode != MBEDTLS_CAMELLIA_ENCRYPT && mode != MBEDTLS_CAMELLIA_DECRYPT )
         return MBEDTLS_ERR_CAMELLIA_BAD_INPUT_DATA;
@@ -541,8 +540,7 @@
             memcpy( temp, input, 16 );
             mbedtls_camellia_crypt_ecb( ctx, mode, input, output );
 
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 16 );
 
             memcpy( iv, temp, 16 );
 
@@ -555,8 +553,7 @@
     {
         while( length > 0 )
         {
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 16 );
 
             mbedtls_camellia_crypt_ecb( ctx, mode, output, output );
             memcpy( iv, output, 16 );
diff --git a/library/ccm.c b/library/ccm.c
index 3edfba3..065eb60 100644
--- a/library/ccm.c
+++ b/library/ccm.c
@@ -112,7 +112,6 @@
                               const unsigned char *input,
                               unsigned char *output )
 {
-    size_t i;
     size_t olen = 0;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char tmp_buf[16] = {0};
@@ -125,8 +124,7 @@
         return ret;
     }
 
-    for( i = 0; i < use_len; i++ )
-        output[i] = input[i] ^ tmp_buf[offset + i];
+    mbedtls_xor( output, input, tmp_buf + offset, use_len );
 
     mbedtls_platform_zeroize(tmp_buf, sizeof(tmp_buf));
     return ret;
@@ -144,7 +142,7 @@
     unsigned char i;
     size_t len_left, olen;
 
-    /* length calulcation can be done only after both
+    /* length calculation can be done only after both
      * mbedtls_ccm_starts() and mbedtls_ccm_set_lengths() have been executed
      */
     if( !(ctx->state & CCM_STATE__STARTED) || !(ctx->state & CCM_STATE__LENGTHS_SET) )
@@ -269,7 +267,6 @@
                            size_t add_len )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    unsigned char i;
     size_t olen, use_len, offset;
 
     if( ctx->state & CCM_STATE__ERROR )
@@ -310,8 +307,7 @@
             if( use_len > add_len )
                 use_len = add_len;
 
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i + offset] ^= add[i];
+            mbedtls_xor( ctx->y + offset, ctx->y + offset, add, use_len );
 
             ctx->processed += use_len;
             add_len -= use_len;
@@ -381,8 +377,7 @@
         if( ctx->mode == MBEDTLS_CCM_ENCRYPT || \
             ctx->mode == MBEDTLS_CCM_STAR_ENCRYPT )
         {
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i + offset] ^= input[i];
+            mbedtls_xor( ctx->y + offset, ctx->y + offset, input, use_len );
 
             if( use_len + offset == 16 || ctx->processed == ctx->plaintext_len )
             {
@@ -411,8 +406,7 @@
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i + offset] ^= local_output[i];
+            mbedtls_xor( ctx->y + offset, ctx->y + offset, local_output, use_len );
 
             memcpy( output, local_output, use_len );
             mbedtls_platform_zeroize( local_output, 16 );
diff --git a/library/chacha20.c b/library/chacha20.c
index 85d7461..d17c58c 100644
--- a/library/chacha20.c
+++ b/library/chacha20.c
@@ -217,7 +217,6 @@
                               unsigned char *output )
 {
     size_t offset = 0U;
-    size_t i;
 
     /* Use leftover keystream bytes, if available */
     while( size > 0U && ctx->keystream_bytes_used < CHACHA20_BLOCK_SIZE_BYTES )
@@ -237,17 +236,7 @@
         chacha20_block( ctx->state, ctx->keystream8 );
         ctx->state[CHACHA20_CTR_INDEX]++;
 
-        for( i = 0U; i < 64U; i += 8U )
-        {
-            output[offset + i  ] = input[offset + i  ] ^ ctx->keystream8[i  ];
-            output[offset + i+1] = input[offset + i+1] ^ ctx->keystream8[i+1];
-            output[offset + i+2] = input[offset + i+2] ^ ctx->keystream8[i+2];
-            output[offset + i+3] = input[offset + i+3] ^ ctx->keystream8[i+3];
-            output[offset + i+4] = input[offset + i+4] ^ ctx->keystream8[i+4];
-            output[offset + i+5] = input[offset + i+5] ^ ctx->keystream8[i+5];
-            output[offset + i+6] = input[offset + i+6] ^ ctx->keystream8[i+6];
-            output[offset + i+7] = input[offset + i+7] ^ ctx->keystream8[i+7];
-        }
+        mbedtls_xor( output + offset, input + offset, ctx->keystream8, 64U );
 
         offset += CHACHA20_BLOCK_SIZE_BYTES;
         size   -= CHACHA20_BLOCK_SIZE_BYTES;
@@ -260,10 +249,7 @@
         chacha20_block( ctx->state, ctx->keystream8 );
         ctx->state[CHACHA20_CTR_INDEX]++;
 
-        for( i = 0U; i < size; i++)
-        {
-            output[offset + i] = input[offset + i] ^ ctx->keystream8[i];
-        }
+        mbedtls_xor( output + offset, input + offset, ctx->keystream8, size );
 
         ctx->keystream_bytes_used = size;
 
diff --git a/library/cmac.c b/library/cmac.c
index 3cc49d1..9870856 100644
--- a/library/cmac.c
+++ b/library/cmac.c
@@ -148,15 +148,6 @@
 #endif /* !defined(MBEDTLS_CMAC_ALT) || defined(MBEDTLS_SELF_TEST) */
 
 #if !defined(MBEDTLS_CMAC_ALT)
-static void cmac_xor_block( unsigned char *output, const unsigned char *input1,
-                            const unsigned char *input2,
-                            const size_t block_size )
-{
-    size_t idx;
-
-    for( idx = 0; idx < block_size; idx++ )
-        output[ idx ] = input1[ idx ] ^ input2[ idx ];
-}
 
 /*
  * Create padded last block from (partial) last block.
@@ -247,7 +238,7 @@
                 input,
                 block_size - cmac_ctx->unprocessed_len );
 
-        cmac_xor_block( state, cmac_ctx->unprocessed_block, state, block_size );
+        mbedtls_xor( state, cmac_ctx->unprocessed_block, state, block_size );
 
         if( ( ret = mbedtls_cipher_update( ctx, state, block_size, state,
                                            &olen ) ) != 0 )
@@ -267,7 +258,7 @@
      * final partial or complete block */
     for( j = 1; j < n; j++ )
     {
-        cmac_xor_block( state, input, state, block_size );
+        mbedtls_xor( state, input, state, block_size );
 
         if( ( ret = mbedtls_cipher_update( ctx, state, block_size, state,
                                            &olen ) ) != 0 )
@@ -319,16 +310,16 @@
     if( cmac_ctx->unprocessed_len < block_size )
     {
         cmac_pad( M_last, block_size, last_block, cmac_ctx->unprocessed_len );
-        cmac_xor_block( M_last, M_last, K2, block_size );
+        mbedtls_xor( M_last, M_last, K2, block_size );
     }
     else
     {
         /* Last block is complete block */
-        cmac_xor_block( M_last, last_block, K1, block_size );
+        mbedtls_xor( M_last, last_block, K1, block_size );
     }
 
 
-    cmac_xor_block( state, M_last, state, block_size );
+    mbedtls_xor( state, M_last, state, block_size );
     if( ( ret = mbedtls_cipher_update( ctx, state, block_size, state,
                                        &olen ) ) != 0 )
     {
diff --git a/library/common.h b/library/common.h
index 25d5294..9d3b8fe 100644
--- a/library/common.h
+++ b/library/common.h
@@ -24,9 +24,11 @@
 #define MBEDTLS_LIBRARY_COMMON_H
 
 #include "mbedtls/build_info.h"
+#include "alignment.h"
 
 #include <stddef.h>
 #include <stdint.h>
+#include <stddef.h>
 
 /** Helper to define a function as static except when building invasive tests.
  *
@@ -107,327 +109,30 @@
     return( p == NULL ? NULL : p + n );
 }
 
-/** Byte Reading Macros
- *
- * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
- * byte from x, where byte 0 is the least significant byte.
- */
-#define MBEDTLS_BYTE_0( x ) ( (uint8_t) (   ( x )         & 0xff ) )
-#define MBEDTLS_BYTE_1( x ) ( (uint8_t) ( ( ( x ) >> 8  ) & 0xff ) )
-#define MBEDTLS_BYTE_2( x ) ( (uint8_t) ( ( ( x ) >> 16 ) & 0xff ) )
-#define MBEDTLS_BYTE_3( x ) ( (uint8_t) ( ( ( x ) >> 24 ) & 0xff ) )
-#define MBEDTLS_BYTE_4( x ) ( (uint8_t) ( ( ( x ) >> 32 ) & 0xff ) )
-#define MBEDTLS_BYTE_5( x ) ( (uint8_t) ( ( ( x ) >> 40 ) & 0xff ) )
-#define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
-#define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
-
 /**
- * Get the unsigned 32 bits integer corresponding to four bytes in
- * big-endian order (MSB first).
+ * Perform a fast block XOR operation, such that
+ * r[i] = a[i] ^ b[i] where 0 <= i < n
  *
- * \param   data    Base address of the memory to get the four bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the four bytes to build the 32 bits unsigned
- *                  integer from.
+ * \param   r Pointer to result (buffer of at least \p n bytes). \p r
+ *            may be equal to either \p a or \p b, but behaviour when
+ *            it overlaps in other ways is undefined.
+ * \param   a Pointer to input (buffer of at least \p n bytes).
+ * \param   b Pointer to input (buffer of at least \p n bytes).
+ * \param   n Number of bytes to process.
  */
-#ifndef MBEDTLS_GET_UINT32_BE
-#define MBEDTLS_GET_UINT32_BE( data , offset )                  \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ] << 24 )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 3]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 32 bits unsigned integer in big-endian order.
- *
- * \param   n       32 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 32
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 32 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT32_BE
-#define MBEDTLS_PUT_UINT32_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_0( n );             \
+inline void mbedtls_xor( unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n )
+{
+    size_t i;
+    for ( i = 0; ( i + 4 ) <= n; i += 4 )
+    {
+        uint32_t x = mbedtls_get_unaligned_uint32( a + i ) ^ mbedtls_get_unaligned_uint32( b + i );
+        mbedtls_put_unaligned_uint32( r + i, x );
+    }
+    for ( ; i < n; i++ )
+    {
+        r[i] = a[i] ^ b[i];
+    }
 }
-#endif
-
-/**
- * Get the unsigned 32 bits integer corresponding to four bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the four bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the four bytes to build the 32 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT32_LE
-#define MBEDTLS_GET_UINT32_LE( data, offset )                   \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ]       )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 3] << 24 )         \
-    )
-#endif
-
-/**
- * Put in memory a 32 bits unsigned integer in little-endian order.
- *
- * \param   n       32 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 32
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 32 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT32_LE
-#define MBEDTLS_PUT_UINT32_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 16 bits integer corresponding to two bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the two bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the two bytes to build the 16 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT16_LE
-#define MBEDTLS_GET_UINT16_LE( data, offset )                   \
-    (                                                           \
-          ( (uint16_t) ( data )[( offset )    ]       )         \
-        | ( (uint16_t) ( data )[( offset ) + 1] <<  8 )         \
-    )
-#endif
-
-/**
- * Put in memory a 16 bits unsigned integer in little-endian order.
- *
- * \param   n       16 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 16
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 16 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT16_LE
-#define MBEDTLS_PUT_UINT16_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 16 bits integer corresponding to two bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the two bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the two bytes to build the 16 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT16_BE
-#define MBEDTLS_GET_UINT16_BE( data, offset )                   \
-    (                                                           \
-          ( (uint16_t) ( data )[( offset )    ] << 8 )          \
-        | ( (uint16_t) ( data )[( offset ) + 1]      )          \
-    )
-#endif
-
-/**
- * Put in memory a 16 bits unsigned integer in big-endian order.
- *
- * \param   n       16 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 16
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 16 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT16_BE
-#define MBEDTLS_PUT_UINT16_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 24 bits integer corresponding to three bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the three bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the three bytes to build the 24 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT24_BE
-#define MBEDTLS_GET_UINT24_BE( data , offset )                  \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] << 8  )         \
-        | ( (uint32_t) ( data )[( offset ) + 2]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 24 bits unsigned integer in big-endian order.
- *
- * \param   n       24 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 24
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 24 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT24_BE
-#define MBEDTLS_PUT_UINT24_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 24 bits integer corresponding to three bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the three bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the three bytes to build the 24 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT24_LE
-#define MBEDTLS_GET_UINT24_LE( data, offset )                   \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ]       )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
-    )
-#endif
-
-/**
- * Put in memory a 24 bits unsigned integer in little-endian order.
- *
- * \param   n       24 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 24
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 24 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT24_LE
-#define MBEDTLS_PUT_UINT24_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 64 bits integer corresponding to eight bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the eight bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the eight bytes to build the 64 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT64_BE
-#define MBEDTLS_GET_UINT64_BE( data, offset )                   \
-    (                                                           \
-          ( (uint64_t) ( data )[( offset )    ] << 56 )         \
-        | ( (uint64_t) ( data )[( offset ) + 1] << 48 )         \
-        | ( (uint64_t) ( data )[( offset ) + 2] << 40 )         \
-        | ( (uint64_t) ( data )[( offset ) + 3] << 32 )         \
-        | ( (uint64_t) ( data )[( offset ) + 4] << 24 )         \
-        | ( (uint64_t) ( data )[( offset ) + 5] << 16 )         \
-        | ( (uint64_t) ( data )[( offset ) + 6] <<  8 )         \
-        | ( (uint64_t) ( data )[( offset ) + 7]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 64 bits unsigned integer in big-endian order.
- *
- * \param   n       64 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 64
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 64 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT64_BE
-#define MBEDTLS_PUT_UINT64_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_7( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_6( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_5( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_4( n );             \
-    ( data )[( offset ) + 4] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 5] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 6] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 7] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 64 bits integer corresponding to eight bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the eight bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the eight bytes to build the 64 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT64_LE
-#define MBEDTLS_GET_UINT64_LE( data, offset )                   \
-    (                                                           \
-          ( (uint64_t) ( data )[( offset ) + 7] << 56 )         \
-        | ( (uint64_t) ( data )[( offset ) + 6] << 48 )         \
-        | ( (uint64_t) ( data )[( offset ) + 5] << 40 )         \
-        | ( (uint64_t) ( data )[( offset ) + 4] << 32 )         \
-        | ( (uint64_t) ( data )[( offset ) + 3] << 24 )         \
-        | ( (uint64_t) ( data )[( offset ) + 2] << 16 )         \
-        | ( (uint64_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint64_t) ( data )[( offset )    ]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 64 bits unsigned integer in little-endian order.
- *
- * \param   n       64 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 64
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 64 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT64_LE
-#define MBEDTLS_PUT_UINT64_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 4] = MBEDTLS_BYTE_4( n );             \
-    ( data )[( offset ) + 5] = MBEDTLS_BYTE_5( n );             \
-    ( data )[( offset ) + 6] = MBEDTLS_BYTE_6( n );             \
-    ( data )[( offset ) + 7] = MBEDTLS_BYTE_7( n );             \
-}
-#endif
 
 /* Fix MSVC C99 compatible issue
  *      MSVC support __func__ from visual studio 2015( 1900 )
diff --git a/library/ctr_drbg.c b/library/ctr_drbg.c
index 71c48af..f5c5e7b 100644
--- a/library/ctr_drbg.c
+++ b/library/ctr_drbg.c
@@ -174,8 +174,7 @@
 
         while( use_len > 0 )
         {
-            for( i = 0; i < MBEDTLS_CTR_DRBG_BLOCKSIZE; i++ )
-                chain[i] ^= p[i];
+            mbedtls_xor( chain, chain, p, MBEDTLS_CTR_DRBG_BLOCKSIZE );
             p += MBEDTLS_CTR_DRBG_BLOCKSIZE;
             use_len -= ( use_len >= MBEDTLS_CTR_DRBG_BLOCKSIZE ) ?
                        MBEDTLS_CTR_DRBG_BLOCKSIZE : use_len;
diff --git a/library/debug.c b/library/debug.c
index 6114a46..78ce9ce 100644
--- a/library/debug.c
+++ b/library/debug.c
@@ -107,7 +107,7 @@
     /*
      * With non-blocking I/O and examples that just retry immediately,
      * the logs would be quickly flooded with WANT_READ, so ignore that.
-     * Don't ignore WANT_WRITE however, since is is usually rare.
+     * Don't ignore WANT_WRITE however, since it is usually rare.
      */
     if( ret == MBEDTLS_ERR_SSL_WANT_READ )
         return;
diff --git a/library/des.c b/library/des.c
index 65f5681..c56d4d4 100644
--- a/library/des.c
+++ b/library/des.c
@@ -635,7 +635,6 @@
                     const unsigned char *input,
                     unsigned char *output )
 {
-    int i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char temp[8];
 
@@ -646,8 +645,7 @@
     {
         while( length > 0 )
         {
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 8 );
 
             ret = mbedtls_des_crypt_ecb( ctx, output, output );
             if( ret != 0 )
@@ -668,8 +666,7 @@
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 8 );
 
             memcpy( iv, temp, 8 );
 
@@ -741,7 +738,6 @@
                      const unsigned char *input,
                      unsigned char *output )
 {
-    int i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char temp[8];
 
@@ -752,8 +748,7 @@
     {
         while( length > 0 )
         {
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 8 );
 
             ret = mbedtls_des3_crypt_ecb( ctx, output, output );
             if( ret != 0 )
@@ -774,8 +769,7 @@
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 8 );
 
             memcpy( iv, temp, 8 );
 
diff --git a/library/entropy.c b/library/entropy.c
index 1e0d9d3..545fd9d 100644
--- a/library/entropy.c
+++ b/library/entropy.c
@@ -564,7 +564,7 @@
 }
 
 /*
- * A test to ensure hat the entropy sources are functioning correctly
+ * A test to ensure that the entropy sources are functioning correctly
  * and there is no obvious failure. The test performs the following checks:
  *  - The entropy source is not providing only 0s (all bits unset) or 1s (all
  *    bits set).
diff --git a/library/gcm.c b/library/gcm.c
index f004a73c..0178b5b 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -235,7 +235,6 @@
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char work_buf[16];
-    size_t i;
     const unsigned char *p;
     size_t use_len, olen = 0;
     uint64_t iv_bits;
@@ -268,8 +267,7 @@
         {
             use_len = ( iv_len < 16 ) ? iv_len : 16;
 
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i] ^= p[i];
+            mbedtls_xor( ctx->y, ctx->y, p, use_len );
 
             gcm_mult( ctx, ctx->y, ctx->y );
 
@@ -277,8 +275,7 @@
             p += use_len;
         }
 
-        for( i = 0; i < 16; i++ )
-            ctx->y[i] ^= work_buf[i];
+        mbedtls_xor( ctx->y, ctx->y, work_buf, 16);
 
         gcm_mult( ctx, ctx->y, ctx->y );
     }
@@ -313,7 +310,7 @@
                            const unsigned char *add, size_t add_len )
 {
     const unsigned char *p;
-    size_t use_len, i, offset;
+    size_t use_len, offset;
 
     /* IV is limited to 2^64 bits, so 2^61 bytes */
     if( (uint64_t) add_len >> 61 != 0 )
@@ -328,8 +325,7 @@
         if( use_len > add_len )
             use_len = add_len;
 
-        for( i = 0; i < use_len; i++ )
-            ctx->buf[i+offset] ^= p[i];
+        mbedtls_xor( ctx->buf + offset, ctx->buf + offset, p, use_len );
 
         if( offset + use_len == 16 )
             gcm_mult( ctx, ctx->buf, ctx->buf );
@@ -343,8 +339,7 @@
 
     while( add_len >= 16 )
     {
-        for( i = 0; i < 16; i++ )
-            ctx->buf[i] ^= p[i];
+        mbedtls_xor( ctx->buf, ctx->buf, p, 16 );
 
         gcm_mult( ctx, ctx->buf, ctx->buf );
 
@@ -354,8 +349,7 @@
 
     if( add_len > 0 )
     {
-        for( i = 0; i < add_len; i++ )
-            ctx->buf[i] ^= p[i];
+        mbedtls_xor( ctx->buf, ctx->buf, p, add_len );
     }
 
     return( 0 );
@@ -378,7 +372,6 @@
                      const unsigned char *input,
                      unsigned char *output )
 {
-    size_t i;
     size_t olen = 0;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
@@ -389,14 +382,12 @@
         return( ret );
     }
 
-    for( i = 0; i < use_len; i++ )
-    {
-        if( ctx->mode == MBEDTLS_GCM_DECRYPT )
-            ctx->buf[offset + i] ^= input[i];
-        output[i] = ectr[offset + i] ^ input[i];
-        if( ctx->mode == MBEDTLS_GCM_ENCRYPT )
-            ctx->buf[offset + i] ^= output[i];
-    }
+    if( ctx->mode == MBEDTLS_GCM_DECRYPT )
+        mbedtls_xor( ctx->buf + offset, ctx->buf + offset, input, use_len );
+    mbedtls_xor( output, ectr + offset, input, use_len );
+    if( ctx->mode == MBEDTLS_GCM_ENCRYPT )
+        mbedtls_xor( ctx->buf + offset, ctx->buf + offset, output, use_len );
+
     return( 0 );
 }
 
@@ -489,7 +480,6 @@
                         unsigned char *tag, size_t tag_len )
 {
     unsigned char work_buf[16];
-    size_t i;
     uint64_t orig_len;
     uint64_t orig_add_len;
 
@@ -524,13 +514,11 @@
         MBEDTLS_PUT_UINT32_BE( ( orig_len     >> 32 ), work_buf, 8  );
         MBEDTLS_PUT_UINT32_BE( ( orig_len           ), work_buf, 12 );
 
-        for( i = 0; i < 16; i++ )
-            ctx->buf[i] ^= work_buf[i];
+        mbedtls_xor( ctx->buf, ctx->buf, work_buf, 16 );
 
         gcm_mult( ctx, ctx->buf, ctx->buf );
 
-        for( i = 0; i < tag_len; i++ )
-            tag[i] ^= ctx->buf[i];
+        mbedtls_xor( tag, tag, ctx->buf, tag_len );
     }
 
     return( 0 );
diff --git a/library/lmots.h b/library/lmots.h
index 39e8699..022dcf3 100644
--- a/library/lmots.h
+++ b/library/lmots.h
@@ -139,7 +139,7 @@
  *                           this public key.
  *
  * \param ctx                The initialized LMOTS context that contains the
- *                           publc key.
+ *                           public key.
  * \param key                The buffer into which the key will be output. Must
  *                           be at least #MBEDTLS_LMOTS_PUBLIC_KEY_LEN in size.
  *
diff --git a/library/md.c b/library/md.c
index 8efcf10..9c161a5 100644
--- a/library/md.c
+++ b/library/md.c
@@ -633,7 +633,6 @@
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char sum[MBEDTLS_MD_MAX_SIZE];
     unsigned char *ipad, *opad;
-    size_t i;
 
     if( ctx == NULL || ctx->md_info == NULL || ctx->hmac_ctx == NULL )
         return( MBEDTLS_ERR_MD_BAD_INPUT_DATA );
@@ -657,11 +656,8 @@
     memset( ipad, 0x36, ctx->md_info->block_size );
     memset( opad, 0x5C, ctx->md_info->block_size );
 
-    for( i = 0; i < keylen; i++ )
-    {
-        ipad[i] = (unsigned char)( ipad[i] ^ key[i] );
-        opad[i] = (unsigned char)( opad[i] ^ key[i] );
-    }
+    mbedtls_xor( ipad, ipad, key, keylen );
+    mbedtls_xor( opad, opad, key, keylen );
 
     if( ( ret = mbedtls_md_starts( ctx ) ) != 0 )
         goto cleanup;
diff --git a/library/mps_reader.c b/library/mps_reader.c
index 6f823bd..df97b6c 100644
--- a/library/mps_reader.c
+++ b/library/mps_reader.c
@@ -530,7 +530,7 @@
          * of the accumulator. */
         memmove( acc, acc + acc_backup_offset, acc_backup_len );
 
-        /* Copy uncmmitted parts of the current fragment to the
+        /* Copy uncommitted parts of the current fragment to the
          * accumulator. */
         memcpy( acc + acc_backup_len,
                 frag + frag_backup_offset, frag_backup_len );
diff --git a/library/pkcs5.c b/library/pkcs5.c
index ac5945a..1e3b17e 100644
--- a/library/pkcs5.c
+++ b/library/pkcs5.c
@@ -211,7 +211,6 @@
                               uint32_t key_length, unsigned char *output )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    int j;
     unsigned int i;
     unsigned char md1[MBEDTLS_MD_MAX_SIZE];
     unsigned char work[MBEDTLS_MD_MAX_SIZE];
@@ -263,8 +262,7 @@
 
             // U1 xor U2
             //
-            for( j = 0; j < md_size; j++ )
-                work[j] ^= md1[j];
+            mbedtls_xor( work, work, md1, md_size );
         }
 
         use_len = ( key_length < md_size ) ? key_length : md_size;
@@ -324,7 +322,6 @@
     mbedtls_md_free( &md_ctx );
     return( ret );
 #else
-    int j;
     unsigned int i;
     unsigned char md1[PSA_HASH_MAX_SIZE];
     unsigned char work[PSA_HASH_MAX_SIZE];
@@ -396,8 +393,7 @@
 
             // U1 xor U2
             //
-            for( j = 0; j < md_size; j++ )
-                work[j] ^= md1[j];
+            mbedtls_xor( work, work, md1, md_size );
         }
 
         use_len = ( key_length < md_size ) ? key_length : md_size;
diff --git a/library/pkcs7.c b/library/pkcs7.c
index e4238b6..5b22afa 100644
--- a/library/pkcs7.c
+++ b/library/pkcs7.c
@@ -314,7 +314,7 @@
     if( ret != 0 )
         goto out;
 
-    /* Asssume authenticatedAttributes is nonexistent */
+    /* Assume authenticatedAttributes is nonexistent */
 
     ret = pkcs7_get_digest_algorithm( p, end_signer, &signer->sig_alg_identifier );
     if( ret != 0 )
diff --git a/library/platform_util.c b/library/platform_util.c
index 916a7f4..2b674f6 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -143,3 +143,20 @@
 void (*mbedtls_test_hook_test_fail)( const char *, int, const char *);
 #endif /* MBEDTLS_TEST_HOOKS */
 
+/*
+ * Provide external definitions of some inline functions so that the compiler
+ * has the option to not inline them
+ */
+extern inline void mbedtls_xor( unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n );
+
+extern inline uint16_t mbedtls_get_unaligned_uint16( const void *p );
+
+extern inline void mbedtls_put_unaligned_uint16( void *p, uint16_t x );
+
+extern inline uint32_t mbedtls_get_unaligned_uint32( const void *p );
+
+extern inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x );
+
+extern inline uint64_t mbedtls_get_unaligned_uint64( const void *p );
+
+extern inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x );
diff --git a/library/psa_crypto.c b/library/psa_crypto.c
index c73f342..cb5791f 100644
--- a/library/psa_crypto.c
+++ b/library/psa_crypto.c
@@ -5998,7 +5998,7 @@
     if( status != PSA_SUCCESS )
         return( status );
     /* Breaking up a request into smaller chunks is currently not supported
-     * for the extrernal RNG interface. */
+     * for the external RNG interface. */
     if( output_length != output_size )
         return( PSA_ERROR_INSUFFICIENT_ENTROPY );
     return( PSA_SUCCESS );
diff --git a/library/psa_crypto_aead.h b/library/psa_crypto_aead.h
index 17b3953..70f714a 100644
--- a/library/psa_crypto_aead.h
+++ b/library/psa_crypto_aead.h
@@ -508,4 +508,4 @@
 psa_status_t mbedtls_psa_aead_abort(
     mbedtls_psa_aead_operation_t *operation );
 
-#endif /* PSA_CRYPTO_AEAD */
+#endif /* PSA_CRYPTO_AEAD_H */
diff --git a/library/psa_crypto_its.h b/library/psa_crypto_its.h
index 3a3f49a..1b8dc20 100644
--- a/library/psa_crypto_its.h
+++ b/library/psa_crypto_its.h
@@ -73,7 +73,7 @@
  * \return      A status indicating the success/failure of the operation
  *
  * \retval      #PSA_SUCCESS                     The operation completed successfully
- * \retval      #PSA_ERROR_NOT_PERMITTED         The operation failed because the provided `uid` value was already created with PSA_STORAGE_WRITE_ONCE_FLAG
+ * \retval      #PSA_ERROR_NOT_PERMITTED         The operation failed because the provided `uid` value was already created with PSA_STORAGE_FLAG_WRITE_ONCE
  * \retval      #PSA_ERROR_NOT_SUPPORTED         The operation failed because one or more of the flags provided in `create_flags` is not supported or is not valid
  * \retval      #PSA_ERROR_INSUFFICIENT_STORAGE  The operation failed because there was insufficient space on the storage medium
  * \retval      #PSA_ERROR_STORAGE_FAILURE       The operation failed because the physical storage has failed (Fatal error)
@@ -137,7 +137,7 @@
  *
  * \retval      #PSA_SUCCESS                  The operation completed successfully
  * \retval      #PSA_ERROR_DOES_NOT_EXIST     The operation failed because the provided key value was not found in the storage
- * \retval      #PSA_ERROR_NOT_PERMITTED      The operation failed because the provided key value was created with PSA_STORAGE_WRITE_ONCE_FLAG
+ * \retval      #PSA_ERROR_NOT_PERMITTED      The operation failed because the provided key value was created with PSA_STORAGE_FLAG_WRITE_ONCE
  * \retval      #PSA_ERROR_STORAGE_FAILURE    The operation failed because the physical storage has failed (Fatal error)
  */
 psa_status_t psa_its_remove(psa_storage_uid_t uid);
diff --git a/library/psa_crypto_rsa.h b/library/psa_crypto_rsa.h
index 197caa8..5835c6f 100644
--- a/library/psa_crypto_rsa.h
+++ b/library/psa_crypto_rsa.h
@@ -249,7 +249,7 @@
  * \retval #PSA_ERROR_INSUFFICIENT_MEMORY
  * \retval #PSA_ERROR_COMMUNICATION_FAILURE
  * \retval #PSA_ERROR_HARDWARE_FAILURE
- * \retval #PSA_ERROR_TAMPERING_DETECTED
+ * \retval #PSA_ERROR_CORRUPTION_DETECTED
  * \retval #PSA_ERROR_INSUFFICIENT_ENTROPY
  * \retval #PSA_ERROR_BAD_STATE
  *         The library has not been previously initialized by psa_crypto_init().
@@ -306,7 +306,7 @@
  * \retval #PSA_ERROR_INSUFFICIENT_MEMORY
  * \retval #PSA_ERROR_COMMUNICATION_FAILURE
  * \retval #PSA_ERROR_HARDWARE_FAILURE
- * \retval #PSA_ERROR_TAMPERING_DETECTED
+ * \retval #PSA_ERROR_CORRUPTION_DETECTED
  * \retval #PSA_ERROR_INSUFFICIENT_ENTROPY
  * \retval #PSA_ERROR_INVALID_PADDING
  * \retval #PSA_ERROR_BAD_STATE
diff --git a/library/psa_crypto_se.h b/library/psa_crypto_se.h
index 549dfb6..693c3ea 100644
--- a/library/psa_crypto_se.h
+++ b/library/psa_crypto_se.h
@@ -139,7 +139,7 @@
     psa_se_drv_table_entry_t *driver,
     psa_key_slot_number_t *slot_number );
 
-/** Destoy a key in a secure element.
+/** Destroy a key in a secure element.
  *
  * This function calls the relevant driver method to destroy a key
  * and updates the driver's persistent data.
diff --git a/library/ssl_client.c b/library/ssl_client.c
index 6d3789d..e838845 100644
--- a/library/ssl_client.c
+++ b/library/ssl_client.c
@@ -788,7 +788,7 @@
 
     /*
      * Generate the random bytes, except when responding to a verify request
-     * where we MUST reuse the previoulsy generated random bytes
+     * where we MUST reuse the previously generated random bytes
      * (RFC 6347 4.2.1).
      */
 #if defined(MBEDTLS_SSL_PROTO_DTLS)
diff --git a/library/ssl_misc.h b/library/ssl_misc.h
index ab667a2..89a2517 100644
--- a/library/ssl_misc.h
+++ b/library/ssl_misc.h
@@ -143,7 +143,7 @@
               MBEDTLS_SSL_EXT_MASK( TRUNCATED_HMAC )                         | \
               MBEDTLS_SSL_EXT_MASK( UNRECOGNIZED ) )
 
-/* RFC 8446 section 4.2. Allowed extensions for ClienHello */
+/* RFC 8446 section 4.2. Allowed extensions for ClientHello */
 #define MBEDTLS_SSL_TLS1_3_ALLOWED_EXTS_OF_CH                                  \
             ( MBEDTLS_SSL_EXT_MASK( SERVERNAME )                             | \
               MBEDTLS_SSL_EXT_MASK( MAX_FRAGMENT_LENGTH )                    | \
diff --git a/library/ssl_msg.c b/library/ssl_msg.c
index 040dc80..753998e 100644
--- a/library/ssl_msg.c
+++ b/library/ssl_msg.c
@@ -669,15 +669,12 @@
                                     unsigned char const *dynamic_iv,
                                     size_t dynamic_iv_len )
 {
-    size_t i;
-
     /* Start with Fixed IV || 0 */
     memset( dst_iv, 0, dst_iv_len );
     memcpy( dst_iv, fixed_iv, fixed_iv_len );
 
     dst_iv += dst_iv_len - dynamic_iv_len;
-    for( i = 0; i < dynamic_iv_len; i++ )
-        dst_iv[i] ^= dynamic_iv[i];
+    mbedtls_xor( dst_iv, dst_iv, dynamic_iv, dynamic_iv_len );
 }
 #endif /* MBEDTLS_GCM_C || MBEDTLS_CCM_C || MBEDTLS_CHACHAPOLY_C */
 
@@ -691,7 +688,7 @@
     int auth_done = 0;
     unsigned char * data;
     /* For an explanation of the additional data length see
-    * the descrpition of ssl_extract_add_data_from_record().
+    * the description of ssl_extract_add_data_from_record().
     */
 #if defined(MBEDTLS_SSL_DTLS_CONNECTION_ID)
     unsigned char add_data[23 + MBEDTLS_SSL_CID_OUT_LEN_MAX];
@@ -1289,7 +1286,7 @@
 #endif
     unsigned char* data;
     /* For an explanation of the additional data length see
-    * the descrpition of ssl_extract_add_data_from_record().
+    * the description of ssl_extract_add_data_from_record().
     */
 #if defined(MBEDTLS_SSL_DTLS_CONNECTION_ID)
     unsigned char add_data[23 + MBEDTLS_SSL_CID_IN_LEN_MAX];
@@ -4098,7 +4095,7 @@
     if( hs == NULL )
         return( -1 );
 
-    MBEDTLS_SSL_DEBUG_MSG( 2, ( "=> ssl_load_buffered_messsage" ) );
+    MBEDTLS_SSL_DEBUG_MSG( 2, ( "=> ssl_load_buffered_message" ) );
 
     if( ssl->state == MBEDTLS_SSL_CLIENT_CHANGE_CIPHER_SPEC ||
         ssl->state == MBEDTLS_SSL_SERVER_CHANGE_CIPHER_SPEC )
diff --git a/library/ssl_tls.c b/library/ssl_tls.c
index 83f2b3c..9bb9dc2 100644
--- a/library/ssl_tls.c
+++ b/library/ssl_tls.c
@@ -1704,6 +1704,15 @@
 {
     conf->early_data_enabled = early_data_enabled;
 }
+
+#if defined(MBEDTLS_SSL_SRV_C)
+void mbedtls_ssl_tls13_conf_max_early_data_size(
+         mbedtls_ssl_config *conf, uint32_t max_early_data_size )
+{
+    conf->max_early_data_size = max_early_data_size;
+}
+#endif /* MBEDTLS_SSL_SRV_C */
+
 #endif /* MBEDTLS_SSL_EARLY_DATA */
 #endif /* MBEDTLS_SSL_PROTO_TLS1_3 */
 
@@ -4241,7 +4250,7 @@
         MBEDTLS_SSL_DEBUG_MSG( 1, ( "There is pending outgoing data" ) );
         return( MBEDTLS_ERR_SSL_BAD_INPUT_DATA );
     }
-    /* Protocol must be DLTS, not TLS */
+    /* Protocol must be DTLS, not TLS */
     if( ssl->conf->transport != MBEDTLS_SSL_TRANSPORT_DATAGRAM )
     {
         MBEDTLS_SSL_DEBUG_MSG( 1, ( "Only DTLS is supported" ) );
@@ -5117,6 +5126,15 @@
 #endif
 
 #if defined(MBEDTLS_SSL_PROTO_TLS1_3)
+
+#if defined(MBEDTLS_SSL_EARLY_DATA)
+    mbedtls_ssl_tls13_conf_early_data( conf, MBEDTLS_SSL_EARLY_DATA_DISABLED );
+#if defined(MBEDTLS_SSL_SRV_C)
+    mbedtls_ssl_tls13_conf_max_early_data_size(
+        conf, MBEDTLS_SSL_MAX_EARLY_DATA_SIZE );
+#endif
+#endif /* MBEDTLS_SSL_EARLY_DATA */
+
 #if defined(MBEDTLS_SSL_SRV_C) && defined(MBEDTLS_SSL_SESSION_TICKETS)
     mbedtls_ssl_conf_new_session_tickets(
         conf, MBEDTLS_SSL_TLS1_3_DEFAULT_NEW_SESSION_TICKETS );
diff --git a/library/ssl_tls12_client.c b/library/ssl_tls12_client.c
index 0444f27..76588d3 100644
--- a/library/ssl_tls12_client.c
+++ b/library/ssl_tls12_client.c
@@ -1976,8 +1976,8 @@
     }
 
     /*
-     * Note: we currently ignore the PKS identity hint, as we only allow one
-     * PSK to be provisionned on the client. This could be changed later if
+     * Note: we currently ignore the PSK identity hint, as we only allow one
+     * PSK to be provisioned on the client. This could be changed later if
      * someone needs that feature.
      */
     *p += len;
diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c
index ce8767c..6caae89 100644
--- a/library/ssl_tls13_server.c
+++ b/library/ssl_tls13_server.c
@@ -634,7 +634,7 @@
 
     if( p_identity_len != identities_end || p_binder_len != binders_end )
     {
-        MBEDTLS_SSL_DEBUG_MSG( 3, ( "pre_shared_key extesion decode error" ) );
+        MBEDTLS_SSL_DEBUG_MSG( 3, ( "pre_shared_key extension decode error" ) );
         MBEDTLS_SSL_PEND_FATAL_ALERT( MBEDTLS_SSL_ALERT_MSG_DECODE_ERROR,
                                       MBEDTLS_ERR_SSL_DECODE_ERROR );
         return( MBEDTLS_ERR_SSL_DECODE_ERROR );
diff --git a/library/x509.c b/library/x509.c
index 362e036..be87973 100644
--- a/library/x509.c
+++ b/library/x509.c
@@ -233,7 +233,7 @@
  *
  * RFC 4055 (which defines use of RSASSA-PSS in PKIX) states that the value
  * of trailerField MUST be 1, and PKCS#1 v2.2 doesn't even define any other
- * option. Enfore this at parsing time.
+ * option. Enforce this at parsing time.
  */
 int mbedtls_x509_get_rsassa_pss_params( const mbedtls_x509_buf *params,
                                 mbedtls_md_type_t *md_alg, mbedtls_md_type_t *mgf_md,
diff --git a/library/x509_crl.c b/library/x509_crl.c
index d830fcd..dc2d2e3 100644
--- a/library/x509_crl.c
+++ b/library/x509_crl.c
@@ -1,5 +1,5 @@
 /*
- *  X.509 Certidicate Revocation List (CRL) parsing
+ *  X.509 Certificate Revocation List (CRL) parsing
  *
  *  Copyright The Mbed TLS Contributors
  *  SPDX-License-Identifier: Apache-2.0
diff --git a/programs/pkey/dh_genprime.c b/programs/pkey/dh_genprime.c
index 331838b..3e81d13 100644
--- a/programs/pkey/dh_genprime.c
+++ b/programs/pkey/dh_genprime.c
@@ -42,7 +42,7 @@
 
 #define USAGE \
     "\n usage: dh_genprime param=<>...\n"                                   \
-    "\n acceprable parameters:\n"                                           \
+    "\n acceptable parameters:\n"                                           \
     "    bits=%%d           default: 2048\n"
 
 #define DFL_BITS    2048
diff --git a/programs/psa/aead_demo.c b/programs/psa/aead_demo.c
index c4ed0dd..1efd132 100644
--- a/programs/psa/aead_demo.c
+++ b/programs/psa/aead_demo.c
@@ -104,7 +104,7 @@
 
 /* Run a PSA function and bail out if it fails.
  * The symbolic name of the error code can be recovered using:
- * programs/psa/psa_consant_name status <value> */
+ * programs/psa/psa_constant_name status <value> */
 #define PSA_CHECK( expr )                                       \
     do                                                          \
     {                                                           \
diff --git a/programs/psa/hmac_demo.c b/programs/psa/hmac_demo.c
index 6238892..f949a89 100644
--- a/programs/psa/hmac_demo.c
+++ b/programs/psa/hmac_demo.c
@@ -81,7 +81,7 @@
 
 /* Run a PSA function and bail out if it fails.
  * The symbolic name of the error code can be recovered using:
- * programs/psa/psa_consant_name status <value> */
+ * programs/psa/psa_constant_name status <value> */
 #define PSA_CHECK( expr )                                       \
     do                                                          \
     {                                                           \
diff --git a/programs/psa/key_ladder_demo.c b/programs/psa/key_ladder_demo.c
index 1303719..f40874e 100644
--- a/programs/psa/key_ladder_demo.c
+++ b/programs/psa/key_ladder_demo.c
@@ -713,4 +713,6 @@
     usage( );
     return( EXIT_FAILURE );
 }
-#endif /* MBEDTLS_SHA256_C && MBEDTLS_MD_C && MBEDTLS_AES_C && MBEDTLS_CCM_C && MBEDTLS_PSA_CRYPTO_C && MBEDTLS_FS_IO */
+#endif /* MBEDTLS_SHA256_C && MBEDTLS_MD_C &&
+          MBEDTLS_AES_C && MBEDTLS_CCM_C &&
+          MBEDTLS_PSA_CRYPTO_C && MBEDTLS_FS_IO */
diff --git a/programs/ssl/dtls_client.c b/programs/ssl/dtls_client.c
index 23a34e0..d13ea28 100644
--- a/programs/ssl/dtls_client.c
+++ b/programs/ssl/dtls_client.c
@@ -343,5 +343,5 @@
     mbedtls_exit( ret );
 }
 #endif /* MBEDTLS_SSL_CLI_C && MBEDTLS_SSL_PROTO_DTLS && MBEDTLS_NET_C &&
-          MBEDTLD_TIMING_C && MBEDTLS_ENTROPY_C && MBEDTLS_CTR_DRBG_C &&
+          MBEDTLS_TIMING_C && MBEDTLS_ENTROPY_C && MBEDTLS_CTR_DRBG_C &&
           MBEDTLS_X509_CRT_PARSE_C && MBEDTLS_RSA_C && MBEDTLS_PEM_PARSE_C */
diff --git a/programs/ssl/ssl_server2.c b/programs/ssl/ssl_server2.c
index 1b4a94a..00624b5 100644
--- a/programs/ssl/ssl_server2.c
+++ b/programs/ssl/ssl_server2.c
@@ -129,6 +129,7 @@
 #define DFL_SNI                 NULL
 #define DFL_ALPN_STRING         NULL
 #define DFL_CURVES              NULL
+#define DFL_MAX_EARLY_DATA_SIZE 0
 #define DFL_SIG_ALGS            NULL
 #define DFL_DHM_FILE            NULL
 #define DFL_TRANSPORT           MBEDTLS_SSL_TRANSPORT_STREAM
@@ -424,6 +425,15 @@
 #define USAGE_ECJPAKE ""
 #endif
 
+#if defined(MBEDTLS_SSL_EARLY_DATA)
+#define USAGE_EARLY_DATA \
+    "    max_early_data_size=%%d default: -1 (disabled)\n"             \
+    "                            options: -1 (disabled), "           \
+    "                                     >= 0 (enabled, max amount of early data )\n"
+#else
+#define USAGE_EARLY_DATA ""
+#endif /* MBEDTLS_SSL_EARLY_DATA */
+
 #if defined(MBEDTLS_ECP_C)
 #define USAGE_CURVES \
     "    curves=a,b,c,d      default: \"default\" (library default)\n"  \
@@ -677,6 +687,7 @@
     const char *cid_val_renego; /* the CID to use for incoming messages
                                  * after renegotiation                      */
     int reproducible;           /* make communication reproducible          */
+    uint32_t max_early_data_size; /* max amount of early data               */
     int query_config_mode;      /* whether to read config                   */
     int use_srtp;               /* Support SRTP                             */
     int force_srtp_profile;     /* SRTP protection profile to use or all    */
@@ -1535,6 +1546,9 @@
      };
 #endif /* MBEDTLS_SSL_DTLS_SRTP */
 
+#if defined(MBEDTLS_SSL_EARLY_DATA)
+    int tls13_early_data_enabled = MBEDTLS_SSL_EARLY_DATA_DISABLED;
+#endif
 #if defined(MBEDTLS_MEMORY_BUFFER_ALLOC_C)
     mbedtls_memory_buffer_alloc_init( alloc_buf, sizeof(alloc_buf) );
 #if defined(MBEDTLS_MEMORY_DEBUG)
@@ -1691,6 +1705,7 @@
     opt.sni                 = DFL_SNI;
     opt.alpn_string         = DFL_ALPN_STRING;
     opt.curves              = DFL_CURVES;
+    opt.max_early_data_size = DFL_MAX_EARLY_DATA_SIZE;
     opt.sig_algs            = DFL_SIG_ALGS;
     opt.dhm_file            = DFL_DHM_FILE;
     opt.transport           = DFL_TRANSPORT;
@@ -1881,6 +1896,19 @@
         else if( strcmp( p, "sig_algs" ) == 0 )
             opt.sig_algs = q;
 #endif
+#if defined(MBEDTLS_SSL_EARLY_DATA)
+        else if( strcmp( p, "max_early_data_size" ) == 0 )
+        {
+            long long value = atoll( q );
+            tls13_early_data_enabled =
+                value >= 0 ? MBEDTLS_SSL_EARLY_DATA_ENABLED :
+                             MBEDTLS_SSL_EARLY_DATA_DISABLED;
+            if( tls13_early_data_enabled )
+            {
+                opt.max_early_data_size = atoi( q );
+            }
+        }
+#endif /* MBEDTLS_SSL_EARLY_DATA */
         else if( strcmp( p, "renegotiation" ) == 0 )
         {
             opt.renegotiation = (atoi( q )) ?
@@ -1949,7 +1977,7 @@
              * is not recommended in practice.
              * `psk_or_ephemeral` exists in theory, we need this mode to test if
              * this setting work correctly. With this key exchange setting, server
-             * should always perform `ephemeral` handshake. `psk` or `psk_ephermal`
+             * should always perform `ephemeral` handshake. `psk` or `psk_ephemeral`
              * is not expected.
              */
             else if( strcmp( q, "psk_or_ephemeral" ) == 0 )
@@ -2876,6 +2904,15 @@
     if( opt.cert_req_ca_list != DFL_CERT_REQ_CA_LIST )
         mbedtls_ssl_conf_cert_req_ca_list( &conf, opt.cert_req_ca_list );
 
+#if defined(MBEDTLS_SSL_EARLY_DATA)
+    mbedtls_ssl_tls13_conf_early_data( &conf, tls13_early_data_enabled );
+    if( tls13_early_data_enabled == MBEDTLS_SSL_EARLY_DATA_ENABLED )
+    {
+        mbedtls_ssl_tls13_conf_max_early_data_size(
+            &conf, opt.max_early_data_size );
+    }
+#endif /* MBEDTLS_SSL_EARLY_DATA */
+
 #if defined(MBEDTLS_KEY_EXCHANGE_CERT_REQ_ALLOWED_ENABLED)
     /* exercise setting DN hints for server certificate request
      * (Intended for use where the client cert expected has been signed by
diff --git a/programs/test/benchmark.c b/programs/test/benchmark.c
index ecb093e..6313c52 100644
--- a/programs/test/benchmark.c
+++ b/programs/test/benchmark.c
@@ -416,7 +416,7 @@
     Sleep( alarmMs );
     mbedtls_timing_alarmed = 1;
     /* _endthread will be called implicitly on return
-     * That ensures execution of thread funcition's epilogue */
+     * That ensures execution of thread function's epilogue */
 }
 
 static void mbedtls_set_alarm( int seconds )
diff --git a/programs/test/cmake_package_install/CMakeLists.txt b/programs/test/cmake_package_install/CMakeLists.txt
index 711a1e5..fb5ad51 100644
--- a/programs/test/cmake_package_install/CMakeLists.txt
+++ b/programs/test/cmake_package_install/CMakeLists.txt
@@ -26,7 +26,7 @@
 # Locate the package.
 #
 
-set(MbedTLS_DIR "${MbedTLS_INSTALL_DIR}/cmake")
+list(INSERT CMAKE_PREFIX_PATH 0 "${MbedTLS_INSTALL_DIR}")
 find_package(MbedTLS REQUIRED)
 
 #
diff --git a/programs/test/udp_proxy.c b/programs/test/udp_proxy.c
index e3386d1..ccd1303 100644
--- a/programs/test/udp_proxy.c
+++ b/programs/test/udp_proxy.c
@@ -377,7 +377,7 @@
 
 #if defined(MBEDTLS_TIMING_C)
 /* Return elapsed time in milliseconds since the first call */
-static unsigned ellapsed_time( void )
+static unsigned elapsed_time( void )
 {
     static int initialized = 0;
     static struct mbedtls_timing_hr_time hires;
@@ -413,9 +413,9 @@
     int ret;
 
     mbedtls_printf( "  %05u flush    %s: %u bytes, %u datagrams, last %u ms\n",
-                    ellapsed_time(), buf->description,
+                    elapsed_time(), buf->description,
                     (unsigned) buf->len, buf->num_datagrams,
-                    ellapsed_time() - buf->packet_lifetime );
+                    elapsed_time() - buf->packet_lifetime );
 
     ret = mbedtls_net_send( buf->ctx, buf->data, buf->len );
 
@@ -427,7 +427,7 @@
 
 static unsigned ctx_buffer_time_remaining( ctx_buffer *buf )
 {
-    unsigned const cur_time = ellapsed_time();
+    unsigned const cur_time = elapsed_time();
 
     if( buf->num_datagrams == 0 )
         return( (unsigned) -1 );
@@ -467,7 +467,7 @@
 
     buf->len += len;
     if( ++buf->num_datagrams == 1 )
-        buf->packet_lifetime = ellapsed_time();
+        buf->packet_lifetime = elapsed_time();
 
     return( (int) len );
 }
@@ -517,10 +517,10 @@
 #if defined(MBEDTLS_TIMING_C)
     if( why == NULL )
         mbedtls_printf( "  %05u dispatch %s %s (%u bytes)\n",
-                ellapsed_time(), p->way, p->type, p->len );
+                elapsed_time(), p->way, p->type, p->len );
     else
         mbedtls_printf( "  %05u dispatch %s %s (%u bytes): %s\n",
-                ellapsed_time(), p->way, p->type, p->len, why );
+                elapsed_time(), p->way, p->type, p->len, why );
 #else
     if( why == NULL )
         mbedtls_printf( "        dispatch %s %s (%u bytes)\n",
diff --git a/programs/x509/cert_req.c b/programs/x509/cert_req.c
index 30b389a..4879583 100644
--- a/programs/x509/cert_req.c
+++ b/programs/x509/cert_req.c
@@ -355,7 +355,7 @@
     if( ( ret = write_certificate_request( &req, opt.output_file,
                                            mbedtls_ctr_drbg_random, &ctr_drbg ) ) != 0 )
     {
-        mbedtls_printf( " failed\n  !  write_certifcate_request %d", ret );
+        mbedtls_printf( " failed\n  !  write_certificate_request %d", ret );
         goto exit;
     }
 
diff --git a/programs/x509/cert_write.c b/programs/x509/cert_write.c
index f9366fe..a8910d7 100644
--- a/programs/x509/cert_write.c
+++ b/programs/x509/cert_write.c
@@ -752,7 +752,7 @@
         if( ret != 0 )
         {
             mbedtls_strerror( ret, buf, sizeof(buf) );
-            mbedtls_printf( " failed\n  !  x509write_crt_set_basic_contraints "
+            mbedtls_printf( " failed\n  !  x509write_crt_set_basic_constraints "
                             "returned -0x%04x - %s\n\n", (unsigned int) -ret, buf );
             goto exit;
         }
diff --git a/scripts/code_style.py b/scripts/code_style.py
new file mode 100755
index 0000000..68cd556
--- /dev/null
+++ b/scripts/code_style.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""Check or fix the code style by running Uncrustify.
+
+Note: The code style enforced by this script is not yet introduced to
+Mbed TLS. At present this script will only be used to prepare for a future
+change of code style.
+"""
+# Copyright The Mbed TLS Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import io
+import os
+import subprocess
+import sys
+from typing import List
+
+UNCRUSTIFY_SUPPORTED_VERSION = "0.75.1"
+CONFIG_FILE = ".uncrustify.cfg"
+UNCRUSTIFY_EXE = "uncrustify"
+UNCRUSTIFY_ARGS = ["-c", CONFIG_FILE]
+STDOUT_UTF8 = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+STDERR_UTF8 = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+def print_err(*args):
+    print("Error:", *args, file=STDERR_UTF8)  # print() inserts the space
+
+def get_src_files() -> List[str]:
+    """
+    Use git ls-files to list the tracked source files to check:
+    C sources and headers, test suite .function files and .fmt
+    templates. Returns an empty list if git ls-files fails.
+    """
+    git_ls_files_cmd = ["git", "ls-files",
+                        "*.[hc]",
+                        "tests/suites/*.function",
+                        "scripts/data_files/*.fmt"]
+
+    result = subprocess.run(git_ls_files_cmd, stdout=subprocess.PIPE,
+                            stderr=STDERR_UTF8, check=False)
+    if result.returncode != 0:
+        print_err("git ls-files returned: " + str(result.returncode))
+        return []
+
+    # The output is split on whitespace, giving one file name per item.
+    tracked_files = str(result.stdout, "utf-8").split()
+    # Don't correct style for files in 3rdparty/
+    return [filename for filename in tracked_files
+            if not filename.startswith("3rdparty/")]
+
+def get_uncrustify_version() -> str:
+    """
+    Return the output of "<Uncrustify> --version", or "" if it fails.
+    """
+    version_cmd = [UNCRUSTIFY_EXE, "--version"]
+    result = subprocess.run(version_cmd, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE, check=False)
+    if result.returncode != 0:
+        print_err("Could not get Uncrustify version:", str(result.stderr, "utf-8"))
+        return ""
+    return str(result.stdout, "utf-8")
+
+def check_style_is_correct(src_file_list: List[str]) -> bool:
+    """
+    Check the code style and output a diff for each file whose style is
+    incorrect. A file that Uncrustify fails on is reported as incorrect.
+    """
+    style_correct = True
+    for src_file in src_file_list:
+        uncrustify_cmd = [UNCRUSTIFY_EXE] + UNCRUSTIFY_ARGS + [src_file]
+        result = subprocess.run(uncrustify_cmd, stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE, check=False)
+        if result.returncode != 0:
+            # Don't diff or delete an output file that may not exist.
+            print_err("Uncrustify failed on", src_file, str(result.stderr, "utf-8"))
+            style_correct = False
+            continue
+        # Uncrustify places its result in a new file with the extension
+        # ".uncrustify". To get the changes (if any) diff the 2 files.
+        diff_cmd = ["diff", "-u", src_file, src_file + ".uncrustify"]
+        result = subprocess.run(diff_cmd, stdout=subprocess.PIPE,
+                                stderr=STDERR_UTF8, check=False)
+        os.remove(src_file + ".uncrustify")  # Tidy up artifact
+        if len(result.stdout) > 0:
+            print(src_file + " - Incorrect code style.", file=STDOUT_UTF8)
+            print("File changed - diff:", file=STDOUT_UTF8)
+            print(str(result.stdout, "utf-8"), file=STDOUT_UTF8)
+            style_correct = False
+        else:
+            print(src_file + " - OK.", file=STDOUT_UTF8)
+    return style_correct
+
+def fix_style_single_pass(src_file_list: List[str]) -> None:
+    """
+    Run Uncrustify once, with --no-backup, over each of the source files.
+    """
+    base_cmd = [UNCRUSTIFY_EXE] + UNCRUSTIFY_ARGS + ["--no-backup"]
+    for src_file in src_file_list:
+        # The exit status is deliberately not checked here (check=False).
+        subprocess.run(base_cmd + [src_file], check=False,
+                       stdout=STDOUT_UTF8, stderr=STDERR_UTF8)
+
+def fix_style(src_file_list: List[str]) -> int:
+    """
+    Fix the code style. This takes 2 passes of Uncrustify.
+    Returns 0 if the style is then correct, 1 otherwise.
+    """
+    fix_style_single_pass(src_file_list)
+    fix_style_single_pass(src_file_list)
+
+    # Guard against future changes that make more passes necessary.
+    if not check_style_is_correct(src_file_list):
+        print("Code style still incorrect after second run of Uncrustify.",
+              file=STDOUT_UTF8)
+        return 1
+    return 0
+
+def main() -> int:
+    """
+    Check (default) or fix (-f/--fix) the code style; return the exit status.
+    """
+    uncrustify_version = get_uncrustify_version().strip()
+    if UNCRUSTIFY_SUPPORTED_VERSION not in uncrustify_version:
+        print("Warning: Using unsupported Uncrustify version '"
+              + uncrustify_version + "' (Note: The only supported version "
+              "is " + UNCRUSTIFY_SUPPORTED_VERSION + ")", file=STDOUT_UTF8)
+
+    src_files = get_src_files()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--fix', action='store_true',
+                        help='modify source files to fix the code style')
+
+    args = parser.parse_args()
+
+    if args.fix:
+        # Fix mode
+        return fix_style(src_files)
+    else:
+        # Check mode
+        if check_style_is_correct(src_files):
+            return 0
+        else:
+            return 1
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja b/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja
index 3ad92aa..e716e40 100644
--- a/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja
+++ b/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja
@@ -291,7 +291,7 @@
                     alg, hash, hash_length,
                     signature, signature_size, signature_length ) );
     }
-#endif /* PSA_CRYPTO_SE_C */
+#endif /* MBEDTLS_PSA_CRYPTO_SE_C */
 
     psa_status_t status = PSA_ERROR_CORRUPTION_DETECTED;
     psa_key_location_t location =
@@ -375,7 +375,7 @@
                     alg, hash, hash_length,
                     signature, signature_length ) );
     }
-#endif /* PSA_CRYPTO_SE_C */
+#endif /* MBEDTLS_PSA_CRYPTO_SE_C */
 
     psa_status_t status = PSA_ERROR_CORRUPTION_DETECTED;
     psa_key_location_t location =
@@ -647,7 +647,7 @@
 
         return( PSA_SUCCESS );
     }
-#endif /* PSA_CRYPTO_SE_C */
+#endif /* MBEDTLS_PSA_CRYPTO_SE_C */
 
     switch( location )
     {
@@ -715,7 +715,7 @@
                      *( (psa_key_slot_number_t *)key_buffer ),
                      data, data_size, data_length ) );
     }
-#endif /* PSA_CRYPTO_SE_C */
+#endif /* MBEDTLS_PSA_CRYPTO_SE_C */
 
     switch( location )
     {
diff --git a/scripts/mbedtls_dev/bignum_common.py b/scripts/mbedtls_dev/bignum_common.py
index 67ea78d..0339b1a 100644
--- a/scripts/mbedtls_dev/bignum_common.py
+++ b/scripts/mbedtls_dev/bignum_common.py
@@ -99,6 +99,7 @@
     limb_sizes = [32, 64] # type: List[int]
     arities = [1, 2]
     arity = 2
+    suffix = False   # for arity = 1, symbol can be prefix (default) or suffix
 
     def __init__(self, val_a: str, val_b: str = "0", bits_in_limb: int = 32) -> None:
         self.val_a = val_a
@@ -170,7 +171,8 @@
         """
         if not self.case_description:
             if self.arity == 1:
-                self.case_description = "{} {:x}".format(
+                format_string = "{1:x} {0}" if self.suffix else "{0} {1:x}"
+                self.case_description = format_string.format(
                     self.symbol, self.int_a
                 )
             elif self.arity == 2:
@@ -251,6 +253,12 @@
         # provides earlier/more robust input validation.
         self.int_n = hex_to_int(val_n)
 
+    def to_montgomery(self, val: int) -> int:
+        return (val * self.r) % self.int_n
+
+    def from_montgomery(self, val: int) -> int:
+        return (val * self.r_inv) % self.int_n
+
     @property
     def boundary(self) -> int:
         return self.int_n
diff --git a/scripts/mbedtls_dev/bignum_core.py b/scripts/mbedtls_dev/bignum_core.py
index 2960d24..118a659 100644
--- a/scripts/mbedtls_dev/bignum_core.py
+++ b/scripts/mbedtls_dev/bignum_core.py
@@ -759,12 +759,23 @@
     """Test cases for bignum core exponentiation."""
     symbol = "^"
     test_function = "mpi_core_exp_mod"
-    test_name = "Core modular exponentiation"
+    test_name = "Core modular exponentiation (Montgomery form only)"
     input_style = "fixed"
 
+    def arguments(self) -> List[str]:
+        """Return quoted N, A (in Montgomery form) and B, then the result."""
+        # Input 'a' has to be given in Montgomery form, so convert it before
+        # formatting; N and B are passed through as they are.
+        hex_mont_a = '{:x}'.format(self.to_montgomery(self.int_a))
+        operands = [self.arg_n, self.format_arg(hex_mont_a), self.arg_b]
+        quoted = [bignum_common.quote_str(operand) for operand in operands]
+        return quoted + self.result()
+
     def result(self) -> List[str]:
+        # Result has to be given in Montgomery form too
         result = pow(self.int_a, self.int_b, self.int_n)
-        return [self.format_result(result)]
+        mont_result = self.to_montgomery(result)
+        return [self.format_result(mont_result)]
 
     @property
     def is_valid(self) -> bool:
diff --git a/scripts/mbedtls_dev/bignum_data.py b/scripts/mbedtls_dev/bignum_data.py
index 74d21d0..9658933 100644
--- a/scripts/mbedtls_dev/bignum_data.py
+++ b/scripts/mbedtls_dev/bignum_data.py
@@ -18,7 +18,7 @@
 import random
 
 # Functions calling these were used to produce test data and are here only for
-# reproducability, they are not used by the test generation framework/classes
+# reproducibility, they are not used by the test generation framework/classes
 try:
     from Cryptodome.Util.number import isPrime, getPrime #type: ignore #pylint: disable=import-error
 except ImportError:
@@ -90,8 +90,8 @@
                               "4708d9893a973000b54a23020fc5b043d6e4a51519d9c9cc"
                               "52d32377e78131c1")
 
-# Adding 192 bit and 1024 bit numbers because these are the shortest required
-# for ECC and RSA respectively.
+# Adding 192 bit and 1024 bit numbers because these are the shortest required
+# for ECC and RSA respectively.
 INPUTS_DEFAULT = [
         "0", "1", # corner cases
         "2", "3", # small primes
@@ -110,13 +110,21 @@
 # supported for now.
 MODULI_DEFAULT = [
         "53", # safe prime
-        "45", # non-prime
+        "45", # non-prime
         SAFE_PRIME_192_BIT_SEED_1,  # safe prime
         RANDOM_192_BIT_SEED_2_NO4,  # not a prime
         SAFE_PRIME_1024_BIT_SEED_3, # safe prime
         RANDOM_1024_BIT_SEED_4_NO5, # not a prime
         ]
 
+# Some functions, e.g. mbedtls_mpi_mod_raw_inv_prime(), only support prime moduli.
+ONLY_PRIME_MODULI = [
+        "53", # safe prime
+        "8ac72304057392b5",     # 9999999997777777333 (longer, not safe, prime)
+        SAFE_PRIME_192_BIT_SEED_1,  # safe prime
+        SAFE_PRIME_1024_BIT_SEED_3, # safe prime
+        ]
+
 def __gen_safe_prime(bits, seed):
     '''
     Generate a safe prime.
@@ -128,7 +136,7 @@
     randbytes.
     '''
     rng = random.Random()
-    # We want reproducability across python versions
+    # We want reproducibility across python versions
     rng.seed(seed, version=2)
     while True:
         prime = 2*getPrime(bits-1, rng.randbytes)+1 #pylint: disable=no-member
diff --git a/scripts/mbedtls_dev/bignum_mod.py b/scripts/mbedtls_dev/bignum_mod.py
index 81ece07..aa06fe8 100644
--- a/scripts/mbedtls_dev/bignum_mod.py
+++ b/scripts/mbedtls_dev/bignum_mod.py
@@ -34,6 +34,20 @@
 
 # BEGIN MERGE SLOT 3
 
+class BignumModSub(bignum_common.ModOperationCommon, BignumModTarget):
+    """Test data for mpi_mod_sub(): (A - B) mod N plus an expected status."""
+    arity = 2
+    symbol = "-"
+    input_style = "fixed"
+    test_name = "mbedtls_mpi_mod_sub"
+    test_function = "mpi_mod_sub"
+
+    def result(self) -> List[str]:
+        # The trailing "0" stands for success; carrying it in the generated
+        # data makes negative tests easier to express.
+        difference = (self.int_a - self.int_b) % self.int_n
+        return [self.format_result(difference), "0"]
+
 # END MERGE SLOT 3
 
 # BEGIN MERGE SLOT 4
diff --git a/scripts/mbedtls_dev/bignum_mod_raw.py b/scripts/mbedtls_dev/bignum_mod_raw.py
index 0bbad5d..6fc4c91 100644
--- a/scripts/mbedtls_dev/bignum_mod_raw.py
+++ b/scripts/mbedtls_dev/bignum_mod_raw.py
@@ -18,6 +18,7 @@
 
 from . import test_data_generation
 from . import bignum_common
+from .bignum_data import ONLY_PRIME_MODULI
 
 class BignumModRawTarget(test_data_generation.BaseTarget):
     #pylint: disable=abstract-method, too-few-public-methods
@@ -53,6 +54,34 @@
 
 # BEGIN MERGE SLOT 3
 
+class BignumModRawInvPrime(bignum_common.ModOperationCommon,
+                           BignumModRawTarget):
+    """Test data for mpi_mod_raw_inv_prime(), with operands in Montgomery form."""
+    test_function = "mpi_mod_raw_inv_prime"
+    test_name = "mbedtls_mpi_mod_raw_inv_prime (Montgomery form only)"
+    moduli = ONLY_PRIME_MODULI
+    input_style = "fixed"
+    symbol = "^ -1"
+    suffix = True
+    arity = 1
+
+    @property
+    def is_valid(self) -> bool:
+        # Keep only operands strictly between 0 and the modulus
+        return 0 < self.int_a < self.int_n
+
+    @property
+    def arg_a(self) -> str:
+        # Input has to be given in Montgomery form
+        hex_mont_a = '{:x}'.format(self.to_montgomery(self.int_a))
+        return self.format_arg(hex_mont_a)
+
+    def result(self) -> List[str]:
+        inverse = bignum_common.invmod(self.int_a, self.int_n)
+        # In case invmod() returns a negative value, bring it into [0, N)
+        inverse = inverse + self.int_n if inverse < 0 else inverse
+        return [self.format_result(self.to_montgomery(inverse))]
+
 # END MERGE SLOT 3
 
 # BEGIN MERGE SLOT 4
@@ -92,10 +121,9 @@
     arity = 1
 
     def result(self) -> List[str]:
-        result = (self.int_a * self.r) % self.int_n
+        result = self.to_montgomery(self.int_a)
         return [self.format_result(result)]
 
-
 class BignumModRawConvertFromMont(bignum_common.ModOperationCommon,
                                   BignumModRawTarget):
     """ Test cases for mpi_mod_raw_from_mont_rep(). """
@@ -106,10 +134,21 @@
     arity = 1
 
     def result(self) -> List[str]:
-        result = (self.int_a * self.r_inv) % self.int_n
+        result = self.from_montgomery(self.int_a)
         return [self.format_result(result)]
 
+class BignumModRawModNegate(bignum_common.ModOperationCommon,
+                            BignumModRawTarget):
+    """ Test data for mpi_mod_raw_neg(): the additive inverse of A modulo N. """
+    arity = 1
+    symbol = "-"
+    input_style = "arch_split"
+    test_name = "Modular negation: "
+    test_function = "mpi_mod_raw_neg"
 
+    def result(self) -> List[str]:
+        negated = (-self.int_a) % self.int_n  # same value as (N - A) mod N
+        return [self.format_result(negated)]
 # END MERGE SLOT 7
 
 # BEGIN MERGE SLOT 8
diff --git a/tests/include/test/constant_flow.h b/tests/include/test/constant_flow.h
index 9626af9..f3d676e 100644
--- a/tests/include/test/constant_flow.h
+++ b/tests/include/test/constant_flow.h
@@ -46,6 +46,12 @@
  * This file contains two implementations: one based on MemorySanitizer, the
  * other on valgrind's memcheck. If none of them is enabled, dummy macros that
  * do nothing are defined for convenience.
+ *
+ * \note #TEST_CF_SECRET must be called directly from within a .function file,
+ *       not indirectly via a macro defined under tests/include or a function
+ *       under tests/src. This is because we only run Valgrind for constant
+ *       flow on test suites that have greppable annotations inside them (see
+ *       `skip_suites_without_constant_flow` in `tests/scripts/all.sh`).
  */
 
 #if defined(MBEDTLS_TEST_CONSTANT_FLOW_MEMSAN)
diff --git a/tests/include/test/psa_crypto_helpers.h b/tests/include/test/psa_crypto_helpers.h
index bc2b016..3542950 100644
--- a/tests/include/test/psa_crypto_helpers.h
+++ b/tests/include/test/psa_crypto_helpers.h
@@ -189,7 +189,7 @@
  *
  * Do a key policy permission extension on key usage policies always involves
  * permissions of other usage policies
- * (like PSA_KEY_USAGE_SIGN_HASH involves PSA_KEY_USAGE_SIGN_MESSGAE).
+ * (like PSA_KEY_USAGE_SIGN_HASH involves PSA_KEY_USAGE_SIGN_MESSAGE).
  */
 psa_key_usage_t mbedtls_test_update_key_usage_flags( psa_key_usage_t usage_flags );
 
diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh
index d3eedcf..db46b03 100755
--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@@ -185,7 +185,8 @@
     export CTEST_OUTPUT_ON_FAILURE=1
 
     # CFLAGS and LDFLAGS for Asan builds that don't use CMake
-    ASAN_CFLAGS='-Werror -Wall -Wextra -fsanitize=address,undefined -fno-sanitize-recover=all'
+    # default to -O2, use -Ox _after_ this if you want another level
+    ASAN_CFLAGS='-O2 -Werror -fsanitize=address,undefined -fno-sanitize-recover=all'
 
     # Gather the list of available components. These are the functions
     # defined in this script whose name starts with "component_".
@@ -1591,6 +1592,17 @@
     env OPENSSL_CMD="$OPENSSL_NEXT" tests/compat.sh -e '^$' -f 'ARIA\|CHACHA'
 }
 
+skip_suites_without_constant_flow () {
+    # Export SKIP_TEST_SUITES as the comma-separated names of the test suites
+    # whose .function file contains no TEST_CF_ annotation. To be adjusted if
+    # we ever declare things as secret from inside tests/include or tests/src.
+    SKIP_TEST_SUITES=$(
+        git -C tests/suites grep -L TEST_CF_ 'test_suite_*.function' |
+            sed 's/test_suite_//; s/\.function$//' |
+            tr '\n' ,)
+    export SKIP_TEST_SUITES
+}
+
 component_test_memsan_constant_flow () {
     # This tests both (1) accesses to undefined memory, and (2) branches or
     # memory access depending on secret values. To distinguish between those:
@@ -1642,12 +1654,13 @@
     scripts/config.py full
     scripts/config.py set MBEDTLS_TEST_CONSTANT_FLOW_VALGRIND
     scripts/config.py unset MBEDTLS_USE_PSA_CRYPTO
+    skip_suites_without_constant_flow
     cmake -D CMAKE_BUILD_TYPE:String=Release .
     make
 
     # this only shows a summary of the results (how many of each type)
     # details are left in Testing/<date>/DynamicAnalysis.xml
-    msg "test: main suites (full minus MBEDTLS_USE_PSA_CRYPTO, valgrind + constant flow)"
+    msg "test: some suites (full minus MBEDTLS_USE_PSA_CRYPTO, valgrind + constant flow)"
     make memcheck
 }
 
@@ -1664,12 +1677,13 @@
     msg "build: cmake release GCC, full config with constant flow testing"
     scripts/config.py full
     scripts/config.py set MBEDTLS_TEST_CONSTANT_FLOW_VALGRIND
+    skip_suites_without_constant_flow
     cmake -D CMAKE_BUILD_TYPE:String=Release .
     make
 
     # this only shows a summary of the results (how many of each type)
     # details are left in Testing/<date>/DynamicAnalysis.xml
-    msg "test: main suites (valgrind + constant flow)"
+    msg "test: some suites (valgrind + constant flow)"
     make memcheck
 }
 
@@ -2195,11 +2209,16 @@
     msg "test: MBEDTLS_PSA_CRYPTO_CONFIG with accelerated hash and USE_PSA"
     make test
 
+    # This is mostly useful so that we can later compare outcome files with
+    # the reference config in analyze_outcomes.py, to check that the
+    # dependency declarations in ssl-opt.sh and in TLS code are correct.
     msg "test: ssl-opt.sh, MBEDTLS_PSA_CRYPTO_CONFIG with accelerated hash and USE_PSA"
     tests/ssl-opt.sh
 
-    msg "test: compat.sh, MBEDTLS_PSA_CRYPTO_CONFIG without accelerated hash and USE_PSA"
-    tests/compat.sh
+    # This is to make sure all ciphersuites are exercised, but we don't need
+    # interop testing (besides, we already got some from ssl-opt.sh).
+    msg "test: compat.sh, MBEDTLS_PSA_CRYPTO_CONFIG with accelerated hash and USE_PSA"
+    tests/compat.sh -p mbedTLS -V YES
 }
 
 # This component provides reference configuration for test_psa_crypto_config_accel_hash_use_psa
@@ -3476,30 +3495,43 @@
 
 component_test_valgrind () {
     msg "build: Release (clang)"
+    # default config, in particular without MBEDTLS_USE_PSA_CRYPTO
     CC=clang cmake -D CMAKE_BUILD_TYPE:String=Release .
     make
 
-    msg "test: main suites valgrind (Release)"
+    msg "test: main suites, Valgrind (default config)"
     make memcheck
 
     # Optional parts (slow; currently broken on OS X because programs don't
     # seem to receive signals under valgrind on OS X).
+    # These optional parts don't run on the CI.
     if [ "$MEMORY" -gt 0 ]; then
-        msg "test: ssl-opt.sh --memcheck (Release)"
+        msg "test: ssl-opt.sh --memcheck (default config)"
         tests/ssl-opt.sh --memcheck
     fi
 
     if [ "$MEMORY" -gt 1 ]; then
-        msg "test: compat.sh --memcheck (Release)"
+        msg "test: compat.sh --memcheck (default config)"
         tests/compat.sh --memcheck
     fi
 
     if [ "$MEMORY" -gt 0 ]; then
-        msg "test: context-info.sh --memcheck (Release)"
+        msg "test: context-info.sh --memcheck (default config)"
         tests/context-info.sh --memcheck
     fi
 }
 
+component_test_valgrind_psa () {
+    msg "build: Release, full (clang)"
+    # full config, in particular with MBEDTLS_USE_PSA_CRYPTO
+    scripts/config.py full
+    CC=clang cmake -D CMAKE_BUILD_TYPE:String=Release .
+    make
+
+    msg "test: main suites, Valgrind (full config)"
+    make memcheck  # summary only; details are left in Testing/<date>/DynamicAnalysis.xml
+}
+
 support_test_cmake_out_of_source () {
     distrib_id=""
     distrib_ver=""
@@ -3636,6 +3668,26 @@
     [ "$ver_major" -eq 3 ] && [ "$ver_minor" -ge 10 ]
 }
 
+component_test_corrected_code_style () {
+    ./scripts/code_style.py --fix  # the resulting corrections are reverted at the end
+
+    msg "build: make, default config (out-of-box), corrected code style"
+    make
+
+    msg "test: main suites make, default config (out-of-box), corrected code style"
+    make test
+
+    # Clean up code-style corrections (this reverts all unstaged changes here)
+    git checkout -- .
+}
+
+support_test_corrected_code_style () {  # requires Uncrustify 0.75.1
+    case $(uncrustify --version 2>/dev/null) in
+        *0.75.1*) true;;
+        *) false;;  # other version, or uncrustify not installed
+    esac
+}
+
 component_check_python_files () {
     msg "Lint: Python scripts"
     tests/scripts/check-python-files.sh
diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py
index 396ab74..13b6c2d 100755
--- a/tests/scripts/check_names.py
+++ b/tests/scripts/check_names.py
@@ -36,7 +36,7 @@
   declared in the header files. This uses the nm command.
 - All macros, constants, and identifiers (function names, struct names, etc)
   follow the required regex pattern.
-- Typo checking: All words that begin with MBED exist as macros or constants.
+- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
 
 The script returns 0 on success, 1 on test failure, and 2 if there is a script
 error. It must be run from Mbed TLS root.
@@ -191,11 +191,12 @@
 
 class Typo(Problem): # pylint: disable=too-few-public-methods
     """
-    A problem that occurs when a word using MBED doesn't appear to be defined as
-    constants nor enum values. Created with NameCheck.check_for_typos()
+    A problem that occurs when a word using MBED or PSA doesn't
+    appear to be defined as constants nor enum values. Created with
+    NameCheck.check_for_typos()
 
     Fields:
-    * match: the Match object of the MBED name in question.
+    * match: the Match object of the MBED|PSA name in question.
     """
     def __init__(self, match):
         self.match = match
@@ -245,7 +246,7 @@
             .format(str(self.excluded_files))
         )
 
-        all_macros = {"public": [], "internal": []}
+        all_macros = {"public": [], "internal": [], "private":[]}
         all_macros["public"] = self.parse_macros([
             "include/mbedtls/*.h",
             "include/psa/*.h",
@@ -256,9 +257,14 @@
             "library/*.h",
             "tests/include/test/drivers/*.h",
         ])
+        all_macros["private"] = self.parse_macros([
+            "library/*.c",
+        ])
         enum_consts = self.parse_enum_consts([
             "include/mbedtls/*.h",
+            "include/psa/*.h",
             "library/*.h",
+            "library/*.c",
             "3rdparty/everest/include/everest/everest.h",
             "3rdparty/everest/include/everest/x25519.h"
         ])
@@ -269,7 +275,7 @@
             "3rdparty/everest/include/everest/everest.h",
             "3rdparty/everest/include/everest/x25519.h"
         ])
-        mbed_words = self.parse_mbed_words([
+        mbed_psa_words = self.parse_mbed_psa_words([
             "include/mbedtls/*.h",
             "include/psa/*.h",
             "library/*.h",
@@ -302,10 +308,11 @@
         return {
             "public_macros": actual_macros["public"],
             "internal_macros": actual_macros["internal"],
+            "private_macros": all_macros["private"],
             "enum_consts": enum_consts,
             "identifiers": identifiers,
             "symbols": symbols,
-            "mbed_words": mbed_words
+            "mbed_psa_words": mbed_psa_words
         }
 
     def is_file_excluded(self, path, exclude_wildcards):
@@ -373,25 +380,28 @@
 
         return macros
 
-    def parse_mbed_words(self, include, exclude=None):
+    def parse_mbed_psa_words(self, include, exclude=None):
         """
-        Parse all words in the file that begin with MBED, in and out of macros,
-        comments, anything.
+        Parse all words in the file that begin with MBED|PSA, in and out of
+        macros, comments, anything.
 
         Args:
         * include: A List of glob expressions to look for files through.
         * exclude: A List of glob expressions for excluding files.
 
-        Returns a List of Match objects for words beginning with MBED.
+        Returns a List of Match objects for words beginning with MBED|PSA.
         """
         # Typos of TLS are common, hence the broader check below than MBEDTLS.
-        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
+        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
         exclusions = re.compile(r"// *no-check-names|#error")
 
         files = self.get_files(include, exclude)
-        self.log.debug("Looking for MBED words in {} files".format(len(files)))
+        self.log.debug(
+            "Looking for MBED|PSA words in {} files"
+            .format(len(files))
+        )
 
-        mbed_words = []
+        mbed_psa_words = []
         for filename in files:
             with open(filename, "r", encoding="utf-8") as fp:
                 for line_no, line in enumerate(fp):
@@ -399,14 +409,14 @@
                         continue
 
                     for name in mbed_regex.finditer(line):
-                        mbed_words.append(Match(
+                        mbed_psa_words.append(Match(
                             filename,
                             line,
                             line_no,
                             name.span(0),
                             name.group(0)))
 
-        return mbed_words
+        return mbed_psa_words
 
     def parse_enum_consts(self, include, exclude=None):
         """
@@ -629,7 +639,7 @@
         self.log.info("Compiling...")
         symbols = []
 
-        # Back up the config and atomically compile with the full configratuion.
+        # Back up the config and atomically compile with the full configuration.
         shutil.copy(
             "include/mbedtls/mbedtls_config.h",
             "include/mbedtls/mbedtls_config.h.bak"
@@ -832,12 +842,14 @@
             for match
             in self.parse_result["public_macros"] +
             self.parse_result["internal_macros"] +
+            self.parse_result["private_macros"] +
             self.parse_result["enum_consts"]
             }
         typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
-                                    r"MBEDTLS_TEST_LIBTESTDRIVER*")
+                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
+                                    r"PSA_CRYPTO_DRIVER_TEST")
 
-        for name_match in self.parse_result["mbed_words"]:
+        for name_match in self.parse_result["mbed_psa_words"]:
             found = name_match.name in all_caps_names
 
             # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
@@ -892,7 +904,7 @@
     parser.add_argument(
         "-q", "--quiet",
         action="store_true",
-        help="hide unnecessary text, explanations, and highlighs"
+        help="hide unnecessary text, explanations, and highlights"
     )
 
     args = parser.parse_args()
diff --git a/tests/scripts/generate_psa_tests.py b/tests/scripts/generate_psa_tests.py
index 2f09007..b271048 100755
--- a/tests/scripts/generate_psa_tests.py
+++ b/tests/scripts/generate_psa_tests.py
@@ -80,7 +80,7 @@
 # A temporary hack: at the time of writing, not all dependency symbols
 # are implemented yet. Skip test cases for which the dependency symbols are
 # not available. Once all dependency symbols are available, this hack must
-# be removed so that a bug in the dependency symbols proprely leads to a test
+# be removed so that a bug in the dependency symbols properly leads to a test
 # failure.
 def read_implemented_dependencies(filename: str) -> FrozenSet[str]:
     return frozenset(symbol
@@ -459,7 +459,7 @@
         """Prepare to generate a key.
 
         * `usage`                 : The usage flags used for the key.
-        * `without_implicit_usage`: Flag to defide to apply the usage extension
+        * `without_implicit_usage`: Flag to decide whether to apply the usage extension
         """
         usage_flags = set(usage)
         if not without_implicit_usage:
@@ -483,7 +483,7 @@
     ) -> None:
         """Prepare to generate test data
 
-        * `description`   : used for the the test case names
+        * `description`   : used for the test case names
         * `expected_usage`: the usage flags generated as the expected usage flags
                             in the test cases. CAn differ from the usage flags
                             stored in the keys because of the usage flags extension.
diff --git a/tests/scripts/test_psa_compliance.py b/tests/scripts/test_psa_compliance.py
index 7d06db1..92db417 100755
--- a/tests/scripts/test_psa_compliance.py
+++ b/tests/scripts/test_psa_compliance.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """Run the PSA Crypto API compliance test suite.
 Clone the repo and check out the commit specified by PSA_ARCH_TEST_REPO and PSA_ARCH_TEST_REF,
-then complie and run the test suite. The clone is stored at <Mbed TLS root>/psa-arch-tests.
+then compile and run the test suite. The clone is stored at <Mbed TLS root>/psa-arch-tests.
 Known defects in either the test suite or mbedtls - identified by their test number - are ignored,
 while unexpected failures AND successes are reported as errors,
 to help keep the list of known defects as up to date as possible.
diff --git a/tests/suites/test_suite_alignment.data b/tests/suites/test_suite_alignment.data
new file mode 100644
index 0000000..8c0c21d
--- /dev/null
+++ b/tests/suites/test_suite_alignment.data
@@ -0,0 +1,119 @@
+Aligned 16-bit access
+mbedtls_unaligned_access:16:0
+
+Aligned 32-bit access
+mbedtls_unaligned_access:32:0
+
+Aligned 64-bit access
+mbedtls_unaligned_access:64:0
+
+Unaligned 16-bit access offset=1
+mbedtls_unaligned_access:16:1
+
+Unaligned 32-bit access offset=1
+mbedtls_unaligned_access:32:1
+
+Unaligned 64-bit access offset=1
+mbedtls_unaligned_access:64:1
+
+Unaligned 16-bit access offset=4
+mbedtls_unaligned_access:16:4
+
+Unaligned 32-bit access offset=4
+mbedtls_unaligned_access:32:4
+
+Unaligned 64-bit access offset=4
+mbedtls_unaligned_access:64:4
+
+Unaligned 16-bit access offset=7
+mbedtls_unaligned_access:16:7
+
+Unaligned 32-bit access offset=7
+mbedtls_unaligned_access:32:7
+
+Unaligned 64-bit access offset=7
+mbedtls_unaligned_access:64:7
+
+Unaligned 16-bit access offset=8
+mbedtls_unaligned_access:16:8
+
+Unaligned 32-bit access offset=8
+mbedtls_unaligned_access:32:8
+
+Unaligned 64-bit access offset=8
+mbedtls_unaligned_access:64:8
+
+Byteswap 16
+mbedtls_byteswap:"0100":16:"0001"
+
+Byteswap 16 with truncation
+mbedtls_byteswap:"0706050403020100":16:"0001"
+
+Byteswap 16 all-zero
+mbedtls_byteswap:"0000":16:"0000"
+
+Byteswap 16 all-ones
+mbedtls_byteswap:"ffffffffffffffff":16:"ffff"
+
+Byteswap 32
+mbedtls_byteswap:"03020100":32:"00010203"
+
+Byteswap 32 with truncation
+mbedtls_byteswap:"0706050403020100":32:"00010203"
+
+Byteswap 32 all-zero
+mbedtls_byteswap:"00000000":32:"00000000"
+
+Byteswap 32 all-ones
+mbedtls_byteswap:"ffffffffffffffff":32:"ffffffff"
+
+Byteswap 64
+mbedtls_byteswap:"0706050403020100":64:"0001020304050607"
+
+Byteswap 64 all-zero
+mbedtls_byteswap:"0000000000000000":64:"0000000000000000"
+
+Byteswap 64 all-ones
+mbedtls_byteswap:"ffffffffffffffff":64:"ffffffffffffffff"
+
+Get individual bytes
+get_byte
+
+Endian-aware unaligned 16-bit BE offset=0
+unaligned_access_endian_aware:16:0:1
+
+Endian-aware unaligned 16-bit BE offset=3
+unaligned_access_endian_aware:16:3:1
+
+Endian-aware unaligned 16-bit LE offset=0
+unaligned_access_endian_aware:16:0:0
+
+Endian-aware unaligned 16-bit LE offset=3
+unaligned_access_endian_aware:16:3:0
+
+Endian-aware unaligned 32-bit BE offset=0
+unaligned_access_endian_aware:32:0:1
+
+Endian-aware unaligned 32-bit BE offset=3
+unaligned_access_endian_aware:32:3:1
+
+Endian-aware unaligned 32-bit LE offset=0
+unaligned_access_endian_aware:32:0:0
+
+Endian-aware unaligned 32-bit LE offset=3
+unaligned_access_endian_aware:32:3:0
+
+Endian-aware unaligned 64-bit BE offset=0
+unaligned_access_endian_aware:64:0:1
+
+Endian-aware unaligned 64-bit BE offset=3
+unaligned_access_endian_aware:64:3:1
+
+Endian-aware unaligned 64-bit LE offset=0
+unaligned_access_endian_aware:64:0:0
+
+Endian-aware unaligned 64-bit LE offset=3
+unaligned_access_endian_aware:64:3:0
+
+Big-endian check
+mbedtls_is_big_endian
diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function
new file mode 100644
index 0000000..06c5668
--- /dev/null
+++ b/tests/suites/test_suite_alignment.function
@@ -0,0 +1,407 @@
+/* BEGIN_HEADER */
+#include "../library/alignment.h"
+
+#include <stdint.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wunreachable-code"
+#endif
+#include <stdio.h>
+
+/*
+ * Convert a string of hex digits such as "abcd" (case-insensitive) to a
+ * uint64_t. Returns 1 on success, 0 if mbedtls_test_unhexify() fails.
+ */
+int parse_hex_string( char* hex_string, uint64_t *result )
+{
+    uint8_t bytes[8];
+    size_t n_bytes;
+    uint64_t value = 0;
+
+    if ( mbedtls_test_unhexify( bytes, sizeof( bytes ), hex_string, &n_bytes ) != 0 )
+        return 0;
+    for ( size_t pos = 0; pos < n_bytes; pos++ )
+    {
+        /* The first byte is the least significant one on big-endian hosts */
+        size_t shift = 8 * ( MBEDTLS_IS_BIG_ENDIAN ? pos : n_bytes - 1 - pos );
+        value |= (uint64_t) bytes[pos] << shift;
+    }
+    *result = value;
+    return 1;
+}
+
+/* END_HEADER */
+
+/* BEGIN_CASE */
+void mbedtls_unaligned_access( int size, int offset )
+{
+    /*
+     * Read size bits at byte offset of a known buffer with
+     * mbedtls_get_unaligned_uintNN(), compare with the value expected for the
+     * host byte order, then write the value back with
+     * mbedtls_put_unaligned_uintNN() and check that the buffer is restored.
+     *
+     * size:   width of the access in bits (16, 32 or 64).
+     * offset: byte offset of the access from the start of the buffer.
+     */
+
+    /* Define 64-bit aligned raw byte array */
+    uint64_t raw[2];
+
+    /* Populate with known data: x == { 0, 1, 2, ... } */
+    uint8_t *x = (uint8_t *) raw;
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+        x[i] = (uint8_t)i;
+
+    TEST_ASSERT( size == 16 || size == 32 || size == 64 );
+    const size_t n_bytes = (size_t) size / 8;
+
+    /* The access must stay within raw */
+    TEST_ASSERT( offset >= 0 && (size_t) offset + n_bytes <= sizeof(raw) );
+
+    /* Read the value under test */
+    uint64_t r = 0;
+    switch ( size )
+    {
+        case 16:
+            r = mbedtls_get_unaligned_uint16( x + offset );
+            break;
+        case 32:
+            r = mbedtls_get_unaligned_uint32( x + offset );
+            break;
+        case 64:
+            r = mbedtls_get_unaligned_uint64( x + offset );
+            break;
+    }
+
+    /* Generate expected result from the n_bytes bytes that were read */
+    uint64_t expected = 0;
+    for ( size_t i = 0; i < n_bytes; i++ )
+    {
+        /*
+         * x[offset + i] == offset + i. On a little-endian host the first
+         * byte read is the least significant one, e.g. for size == 32 and
+         * offset == 1:
+         *   expected = (1 << 0) | (2 << 8) | (3 << 16) | (4 << 24)
+         *            = 0x04030201
+         * On a big-endian host the first byte is the most significant one,
+         * so the shifts are mirrored within the accessed width.
+         */
+        size_t shift = 8 * ( MBEDTLS_IS_BIG_ENDIAN ? n_bytes - 1 - i : i );
+        expected |= (uint64_t) ( offset + i ) << shift;
+    }
+
+    TEST_EQUAL( r, expected );
+
+    /* Write sentinel to the part of the array we will test writing to */
+    for ( size_t i = 0; i < n_bytes; i++ )
+    {
+        x[i + offset] = 0xff;
+    }
+
+    /*
+     * Write r back to the array with mbedtls_put_unaligned_uintNN() and
+     * validate that the array is back to its initial contents as a result.
+     */
+    switch ( size )
+    {
+        case 16:
+            mbedtls_put_unaligned_uint16( x + offset, r );
+            break;
+        case 32:
+            mbedtls_put_unaligned_uint32( x + offset, r );
+            break;
+        case 64:
+            mbedtls_put_unaligned_uint64( x + offset, r );
+            break;
+    }
+
+    /*
+     * Check the whole of raw. sizeof(x) must not be used for this: x is a
+     * pointer, so that would only cover the first sizeof(uint8_t *) bytes.
+     */
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+    {
+        TEST_EQUAL( x[i], (uint8_t)i );
+    }
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void mbedtls_byteswap( char* input_str, int size, char *expected_str )
+{
+    uint64_t input, expected; /* both parsed from hex by parse_hex_string() */
+    TEST_ASSERT( parse_hex_string( input_str, &input ) );
+    TEST_ASSERT( parse_hex_string( expected_str, &expected ) );
+
+    /* Swap at the requested width (in bits) and check against expected result */
+    uint64_t r = 0;
+    switch ( size )
+    {
+        case 16:
+            r = MBEDTLS_BSWAP16( input );
+            break;
+        case 32:
+            r = MBEDTLS_BSWAP32( input );
+            break;
+        case 64:
+            r = MBEDTLS_BSWAP64( input );
+            break;
+        default:
+            TEST_ASSERT( ! "size must be 16, 32 or 64" ); /* operand is always 0 */
+    }
+    TEST_EQUAL( r, expected );
+
+    /*
+     * Check byte by byte by extracting bytes from opposite ends of
+     * input and r: byte i of input against byte ( size / 8 - 1 - i ) of r.
+     */
+    for ( size_t i = 0; i < (size_t)( size / 8 ); i++ )
+    {
+        size_t s1 = i * 8; /* bit position of byte i, from the low end */
+        size_t s2 = ( ( size / 8 - 1 ) - i ) * 8; /* bit position of its mirror */
+        uint64_t a = ( input & ( (uint64_t)0xff << s1 ) ) >> s1;
+        uint64_t b = ( r & ( (uint64_t)0xff << s2 ) ) >> s2;
+        TEST_EQUAL( a, b );
+    }
+
+    /* Check BSWAP(BSWAP(x)) == x, where x is the low size bits of input */
+    switch ( size )
+    {
+        case 16:
+            r = MBEDTLS_BSWAP16( r );
+            TEST_EQUAL( r, input & 0xffff );
+            break;
+        case 32:
+            r = MBEDTLS_BSWAP32( r );
+            TEST_EQUAL( r, input & 0xffffffff );
+            break;
+        case 64:
+            r = MBEDTLS_BSWAP64( r );
+            TEST_EQUAL( r, input );
+            break;
+    }
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void get_byte()
+{
+    /*
+     * MBEDTLS_BYTE_n( v ) must yield byte n of v, counting from the least
+     * significant byte. u64, u32 and u16 hold the value n in byte n; actual
+     * starts out different from expected so that a missed case is caught.
+     */
+    uint64_t u64 = 0x0706050403020100;
+    for ( size_t b = 0; b < 8 ; b++ )
+    {
+        uint8_t expected = b;
+        uint8_t actual = b + 1;
+        switch ( b )
+        {
+            case 0:
+                actual = MBEDTLS_BYTE_0( u64 );
+                break;
+            case 1:
+                actual = MBEDTLS_BYTE_1( u64 );
+                break;
+            case 2:
+                actual = MBEDTLS_BYTE_2( u64 );
+                break;
+            case 3:
+                actual = MBEDTLS_BYTE_3( u64 );
+                break;
+            case 4:
+                actual = MBEDTLS_BYTE_4( u64 );
+                break;
+            case 5:
+                actual = MBEDTLS_BYTE_5( u64 );
+                break;
+            case 6:
+                actual = MBEDTLS_BYTE_6( u64 );
+                break;
+            case 7:
+                actual = MBEDTLS_BYTE_7( u64 );
+                break;
+        }
+        TEST_EQUAL( actual, expected );
+    }
+
+    uint32_t u32 = 0x03020100;
+    for ( size_t b = 0; b < 4 ; b++ )
+    {
+        uint8_t expected = b;
+        uint8_t actual = b + 1;
+        switch ( b )
+        {
+            case 0:
+                actual = MBEDTLS_BYTE_0( u32 );
+                break;
+            case 1:
+                actual = MBEDTLS_BYTE_1( u32 );
+                break;
+            case 2:
+                actual = MBEDTLS_BYTE_2( u32 );
+                break;
+            case 3:
+                actual = MBEDTLS_BYTE_3( u32 );
+                break;
+        }
+        TEST_EQUAL( actual, expected );
+    }
+
+    uint16_t u16 = 0x0100;
+    for ( size_t b = 0; b < 2 ; b++ )
+    {
+        uint8_t expected = b;
+        uint8_t actual = b + 1;
+        switch ( b )
+        {
+            case 0:
+                actual = MBEDTLS_BYTE_0( u16 );
+                break;
+            case 1:
+                actual = MBEDTLS_BYTE_1( u16 );
+                break;
+        }
+        TEST_EQUAL( actual, expected );
+    }
+
+    uint8_t u8 = 0x01;
+    uint8_t actual = MBEDTLS_BYTE_0( u8 );
+    TEST_EQUAL( actual, u8 );
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void unaligned_access_endian_aware(int size, int offset, int big_endian )
+{
+    TEST_ASSERT( size == 16 || size == 24 || size == 32 || size == 64 );
+    TEST_ASSERT( offset >= 0 && offset < 8 );
+    /* Read at x + offset via the GET macro, check the value, then PUT it back and check memory */
+    /* Define 64-bit aligned raw byte array (16 bytes, so offset + size / 8 always fits) */
+    uint64_t raw[2];
+    /* Populate with known data: x == { 0, 1, 2, ... } */
+    uint8_t *x = (uint8_t *) raw;
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+        x[i] = (uint8_t) i;
+    /* Read size bits starting at byte offset, using the macro for the requested endianness */
+    uint64_t read = 0;
+    if ( big_endian )
+    {
+        switch ( size )
+        {
+            case 16:
+                read = MBEDTLS_GET_UINT16_BE( x, offset );
+                break;
+            case 24:
+                read = MBEDTLS_GET_UINT24_BE( x, offset );
+                break;
+            case 32:
+                read = MBEDTLS_GET_UINT32_BE( x, offset );
+                break;
+            case 64:
+                read = MBEDTLS_GET_UINT64_BE( x, offset );
+                break;
+        }
+    }
+    else
+    {
+        switch ( size )
+        {
+            case 16:
+                read = MBEDTLS_GET_UINT16_LE( x, offset );
+                break;
+            case 24:
+                read = MBEDTLS_GET_UINT24_LE( x, offset );
+                break;
+            case 32:
+                read = MBEDTLS_GET_UINT32_LE( x, offset );
+                break;
+            case 64:
+                read = MBEDTLS_GET_UINT64_LE( x, offset );
+                break;
+        }
+    }
+
+    /* Build up expected value byte by byte, in either big or little endian format */
+    uint64_t expected = 0;
+    for ( size_t i = 0; i < (size_t)(size / 8); i++ )
+    {
+        uint64_t b = x[i + offset];
+        uint8_t shift = (big_endian) ? (8 * ((size / 8 - 1) - i)) : (8 * i);
+        expected |= b << shift;
+    }
+
+    /* Verify read */
+    TEST_EQUAL( read, expected );
+
+    /* Test writing back to memory. First write sentinel */
+    for ( size_t i = 0; i < (size_t)(size / 8); i++ )
+    {
+        x[i + offset] = 0xff;
+    }
+    /* Overwrite sentinel with endian-aware write macro */
+    if ( big_endian )
+    {
+        switch ( size )
+        {
+            case 16:
+                MBEDTLS_PUT_UINT16_BE( read, x, offset );
+                break;
+            case 24:
+                MBEDTLS_PUT_UINT24_BE( read, x, offset );
+                break;
+            case 32:
+                MBEDTLS_PUT_UINT32_BE( read, x, offset );
+                break;
+            case 64:
+                MBEDTLS_PUT_UINT64_BE( read, x, offset );
+                break;
+        }
+    }
+    else
+    {
+        switch ( size )
+        {
+            case 16:
+                MBEDTLS_PUT_UINT16_LE( read, x, offset );
+                break;
+                case 24:
+                MBEDTLS_PUT_UINT24_LE( read, x, offset );
+                break;
+            case 32:
+                MBEDTLS_PUT_UINT32_LE( read, x, offset );
+                break;
+            case 64:
+                MBEDTLS_PUT_UINT64_LE( read, x, offset );
+                break;
+        }
+    }
+
+    /* Verify write - every byte of raw must be back to its original value */
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+        TEST_EQUAL( x[i], (uint8_t) i );
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void mbedtls_is_big_endian()
+{
+    /* The byte layout of a known 16-bit value must match MBEDTLS_IS_BIG_ENDIAN */
+    uint16_t probe = 0x1234;
+    uint8_t *p = (uint8_t *) &probe;
+    if ( !( MBEDTLS_IS_BIG_ENDIAN ) )
+    {
+        /* Little-endian: least significant byte first, i.e. p == { 0x34, 0x12 } */
+        TEST_EQUAL( p[0], 0x34 );
+        TEST_EQUAL( p[1], 0x12 );
+    }
+    else
+    {
+        /* Big-endian: most significant byte first, i.e. p == { 0x12, 0x34 } */
+        TEST_EQUAL( p[0], 0x12 );
+        TEST_EQUAL( p[1], 0x34 );
+    }
+}
+/* END_CASE */
diff --git a/tests/suites/test_suite_bignum_core.function b/tests/suites/test_suite_bignum_core.function
index 078239f..7bf03fb 100644
--- a/tests/suites/test_suite_bignum_core.function
+++ b/tests/suites/test_suite_bignum_core.function
@@ -1046,15 +1046,13 @@
                        char * input_E, char * input_X )
 {
     mbedtls_mpi_uint *A = NULL;
-    size_t A_limbs;
     mbedtls_mpi_uint *E = NULL;
-    size_t E_limbs;
     mbedtls_mpi_uint *N = NULL;
-    size_t N_limbs;
     mbedtls_mpi_uint *X = NULL;
-    size_t X_limbs;
+    size_t A_limbs, E_limbs, N_limbs, X_limbs;
     const mbedtls_mpi_uint *R2 = NULL;
     mbedtls_mpi_uint *Y = NULL;
+    mbedtls_mpi_uint *T = NULL;
     /* Legacy MPIs for computing R2 */
     mbedtls_mpi N_mpi;
     mbedtls_mpi_init( &N_mpi );
@@ -1078,11 +1076,35 @@
     TEST_EQUAL( 0, mbedtls_mpi_grow( &R2_mpi, N_limbs ) );
     R2 = R2_mpi.p;
 
-    TEST_EQUAL( 0,
-                mbedtls_mpi_core_exp_mod( Y, A, N, N_limbs, E, E_limbs, R2 ) );
+    size_t working_limbs = mbedtls_mpi_core_exp_mod_working_limbs( N_limbs,
+                                                                   E_limbs );
+
+    /* No point exactly duplicating the code in mbedtls_mpi_core_exp_mod_working_limbs()
+     * to see if the output is correct, but we can check that it's in a
+     * reasonable range.  The current calculation works out as
+     * `1 + N_limbs * (welem + 3)`, where welem is the number of elements in
+     * the window (1 << 1 up to 1 << 6).
+     */
+    size_t min_expected_working_limbs = 1 + N_limbs * 4;
+    size_t max_expected_working_limbs = 1 + N_limbs * 67;
+
+    TEST_LE_U( min_expected_working_limbs, working_limbs );
+    TEST_LE_U( working_limbs, max_expected_working_limbs );
+
+    ASSERT_ALLOC( T, working_limbs );
+
+    mbedtls_mpi_core_exp_mod( Y, A, N, N_limbs, E, E_limbs, R2, T );
+
     TEST_EQUAL( 0, memcmp( X, Y, N_limbs * sizeof( mbedtls_mpi_uint ) ) );
 
+    /* Check when output aliased to input */
+
+    mbedtls_mpi_core_exp_mod( A, A, N, N_limbs, E, E_limbs, R2, T );
+
+    TEST_EQUAL( 0, memcmp( X, A, N_limbs * sizeof( mbedtls_mpi_uint ) ) );
+
 exit:
+    mbedtls_free( T );
     mbedtls_free( A );
     mbedtls_free( E );
     mbedtls_free( N );
diff --git a/tests/suites/test_suite_bignum_mod.data b/tests/suites/test_suite_bignum_mod.data
index 2ea4a58..501d9d7 100644
--- a/tests/suites/test_suite_bignum_mod.data
+++ b/tests/suites/test_suite_bignum_mod.data
@@ -17,6 +17,27 @@
 
 # BEGIN MERGE SLOT 3
 
+mpi_mod_sub base case for negative testing (N, a, b all >= 1 limb)
+mpi_mod_sub:"014320a022ccb75bdf470ddf25":"000000025a55a46e5da99c71c7":"00033b2e3c9fd0803ce8000f93":"013fe57440828b4a0008aa4159":0
+
+mpi_mod_sub with modulus too long/both inputs too short
+mpi_mod_sub:"0000000014320a022ccb75bdf470ddf25":"000000025a55a46e5da99c71c7":"00033b2e3c9fd0803ce8000f93":"00":MBEDTLS_ERR_MPI_BAD_INPUT_DATA
+
+mpi_mod_sub with first input too long
+mpi_mod_sub:"014320a022ccb75bdf470ddf25":"0000000000000025a55a46e5da99c71c7":"00033b2e3c9fd0803ce8000f93":"00":MBEDTLS_ERR_MPI_BAD_INPUT_DATA
+
+mpi_mod_sub with second input too long
+mpi_mod_sub:"014320a022ccb75bdf470ddf25":"000000025a55a46e5da99c71c7":"000000000033b2e3c9fd0803ce8000f93":"00":MBEDTLS_ERR_MPI_BAD_INPUT_DATA
+
+mpi_mod_sub with both inputs too long
+mpi_mod_sub:"014320a022ccb75bdf470ddf25":"0000000000000025a55a46e5da99c71c7":"000000000033b2e3c9fd0803ce8000f93":"00":MBEDTLS_ERR_MPI_BAD_INPUT_DATA
+
+mpi_mod_sub with first input too short
+mpi_mod_sub:"014320a022ccb75bdf470ddf25":"a99c71c7":"00033b2e3c9fd0803ce8000f93":"00":MBEDTLS_ERR_MPI_BAD_INPUT_DATA
+
+mpi_mod_sub with second input too short
+mpi_mod_sub:"014320a022ccb75bdf470ddf25":"000000025a55a46e5da99c71c7":"e8000f93":"00":MBEDTLS_ERR_MPI_BAD_INPUT_DATA
+
 # END MERGE SLOT 3
 
 # BEGIN MERGE SLOT 4
diff --git a/tests/suites/test_suite_bignum_mod.function b/tests/suites/test_suite_bignum_mod.function
index a941cb6..0d2e232 100644
--- a/tests/suites/test_suite_bignum_mod.function
+++ b/tests/suites/test_suite_bignum_mod.function
@@ -4,6 +4,47 @@
 #include "bignum_mod.h"
 #include "constant_time_internal.h"
 #include "test/constant_flow.h"
+/* Compare residues a and b: ASSERT_COMPARE on their limb arrays and their sizes in bytes */
+#define TEST_COMPARE_MPI_RESIDUES( a, b ) \
+            ASSERT_COMPARE( (a).p, (a).limbs * sizeof(mbedtls_mpi_uint), \
+                            (b).p, (b).limbs * sizeof(mbedtls_mpi_uint) )
+
+static int test_read_modulus( mbedtls_mpi_mod_modulus *m,
+                              mbedtls_mpi_mod_rep_selector int_rep,
+                              char *input )
+{
+    mbedtls_mpi_uint *limb_buf = NULL;
+    size_t limb_count;
+    /* Read input into a limb array, then set up m over it; returns the first non-zero status. */
+    /* NOTE(review): limb_buf is not freed here if the setup call fails - confirm who owns it */
+    int ret = mbedtls_test_read_mpi_core( &limb_buf, &limb_count, input );
+    if( ret == 0 )
+        ret = mbedtls_mpi_mod_modulus_setup( m, limb_buf, limb_count, int_rep );
+    return( ret );
+}
+
+static int test_read_residue( mbedtls_mpi_mod_residue *r,
+                              const mbedtls_mpi_mod_modulus *m,
+                              char *input,
+                              int skip_limbs_and_value_checks )
+{
+    mbedtls_mpi_uint *limb_buf = NULL;
+    size_t limb_count;
+    /* Read input into a limb array; a read error is returned unchanged */
+    int ret = mbedtls_test_read_mpi_core( &limb_buf, &limb_count, input );
+    if( ret != 0 )
+        return( ret );
+
+    if( ! skip_limbs_and_value_checks )
+    {
+        /* mbedtls_mpi_mod_residue_setup() checks limbs, and that value < m */
+        return( mbedtls_mpi_mod_residue_setup( r, m, limb_buf, limb_count ) );
+    }
+    /* Negative testing: fill in r directly, bypassing the checks done by setup */
+    r->p = limb_buf;
+    r->limbs = limb_count;
+    return( 0 );
+}
 /* END_HEADER */
 
 /* BEGIN_DEPENDENCIES
@@ -64,7 +105,104 @@
 /* END MERGE SLOT 2 */
 
 /* BEGIN MERGE SLOT 3 */
+/* BEGIN_CASE */
+void mpi_mod_sub( char * input_N,
+                  char * input_A, char * input_B,
+                  char * input_D, int oret )
+{
+    mbedtls_mpi_mod_residue a = { NULL, 0 };
+    mbedtls_mpi_mod_residue b = { NULL, 0 };
+    mbedtls_mpi_mod_residue d = { NULL, 0 };
+    mbedtls_mpi_mod_residue x = { NULL, 0 };
+    mbedtls_mpi_uint *X_raw = NULL;
 
+    mbedtls_mpi_mod_modulus m;
+    mbedtls_mpi_mod_modulus_init( &m );
+    /* Test mbedtls_mpi_mod_sub(): expect a - b == d modulo N when oret == 0, else status oret */
+    TEST_EQUAL( 0,
+        test_read_modulus( &m, MBEDTLS_MPI_MOD_REP_MONTGOMERY, input_N ) );
+
+    /* test_read_residue() normally checks that inputs have the same number of
+     * limbs as the modulus. For negative testing we can ask it to skip this
+     * with a non-zero final parameter. */
+    TEST_EQUAL( 0, test_read_residue( &a, &m, input_A, oret != 0 ) );
+    TEST_EQUAL( 0, test_read_residue( &b, &m, input_B, oret != 0 ) );
+    TEST_EQUAL( 0, test_read_residue( &d, &m, input_D, oret != 0 ) );
+
+    size_t limbs = m.limbs;
+    size_t bytes = limbs * sizeof( *X_raw );
+
+    /* One spare limb for negative testing */
+    ASSERT_ALLOC( X_raw, limbs + 1 );
+
+    if( oret == 0 )
+    {
+        /* Sneak in a couple of negative tests on known-good data */
+
+        /* First, negative test with too many limbs in output */
+        x.p = X_raw;
+        x.limbs = limbs + 1;
+        TEST_EQUAL( MBEDTLS_ERR_MPI_BAD_INPUT_DATA,
+                    mbedtls_mpi_mod_sub( &x, &a, &b, &m ) );
+
+        /* Then negative test with too few limbs in output */
+        if( limbs > 1 )
+        {
+            x.p = X_raw;
+            x.limbs = limbs - 1;
+            TEST_EQUAL( MBEDTLS_ERR_MPI_BAD_INPUT_DATA,
+                        mbedtls_mpi_mod_sub( &x, &a, &b, &m ) );
+        }
+
+        /* Negative testing with too many/too few limbs in a and b is covered by
+         * manually-written test cases with oret != 0. */
+
+        /* Back to the normally-scheduled programme */
+    }
+    /* Set up x over X_raw with exactly limbs limbs for the main call */
+    TEST_EQUAL( 0, mbedtls_mpi_mod_residue_setup( &x, &m, X_raw, limbs ) );
+
+    /* a - b => Correct result, or expected error */
+    TEST_EQUAL( oret, mbedtls_mpi_mod_sub( &x, &a, &b, &m ) );
+    if( oret != 0 )
+        goto exit;
+    /* From here on oret == 0: the result must equal the expected difference d */
+    TEST_COMPARE_MPI_RESIDUES( x, d );
+
+    /* a - b: alias x to a => Correct result */
+    memcpy( x.p, a.p, bytes );
+    TEST_EQUAL( 0, mbedtls_mpi_mod_sub( &x, &x, &b, &m ) );
+    TEST_COMPARE_MPI_RESIDUES( x, d );
+
+    /* a - b: alias x to b => Correct result */
+    memcpy( x.p, b.p, bytes );
+    TEST_EQUAL( 0, mbedtls_mpi_mod_sub( &x, &a, &x, &m ) );
+    TEST_COMPARE_MPI_RESIDUES( x, d );
+
+    if ( memcmp( a.p, b.p, bytes ) == 0 )
+    {
+        /* a == b: alias a and b */
+
+        /* a - a => Correct result */
+        TEST_EQUAL( 0, mbedtls_mpi_mod_sub( &x, &a, &a, &m ) );
+        TEST_COMPARE_MPI_RESIDUES( x, d );
+
+        /* a - a: x, a, b all aliased together => Correct result */
+        memcpy( x.p, a.p, bytes );
+        TEST_EQUAL( 0, mbedtls_mpi_mod_sub( &x, &x, &x, &m ) );
+        TEST_COMPARE_MPI_RESIDUES( x, d );
+    }
+
+exit:
+    mbedtls_free( (void *)m.p ); /* mbedtls_mpi_mod_modulus_free() sets m.p = NULL */
+    mbedtls_mpi_mod_modulus_free( &m );
+
+    mbedtls_free( a.p );
+    mbedtls_free( b.p );
+    mbedtls_free( d.p );
+    mbedtls_free( X_raw );
+}
+/* END_CASE */
 /* END MERGE SLOT 3 */
 
 /* BEGIN MERGE SLOT 4 */
diff --git a/tests/suites/test_suite_bignum_mod_raw.function b/tests/suites/test_suite_bignum_mod_raw.function
index c7decf0..83e1f54 100644
--- a/tests/suites/test_suite_bignum_mod_raw.function
+++ b/tests/suites/test_suite_bignum_mod_raw.function
@@ -349,6 +349,75 @@
 
 /* BEGIN MERGE SLOT 3 */
 
+/* BEGIN_CASE */
+void mpi_mod_raw_inv_prime( char * input_N, char * input_A, char * input_X )
+{
+    mbedtls_mpi_uint *A = NULL;
+    mbedtls_mpi_uint *N = NULL;
+    mbedtls_mpi_uint *X = NULL;
+    size_t A_limbs, N_limbs, X_limbs;
+    mbedtls_mpi_uint *Y = NULL;
+    mbedtls_mpi_uint *T = NULL;
+    const mbedtls_mpi_uint *R2 = NULL;
+    /* Check mbedtls_mpi_mod_raw_inv_prime() on A and N against expected X, with and without aliasing */
+    /* Legacy MPIs for computing R2 */
+    mbedtls_mpi N_mpi;  /* gets set up manually, aliasing N, so no need to free */
+    mbedtls_mpi R2_mpi;
+    mbedtls_mpi_init( &R2_mpi );
+
+    TEST_EQUAL( 0, mbedtls_test_read_mpi_core( &A, &A_limbs, input_A ) );
+    TEST_EQUAL( 0, mbedtls_test_read_mpi_core( &N, &N_limbs, input_N ) );
+    TEST_EQUAL( 0, mbedtls_test_read_mpi_core( &X, &X_limbs, input_X ) );
+    ASSERT_ALLOC( Y, N_limbs );
+    /* The input and the expected output must have as many limbs as the modulus */
+    TEST_EQUAL( A_limbs, N_limbs );
+    TEST_EQUAL( X_limbs, N_limbs );
+    /* Wrap N in a legacy MPI without copying, compute R2 from it, then grow R2 to N_limbs limbs */
+    N_mpi.s = 1;
+    N_mpi.p = N;
+    N_mpi.n = N_limbs;
+    TEST_EQUAL( 0, mbedtls_mpi_core_get_mont_r2_unsafe( &R2_mpi, &N_mpi ) );
+    TEST_EQUAL( 0, mbedtls_mpi_grow( &R2_mpi, N_limbs ) );
+    R2 = R2_mpi.p;
+
+    size_t working_limbs = mbedtls_mpi_mod_raw_inv_prime_working_limbs( N_limbs );
+
+    /* No point exactly duplicating the code in mbedtls_mpi_mod_raw_inv_prime_working_limbs()
+     * to see if the output is correct, but we can check that it's in a
+     * reasonable range.  The current calculation works out as
+     * `1 + N_limbs * (welem + 4)`, where welem is the number of elements in
+     * the window (1 << 1 up to 1 << 6).
+     */
+    size_t min_expected_working_limbs = 1 + N_limbs * 5;
+    size_t max_expected_working_limbs = 1 + N_limbs * 68;
+
+    TEST_LE_U( min_expected_working_limbs, working_limbs );
+    TEST_LE_U( working_limbs, max_expected_working_limbs );
+
+    ASSERT_ALLOC( T, working_limbs );
+
+    mbedtls_mpi_mod_raw_inv_prime( Y, A, N, N_limbs, R2, T );
+
+    TEST_EQUAL( 0, memcmp( X, Y, N_limbs * sizeof( mbedtls_mpi_uint ) ) );
+
+    /* Check when output aliased to input */
+
+    mbedtls_mpi_mod_raw_inv_prime( A, A, N, N_limbs, R2, T );
+
+    TEST_EQUAL( 0, memcmp( X, A, N_limbs * sizeof( mbedtls_mpi_uint ) ) );
+
+exit:
+    mbedtls_free( T );
+    mbedtls_free( A );
+    mbedtls_free( N );
+    mbedtls_free( X );
+    mbedtls_free( Y );
+    mbedtls_mpi_free( &R2_mpi );
+    // R2 doesn't need to be freed as it is only aliasing R2_mpi
+    // N_mpi doesn't need to be freed as it is only aliasing N
+}
+/* END_CASE */
+
 /* END MERGE SLOT 3 */
 
 /* BEGIN MERGE SLOT 4 */
@@ -526,6 +595,60 @@
     mbedtls_free( X );
 }
 /* END_CASE */
+
+/* BEGIN_CASE */
+void mpi_mod_raw_neg( char * input_N, char * input_A, char * input_X )
+{
+    mbedtls_mpi_uint *N = NULL;
+    mbedtls_mpi_uint *A = NULL;
+    mbedtls_mpi_uint *X = NULL;
+    mbedtls_mpi_uint *R = NULL;
+    mbedtls_mpi_uint *Z = NULL;
+    size_t n_limbs, a_limbs, x_limbs, bytes;
+    /* Test mbedtls_mpi_mod_raw_neg(): Neg( A ) must equal X; Neg( 0 ) and Neg( N ) must be zero */
+    mbedtls_mpi_mod_modulus m;
+    mbedtls_mpi_mod_modulus_init( &m );
+
+    /* Read inputs */
+    TEST_EQUAL( 0, mbedtls_test_read_mpi_core( &N, &n_limbs, input_N ) );
+    TEST_EQUAL( 0, mbedtls_test_read_mpi_core( &A, &a_limbs, input_A ) );
+    TEST_EQUAL( 0, mbedtls_test_read_mpi_core( &X, &x_limbs, input_X ) );
+
+    TEST_EQUAL( a_limbs, n_limbs );
+    TEST_EQUAL( x_limbs, n_limbs );
+    bytes = n_limbs * sizeof( mbedtls_mpi_uint );
+    /* R receives results; Z is used as the value 0 (assumes ASSERT_ALLOC zero-fills - confirm) */
+    ASSERT_ALLOC( R, n_limbs );
+    ASSERT_ALLOC( Z, n_limbs );
+
+    TEST_EQUAL( 0, mbedtls_mpi_mod_modulus_setup( &m, N, n_limbs,
+            MBEDTLS_MPI_MOD_REP_MONTGOMERY ) );
+
+    /* Neg( A == 0 ) => Zero result */
+    mbedtls_mpi_mod_raw_neg( R, Z, &m );
+    ASSERT_COMPARE( R, bytes, Z, bytes );
+
+    /* Neg( A == N ) => Zero result */
+    mbedtls_mpi_mod_raw_neg( R, N, &m );
+    ASSERT_COMPARE( R, bytes, Z, bytes );
+
+    /* Neg( A ) => Correct result */
+    mbedtls_mpi_mod_raw_neg( R, A, &m );
+    ASSERT_COMPARE( R, bytes, X, bytes );
+
+    /* Neg( A ): alias A to R => Correct result */
+    mbedtls_mpi_mod_raw_neg( A, A, &m );
+    ASSERT_COMPARE( A, bytes, X, bytes );
+exit:
+    mbedtls_mpi_mod_modulus_free( &m );
+    mbedtls_free( N );
+    mbedtls_free( A );
+    mbedtls_free( X );
+    mbedtls_free( R );
+    mbedtls_free( Z );
+}
+/* END_CASE */
+
 /* END MERGE SLOT 7 */
 
 /* BEGIN MERGE SLOT 8 */
diff --git a/tests/suites/test_suite_common.data b/tests/suites/test_suite_common.data
new file mode 100644
index 0000000..500852d
--- /dev/null
+++ b/tests/suites/test_suite_common.data
@@ -0,0 +1,20 @@
+Block xor, length 0
+mbedtls_xor:0
+
+Block xor, length 1
+mbedtls_xor:1
+
+Block xor, length 3
+mbedtls_xor:3
+
+Block xor, length 4
+mbedtls_xor:4
+
+Block xor, length 7
+mbedtls_xor:7
+
+Block xor, length 8
+mbedtls_xor:8
+
+Block xor, length 16
+mbedtls_xor:16
diff --git a/tests/suites/test_suite_common.function b/tests/suites/test_suite_common.function
new file mode 100644
index 0000000..4444a52
--- /dev/null
+++ b/tests/suites/test_suite_common.function
@@ -0,0 +1,90 @@
+/* BEGIN_HEADER */
+#include "../library/common.h"
+
+void fill_arrays( unsigned char *a, unsigned char *b, unsigned char *r1, unsigned char *r2, size_t n )
+{
+    /* Pattern per index i: a = 3i, b = 3i + 1, r1 = r2 = 3i + 2, each truncated to unsigned char */
+    for ( size_t idx = 0; idx < n; idx++ )
+    {
+        a[idx]  = (unsigned char) ( 3 * idx );
+        b[idx]  = (unsigned char) ( 3 * idx + 1 );
+        r1[idx] = r2[idx] = (unsigned char) ( 3 * idx + 2 );
+    }
+}
+/* END_HEADER */
+
+/* BEGIN_CASE */
+void mbedtls_xor( int len )
+{
+    size_t n = (size_t) len;
+    unsigned char *a = NULL, *b = NULL, *r1 = NULL, *r2 = NULL;
+    ASSERT_ALLOC( a, n + 1 );
+    ASSERT_ALLOC( b, n + 1 );
+    ASSERT_ALLOC( r1, n + 1 );
+    ASSERT_ALLOC( r2, n + 1 );
+    /* Compare mbedtls_xor() (into r2) with a byte-wise reference (in r1): aliased and unaligned cases */
+    /* Test non-overlapping */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = a[i] ^ b[i];
+    }
+    mbedtls_xor( r2, a, b, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test r == a */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = r1[i] ^ b[i];
+    }
+    mbedtls_xor( r2, r2, b, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test r == b */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = a[i] ^ r1[i];
+    }
+    mbedtls_xor( r2, a, r2, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test a == b */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = a[i] ^ a[i];
+    }
+    mbedtls_xor( r2, a, a, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test a == b == r */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = r1[i] ^ r1[i];
+    }
+    mbedtls_xor( r2, r2, r2, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test non-word-aligned buffers: bits 0..2 of i pick offset 0 or 1 for r, a, b (all 8 cases) */
+    for ( int i = 0; i < 8; i++ )
+    {
+        int r_off = i & 1, a_off = (i & 2) >> 1, b_off = (i & 4) >> 2;
+        fill_arrays( a, b, r1, r2, n + 1 );
+        /* Buffers hold n + 1 bytes, so offset 1 plus n bytes stays in bounds */
+        for ( size_t j = 0; j < n; j++ )
+        {
+            r1[j + r_off] = a[j + a_off] ^ b[j + b_off];
+        }
+        mbedtls_xor( r2 + r_off, a + a_off, b + b_off, n );
+        ASSERT_COMPARE( r1 + r_off, n, r2 + r_off, n );
+    }
+exit:
+    mbedtls_free( a );
+    mbedtls_free( b );
+    mbedtls_free( r1 );
+    mbedtls_free( r2 );
+}
+/* END_CASE */
diff --git a/tests/suites/test_suite_constant_time.data b/tests/suites/test_suite_constant_time.data
new file mode 100644
index 0000000..4504aa4
--- /dev/null
+++ b/tests/suites/test_suite_constant_time.data
@@ -0,0 +1,11 @@
+# these are the numbers we'd get with an empty plaintext and truncated HMAC
+Constant-flow memcpy from offset: small
+ssl_cf_memcpy_offset:0:5:10
+
+# we could get this with 255-bytes plaintext and untruncated SHA-256
+Constant-flow memcpy from offset: medium
+ssl_cf_memcpy_offset:0:255:32
+
+# we could get this with 255-bytes plaintext and untruncated SHA-384
+Constant-flow memcpy from offset: large
+ssl_cf_memcpy_offset:100:339:48
diff --git a/tests/suites/test_suite_constant_time.function b/tests/suites/test_suite_constant_time.function
new file mode 100644
index 0000000..a3673b7
--- /dev/null
+++ b/tests/suites/test_suite_constant_time.function
@@ -0,0 +1,49 @@
+/* BEGIN_HEADER */
+/** \file test_suite_constant_time.function
+ *
+ * Functional testing of functions in the constant_time module.
+ *
+ * The tests are instrumented with #TEST_CF_SECRET and #TEST_CF_PUBLIC
+ * (see tests/include/test/constant_flow.h) so that running the tests
+ * under MSan or Valgrind will detect a non-constant-time implementation.
+ */
+
+#include <mbedtls/constant_time.h>
+#include <constant_time_internal.h>
+#include <constant_time_invasive.h>
+
+#include <test/constant_flow.h>
+/* END_HEADER */
+
+/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC:MBEDTLS_TEST_HOOKS */
+void ssl_cf_memcpy_offset( int offset_min, int offset_max, int len )
+{
+    unsigned char *dst = NULL;
+    unsigned char *src = NULL;
+    size_t src_len = offset_max + len;
+    size_t secret;
+    /* For each offset in [offset_min, offset_max], copy len bytes from src + offset and check them */
+    ASSERT_ALLOC( dst, len );
+    ASSERT_ALLOC( src, src_len );
+
+    /* Fill src in a way that we can detect if we copied the right bytes */
+    mbedtls_test_rnd_std_rand( NULL, src, src_len );
+
+    for( secret = offset_min; secret <= (size_t) offset_max; secret++ )
+    {
+        mbedtls_test_set_step( (int) secret );
+        /* The offset is marked secret only around the call under test, then made public again */
+        TEST_CF_SECRET( &secret, sizeof( secret ) );
+        mbedtls_ct_memcpy_offset( dst, src, secret,
+                                  offset_min, offset_max, len );
+        TEST_CF_PUBLIC( &secret, sizeof( secret ) );
+        TEST_CF_PUBLIC( dst, len );
+
+        ASSERT_COMPARE( dst, len, src + secret, len );
+    }
+
+exit:
+    mbedtls_free( dst );
+    mbedtls_free( src );
+}
+/* END_CASE */
diff --git a/tests/suites/test_suite_constant_time_hmac.data b/tests/suites/test_suite_constant_time_hmac.data
new file mode 100644
index 0000000..abf90f0
--- /dev/null
+++ b/tests/suites/test_suite_constant_time_hmac.data
@@ -0,0 +1,15 @@
+Constant-flow HMAC: MD5
+depends_on:MBEDTLS_HAS_ALG_MD5_VIA_MD_OR_PSA_BASED_ON_USE_PSA
+ssl_cf_hmac:MBEDTLS_MD_MD5
+
+Constant-flow HMAC: SHA1
+depends_on:MBEDTLS_HAS_ALG_SHA_1_VIA_MD_OR_PSA_BASED_ON_USE_PSA
+ssl_cf_hmac:MBEDTLS_MD_SHA1
+
+Constant-flow HMAC: SHA256
+depends_on:MBEDTLS_HAS_ALG_SHA_256_VIA_MD_OR_PSA_BASED_ON_USE_PSA
+ssl_cf_hmac:MBEDTLS_MD_SHA256
+
+Constant-flow HMAC: SHA384
+depends_on:MBEDTLS_HAS_ALG_SHA_384_VIA_MD_OR_PSA_BASED_ON_USE_PSA
+ssl_cf_hmac:MBEDTLS_MD_SHA384
diff --git a/tests/suites/test_suite_constant_time_hmac.function b/tests/suites/test_suite_constant_time_hmac.function
new file mode 100644
index 0000000..f8c1bfc
--- /dev/null
+++ b/tests/suites/test_suite_constant_time_hmac.function
@@ -0,0 +1,160 @@
+/* BEGIN_HEADER */
+
+#include <mbedtls/constant_time.h>
+#include <mbedtls/legacy_or_psa.h>
+#include <mbedtls/md.h>
+#include <constant_time_internal.h>
+#include <hash_info.h>
+
+#include <test/constant_flow.h>
+/* END_HEADER */
+
+/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC:MBEDTLS_TEST_HOOKS */
+void ssl_cf_hmac( int hash )
+{
+    /*
+     * Test the function mbedtls_ct_hmac() against a reference
+     * implementation.
+     */
+#if defined(MBEDTLS_USE_PSA_CRYPTO)
+    mbedtls_svc_key_id_t key = MBEDTLS_SVC_KEY_ID_INIT;
+    psa_key_attributes_t attributes = PSA_KEY_ATTRIBUTES_INIT;
+    psa_algorithm_t alg;
+    psa_mac_operation_t operation = PSA_MAC_OPERATION_INIT;
+#else
+    mbedtls_md_context_t ctx, ref_ctx;
+    const mbedtls_md_info_t *md_info;
+#endif /* MBEDTLS_USE_PSA_CRYPTO */
+    size_t out_len, block_size;
+    size_t min_in_len, in_len, max_in_len, i;
+    /* TLS additional data is 13 bytes (hence the "lucky 13" name) */
+    unsigned char add_data[13];
+    unsigned char ref_out[MBEDTLS_HASH_MAX_SIZE];
+    unsigned char *data = NULL;
+    unsigned char *out = NULL;
+    unsigned char rec_num = 0;
+
+    USE_PSA_INIT( );
+
+#if defined(MBEDTLS_USE_PSA_CRYPTO)
+    alg = PSA_ALG_HMAC( mbedtls_hash_info_psa_from_md( hash ) );
+    /* NOTE(review): alg is already PSA_ALG_HMAC( ... ); later PSA_ALG_HMAC( alg ) looks redundant */
+    out_len = PSA_HASH_LENGTH( alg );
+    block_size = PSA_HASH_BLOCK_LENGTH( alg );
+
+    /* mbedtls_ct_hmac() requires the key to be exportable */
+    psa_set_key_usage_flags( &attributes, PSA_KEY_USAGE_EXPORT |
+                                          PSA_KEY_USAGE_VERIFY_HASH );
+    psa_set_key_algorithm( &attributes, PSA_ALG_HMAC( alg ) );
+    psa_set_key_type( &attributes, PSA_KEY_TYPE_HMAC );
+#else
+    mbedtls_md_init( &ctx );
+    mbedtls_md_init( &ref_ctx );
+
+    md_info = mbedtls_md_info_from_type( hash );
+    TEST_ASSERT( md_info != NULL );
+    out_len = mbedtls_md_get_size( md_info );
+    TEST_ASSERT( out_len != 0 );
+    block_size = hash == MBEDTLS_MD_SHA384 ? 128 : 64;
+#endif /* MBEDTLS_USE_PSA_CRYPTO */
+
+    /* Use allocated out buffer to catch overwrites */
+    ASSERT_ALLOC( out, out_len );
+
+#if defined(MBEDTLS_USE_PSA_CRYPTO)
+    /* Set up dummy key */
+    memset( ref_out, 42, sizeof( ref_out ) );
+    TEST_EQUAL( PSA_SUCCESS, psa_import_key( &attributes,
+                                             ref_out, out_len,
+                                             &key ) );
+#else
+    /* Set up contexts with the given hash and a dummy key */
+    TEST_EQUAL( 0, mbedtls_md_setup( &ctx, md_info, 1 ) );
+    TEST_EQUAL( 0, mbedtls_md_setup( &ref_ctx, md_info, 1 ) );
+    memset( ref_out, 42, sizeof( ref_out ) );
+    TEST_EQUAL( 0, mbedtls_md_hmac_starts( &ctx, ref_out, out_len ) );
+    TEST_EQUAL( 0, mbedtls_md_hmac_starts( &ref_ctx, ref_out, out_len ) );
+    memset( ref_out, 0, sizeof( ref_out ) );
+#endif
+
+    /*
+     * Test all possible lengths up to a point. The difference between
+     * max_in_len and min_in_len is at most 255, and make sure they both vary
+     * by at least one block size.
+     */
+    for( max_in_len = 0; max_in_len <= 255 + block_size; max_in_len++ )
+    {
+        mbedtls_test_set_step( max_in_len * 10000 );
+
+        /* Use allocated in buffer to catch overreads */
+        ASSERT_ALLOC( data, max_in_len );
+
+        min_in_len = max_in_len > 255 ? max_in_len - 255 : 0;
+        for( in_len = min_in_len; in_len <= max_in_len; in_len++ )
+        {
+            mbedtls_test_set_step( max_in_len * 10000 + in_len );
+
+            /* Set up dummy data and add_data */
+            rec_num++;
+            memset( add_data, rec_num, sizeof( add_data ) );
+            for( i = 0; i < in_len; i++ )
+                data[i] = ( i & 0xff ) ^ rec_num;
+
+            /* Get the function's result */
+            TEST_CF_SECRET( &in_len, sizeof( in_len ) );
+#if defined(MBEDTLS_USE_PSA_CRYPTO)
+            TEST_EQUAL( 0, mbedtls_ct_hmac( key, PSA_ALG_HMAC( alg ),
+                                            add_data, sizeof( add_data ),
+                                            data, in_len,
+                                            min_in_len, max_in_len,
+                                            out ) );
+#else
+            TEST_EQUAL( 0, mbedtls_ct_hmac( &ctx, add_data, sizeof( add_data ),
+                                            data, in_len,
+                                            min_in_len, max_in_len,
+                                            out ) );
+#endif /* MBEDTLS_USE_PSA_CRYPTO */
+            TEST_CF_PUBLIC( &in_len, sizeof( in_len ) );
+            TEST_CF_PUBLIC( out, out_len );
+            /* Reference check: PSA verifies out directly; otherwise recompute with ref_ctx and compare */
+#if defined(MBEDTLS_USE_PSA_CRYPTO)
+            TEST_EQUAL( PSA_SUCCESS, psa_mac_verify_setup( &operation,
+                                                           key, alg ) );
+            TEST_EQUAL( PSA_SUCCESS, psa_mac_update( &operation, add_data,
+                                                     sizeof( add_data ) ) );
+            TEST_EQUAL( PSA_SUCCESS, psa_mac_update( &operation,
+                                                     data, in_len ) );
+            TEST_EQUAL( PSA_SUCCESS, psa_mac_verify_finish( &operation,
+                                                            out, out_len ) );
+#else
+            /* Compute the reference result */
+            TEST_EQUAL( 0, mbedtls_md_hmac_update( &ref_ctx, add_data,
+                                                   sizeof( add_data ) ) );
+            TEST_EQUAL( 0, mbedtls_md_hmac_update( &ref_ctx, data, in_len ) );
+            TEST_EQUAL( 0, mbedtls_md_hmac_finish( &ref_ctx, ref_out ) );
+            TEST_EQUAL( 0, mbedtls_md_hmac_reset( &ref_ctx ) );
+
+            /* Compare */
+            ASSERT_COMPARE( out, out_len, ref_out, out_len );
+#endif /* MBEDTLS_USE_PSA_CRYPTO */
+        }
+        /* data is reallocated for each max_in_len; reset it so exit does not free it twice */
+        mbedtls_free( data );
+        data = NULL;
+    }
+
+exit:
+#if defined(MBEDTLS_USE_PSA_CRYPTO)
+    psa_mac_abort( &operation );
+    psa_destroy_key( key );
+#else
+    mbedtls_md_free( &ref_ctx );
+    mbedtls_md_free( &ctx );
+#endif /* MBEDTLS_USE_PSA_CRYPTO */
+
+    mbedtls_free( data );
+    mbedtls_free( out );
+
+    USE_PSA_DONE( );
+}
+/* END_CASE */
diff --git a/tests/suites/test_suite_mps.function b/tests/suites/test_suite_mps.function
index c40c50e..ec1122a 100644
--- a/tests/suites/test_suite_mps.function
+++ b/tests/suites/test_suite_mps.function
@@ -295,7 +295,7 @@
     /* This test exercises the behaviour of the MPS reader with accumulator
      * in the situation where upon calling mbedtls_mps_reader_reclaim(), the
      * uncommitted data together with the excess data missing in the last
-     * call to medtls_mps_reader_get() exceeds the bounds of the type
+     * call to mbedtls_mps_reader_get() exceeds the bounds of the type
      * holding the buffer length.
      */
 
diff --git a/tests/suites/test_suite_psa_crypto.data b/tests/suites/test_suite_psa_crypto.data
index 946234c..9ced77c 100644
--- a/tests/suites/test_suite_psa_crypto.data
+++ b/tests/suites/test_suite_psa_crypto.data
@@ -4138,7 +4138,7 @@
 sign_hash_deterministic:PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1):"ab45435712649cb30bbddac49197eebf2740ffc7f874d9244c3460f54f322d3a":PSA_ALG_DETERMINISTIC_ECDSA( PSA_ALG_SHA_256 ):"9ac4335b469bbd791439248504dd0d49c71349a295fee5a1c68507f45a9e1c7b":"6a3399f69421ffe1490377adf2ea1f117d81a63cf5bf22e918d51175eb259151ce95d7c26cc04e25503e2f7a1ec3573e3c2412534bb4a19b3a7811742f49f50f"
 
 PSA sign hash: deterministic ECDSA SECP256R1 SHA-384
-depends_on:PSA_WANT_ALG_DETERMINISTIC_ECDSA:PSA_WANT_ALG_SHA_384:PSA_WANT_KEY_TYPE_ECC_KEY_PAIR:MBEDTLS_PK_PARSE_C:PSA_WANT_ECC_SECP_R1_256:MBEDLTS_PSA_BUILTIN_ALG_SHA_384
+depends_on:PSA_WANT_ALG_DETERMINISTIC_ECDSA:PSA_WANT_ALG_SHA_384:PSA_WANT_KEY_TYPE_ECC_KEY_PAIR:MBEDTLS_PK_PARSE_C:PSA_WANT_ECC_SECP_R1_256:MBEDTLS_PSA_BUILTIN_ALG_SHA_384
 sign_hash_deterministic:PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1):"49c9a8c18c4b885638c431cf1df1c994131609b580d4fd43a0cab17db2f13eee":PSA_ALG_DETERMINISTIC_ECDSA( PSA_ALG_SHA_384 ):"59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f":"cd40ba1b555ca5994d30ddffc4ad734b1f5c604675b0f249814aa5de3992ef3ddf4d5dc5d2aab1979ce210b560754df671363d99795475882894c048e3b986ca"
 
 PSA sign hash: deterministic ECDSA SECP384R1 SHA-256
@@ -4406,7 +4406,7 @@
 sign_message_deterministic:PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1):"ab45435712649cb30bbddac49197eebf2740ffc7f874d9244c3460f54f322d3a":PSA_ALG_DETERMINISTIC_ECDSA(PSA_ALG_SHA_256):"616263":"36e5b5a7da1c9c265dc447de3a5a704fcb8c03f7a3749dde48d84c9bf736fc1ed48d8b3660e7d3cbc6b1870730b7ce2a043f69e37ccb340b98d1e65184e03548"
 
 PSA sign message: deterministic ECDSA SECP256R1 SHA-384
-depends_on:PSA_WANT_ALG_DETERMINISTIC_ECDSA:PSA_WANT_ALG_SHA_384:PSA_WANT_KEY_TYPE_ECC_KEY_PAIR:MBEDTLS_PK_PARSE_C:PSA_WANT_ECC_SECP_R1_256:MBEDLTS_PSA_BUILTIN_ALG_SHA_384
+depends_on:PSA_WANT_ALG_DETERMINISTIC_ECDSA:PSA_WANT_ALG_SHA_384:PSA_WANT_KEY_TYPE_ECC_KEY_PAIR:MBEDTLS_PK_PARSE_C:PSA_WANT_ECC_SECP_R1_256:MBEDTLS_PSA_BUILTIN_ALG_SHA_384
 sign_message_deterministic:PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1):"49c9a8c18c4b885638c431cf1df1c994131609b580d4fd43a0cab17db2f13eee":PSA_ALG_DETERMINISTIC_ECDSA(PSA_ALG_SHA_384):"616263":"7ea712a20e3a8cbe0c6e64195362ba7635bbe78af51ddedd7a5fd858395250c592654c35d3b0614ae0e3b329c25cf5b4a5fcb243af3e3ad15c8446fe401be066"
 
 PSA sign message: deterministic ECDSA SECP384R1 SHA-256
@@ -4526,7 +4526,7 @@
 sign_verify_message:PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1):"ab45435712649cb30bbddac49197eebf2740ffc7f874d9244c3460f54f322d3a":PSA_ALG_ECDSA(PSA_ALG_SHA_384):"616263"
 
 PSA sign/verify message: deterministic ECDSA SECP256R1 SHA-384
-depends_on:PSA_WANT_ALG_DETERMINISTIC_ECDSA:PSA_WANT_ALG_SHA_384:PSA_WANT_KEY_TYPE_ECC_KEY_PAIR:MBEDTLS_PK_PARSE_C:PSA_WANT_ECC_SECP_R1_256:MBEDLTS_PSA_BUILTIN_ALG_SHA_384
+depends_on:PSA_WANT_ALG_DETERMINISTIC_ECDSA:PSA_WANT_ALG_SHA_384:PSA_WANT_KEY_TYPE_ECC_KEY_PAIR:MBEDTLS_PK_PARSE_C:PSA_WANT_ECC_SECP_R1_256:MBEDTLS_PSA_BUILTIN_ALG_SHA_384
 sign_verify_message:PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1):"ab45435712649cb30bbddac49197eebf2740ffc7f874d9244c3460f54f322d3a":PSA_ALG_DETERMINISTIC_ECDSA(PSA_ALG_SHA_384):"616263"
 
 PSA sign/verify message: randomized ECDSA SECP384R1 SHA-256
diff --git a/tests/suites/test_suite_psa_crypto.function b/tests/suites/test_suite_psa_crypto.function
index 0c0f2ed..dbbac76 100644
--- a/tests/suites/test_suite_psa_crypto.function
+++ b/tests/suites/test_suite_psa_crypto.function
@@ -622,7 +622,7 @@
  *                                  the data in to be encrypted / decrypted. If
  *                                  -1, no chunking
  * \param expected_output           Expected output
- * \param is_verify                 If non-zero this is an verify operation.
+ * \param is_verify                 If non-zero this is a verify operation.
  * \param do_zero_parts             If non-zero, interleave zero length chunks
  *                                  with normal length chunks.
  * \return int                      Zero on failure, non-zero on success.
@@ -6131,7 +6131,7 @@
 
     psa_aead_abort( &operation );
 
-    /* Test for calling set lengths with an plaintext length of SIZE_MAX, after setting nonce */
+    /* Test for calling set lengths with a plaintext length of SIZE_MAX, after setting nonce */
     PSA_ASSERT( psa_aead_encrypt_setup( &operation, key, alg ) );
 
     PSA_ASSERT( psa_aead_set_nonce( &operation, nonce->x, nonce->len ) );
diff --git a/tests/suites/test_suite_psa_crypto_driver_wrappers.function b/tests/suites/test_suite_psa_crypto_driver_wrappers.function
index b895796..b713cb2 100644
--- a/tests/suites/test_suite_psa_crypto_driver_wrappers.function
+++ b/tests/suites/test_suite_psa_crypto_driver_wrappers.function
@@ -748,14 +748,14 @@
     mbedtls_psa_cipher_operation_t mbedtls_operation =
             MBEDTLS_PSA_CIPHER_OPERATION_INIT;
 
-    mbedtls_transparent_test_driver_cipher_operation_t tranparent_operation =
+    mbedtls_transparent_test_driver_cipher_operation_t transparent_operation =
             MBEDTLS_TRANSPARENT_TEST_DRIVER_CIPHER_OPERATION_INIT;
 
     mbedtls_opaque_test_driver_cipher_operation_t opaque_operation =
             MBEDTLS_OPAQUE_TEST_DRIVER_CIPHER_OPERATION_INIT;
 
     operation.ctx.mbedtls_ctx = mbedtls_operation;
-    operation.ctx.transparent_test_driver_ctx = tranparent_operation;
+    operation.ctx.transparent_test_driver_ctx = transparent_operation;
     operation.ctx.opaque_test_driver_ctx = opaque_operation;
 
     PSA_ASSERT( psa_crypto_init( ) );
@@ -880,14 +880,14 @@
     mbedtls_psa_cipher_operation_t mbedtls_operation =
             MBEDTLS_PSA_CIPHER_OPERATION_INIT;
 
-    mbedtls_transparent_test_driver_cipher_operation_t tranparent_operation =
+    mbedtls_transparent_test_driver_cipher_operation_t transparent_operation =
             MBEDTLS_TRANSPARENT_TEST_DRIVER_CIPHER_OPERATION_INIT;
 
     mbedtls_opaque_test_driver_cipher_operation_t opaque_operation =
             MBEDTLS_OPAQUE_TEST_DRIVER_CIPHER_OPERATION_INIT;
 
     operation.ctx.mbedtls_ctx = mbedtls_operation;
-    operation.ctx.transparent_test_driver_ctx = tranparent_operation;
+    operation.ctx.transparent_test_driver_ctx = transparent_operation;
     operation.ctx.opaque_test_driver_ctx = opaque_operation;
 
     PSA_ASSERT( psa_crypto_init( ) );
diff --git a/tests/suites/test_suite_psa_crypto_generate_key.function b/tests/suites/test_suite_psa_crypto_generate_key.function
index dbe9a0e..6dc6043 100644
--- a/tests/suites/test_suite_psa_crypto_generate_key.function
+++ b/tests/suites/test_suite_psa_crypto_generate_key.function
@@ -18,7 +18,7 @@
     psa_key_attributes_t attributes = PSA_KEY_ATTRIBUTES_INIT;
     mbedtls_svc_key_id_t key_id = INVALID_KEY_ID;
 
-    // key lifetiem, usage flags, algorithm are irrelevant for this test
+    // key lifetime, usage flags, algorithm are irrelevant for this test
     psa_key_type_t key_type = key_type_arg;
     size_t bits = bits_arg;
     psa_status_t expected_status = expected_status_arg;
diff --git a/tests/suites/test_suite_psa_crypto_persistent_key.function b/tests/suites/test_suite_psa_crypto_persistent_key.function
index 08db34a..bb87923 100644
--- a/tests/suites/test_suite_psa_crypto_persistent_key.function
+++ b/tests/suites/test_suite_psa_crypto_persistent_key.function
@@ -2,7 +2,7 @@
 
 /* The tests in this module verify the contents of key store files. They
  * access internal key storage functions directly. Some of the tests depend
- * on the the storage format. On the other hand, these tests treat the storage
+ * on the storage format. On the other hand, these tests treat the storage
  * subsystem as a black box, and in particular have no reliance on the
  * internals of the ITS implementation.
  *
diff --git a/tests/suites/test_suite_ssl.data b/tests/suites/test_suite_ssl.data
index e718411..a7f0501 100644
--- a/tests/suites/test_suite_ssl.data
+++ b/tests/suites/test_suite_ssl.data
@@ -3378,34 +3378,6 @@
 depends_on:MBEDTLS_SSL_PROTO_TLS1_3:MBEDTLS_SSL_SRV_C
 ssl_serialize_session_load_buf_size:0:"":MBEDTLS_SSL_IS_SERVER:MBEDTLS_SSL_VERSION_TLS1_3
 
-Constant-flow HMAC: MD5
-depends_on:MBEDTLS_HAS_ALG_MD5_VIA_MD_OR_PSA_BASED_ON_USE_PSA
-ssl_cf_hmac:MBEDTLS_MD_MD5
-
-Constant-flow HMAC: SHA1
-depends_on:MBEDTLS_HAS_ALG_SHA_1_VIA_MD_OR_PSA_BASED_ON_USE_PSA
-ssl_cf_hmac:MBEDTLS_MD_SHA1
-
-Constant-flow HMAC: SHA256
-depends_on:MBEDTLS_HAS_ALG_SHA_256_VIA_MD_OR_PSA_BASED_ON_USE_PSA
-ssl_cf_hmac:MBEDTLS_MD_SHA256
-
-Constant-flow HMAC: SHA384
-depends_on:MBEDTLS_HAS_ALG_SHA_384_VIA_MD_OR_PSA_BASED_ON_USE_PSA
-ssl_cf_hmac:MBEDTLS_MD_SHA384
-
-# these are the numbers we'd get with an empty plaintext and truncated HMAC
-Constant-flow memcpy from offset: small
-ssl_cf_memcpy_offset:0:5:10
-
-# we could get this with 255-bytes plaintext and untruncated SHA-256
-Constant-flow memcpy from offset: medium
-ssl_cf_memcpy_offset:0:255:32
-
-# we could get this with 255-bytes plaintext and untruncated SHA-384
-Constant-flow memcpy from offset: large
-ssl_cf_memcpy_offset:100:339:48
-
 Test configuration of groups for DHE through mbedtls_ssl_conf_curves()
 conf_curve:
 
diff --git a/tests/suites/test_suite_ssl.function b/tests/suites/test_suite_ssl.function
index 7447a1d..674e649 100644
--- a/tests/suites/test_suite_ssl.function
+++ b/tests/suites/test_suite_ssl.function
@@ -511,7 +511,7 @@
  * \p peer1 and \p peer2 must have been previously initialized by calling
  * mbedtls_mock_socket_init().
  *
- * The capacites of the internal buffers are set to \p bufsize. Setting this to
+ * The capacities of the internal buffers are set to \p bufsize. Setting this to
  * the correct value allows for simulation of MTU, sanity testing the mock
  * implementation and mocking TCP connections with lower memory cost.
  */
@@ -652,7 +652,7 @@
 }
 
 /*
- * Setup a given mesasge socket context including initialization of
+ * Setup a given message socket context including initialization of
  * input/output queues to a chosen capacity of messages. Also set the
  * corresponding mock socket.
  *
@@ -5438,189 +5438,6 @@
 }
 /* END_CASE */
 
-/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC:MBEDTLS_TEST_HOOKS */
-void ssl_cf_hmac( int hash )
-{
-    /*
-     * Test the function mbedtls_ct_hmac() against a reference
-     * implementation.
-     */
-#if defined(MBEDTLS_USE_PSA_CRYPTO)
-    mbedtls_svc_key_id_t key = MBEDTLS_SVC_KEY_ID_INIT;
-    psa_key_attributes_t attributes = PSA_KEY_ATTRIBUTES_INIT;
-    psa_algorithm_t alg;
-    psa_mac_operation_t operation = PSA_MAC_OPERATION_INIT;
-#else
-    mbedtls_md_context_t ctx, ref_ctx;
-    const mbedtls_md_info_t *md_info;
-#endif /* MBEDTLS_USE_PSA_CRYPTO */
-    size_t out_len, block_size;
-    size_t min_in_len, in_len, max_in_len, i;
-    /* TLS additional data is 13 bytes (hence the "lucky 13" name) */
-    unsigned char add_data[13];
-    unsigned char ref_out[MBEDTLS_HASH_MAX_SIZE];
-    unsigned char *data = NULL;
-    unsigned char *out = NULL;
-    unsigned char rec_num = 0;
-
-    USE_PSA_INIT( );
-
-#if defined(MBEDTLS_USE_PSA_CRYPTO)
-    alg = PSA_ALG_HMAC( mbedtls_hash_info_psa_from_md( hash ) );
-
-    out_len = PSA_HASH_LENGTH( alg );
-    block_size = PSA_HASH_BLOCK_LENGTH( alg );
-
-    /* mbedtls_ct_hmac() requires the key to be exportable */
-    psa_set_key_usage_flags( &attributes, PSA_KEY_USAGE_EXPORT |
-                                          PSA_KEY_USAGE_VERIFY_HASH );
-    psa_set_key_algorithm( &attributes, PSA_ALG_HMAC( alg ) );
-    psa_set_key_type( &attributes, PSA_KEY_TYPE_HMAC );
-#else
-    mbedtls_md_init( &ctx );
-    mbedtls_md_init( &ref_ctx );
-
-    md_info = mbedtls_md_info_from_type( hash );
-    TEST_ASSERT( md_info != NULL );
-    out_len = mbedtls_md_get_size( md_info );
-    TEST_ASSERT( out_len != 0 );
-    block_size = hash == MBEDTLS_MD_SHA384 ? 128 : 64;
-#endif /* MBEDTLS_USE_PSA_CRYPTO */
-
-    /* Use allocated out buffer to catch overwrites */
-    ASSERT_ALLOC( out, out_len );
-
-#if defined(MBEDTLS_USE_PSA_CRYPTO)
-    /* Set up dummy key */
-    memset( ref_out, 42, sizeof( ref_out ) );
-    TEST_EQUAL( PSA_SUCCESS, psa_import_key( &attributes,
-                                             ref_out, out_len,
-                                             &key ) );
-#else
-    /* Set up contexts with the given hash and a dummy key */
-    TEST_EQUAL( 0, mbedtls_md_setup( &ctx, md_info, 1 ) );
-    TEST_EQUAL( 0, mbedtls_md_setup( &ref_ctx, md_info, 1 ) );
-    memset( ref_out, 42, sizeof( ref_out ) );
-    TEST_EQUAL( 0, mbedtls_md_hmac_starts( &ctx, ref_out, out_len ) );
-    TEST_EQUAL( 0, mbedtls_md_hmac_starts( &ref_ctx, ref_out, out_len ) );
-    memset( ref_out, 0, sizeof( ref_out ) );
-#endif
-
-    /*
-     * Test all possible lengths up to a point. The difference between
-     * max_in_len and min_in_len is at most 255, and make sure they both vary
-     * by at least one block size.
-     */
-    for( max_in_len = 0; max_in_len <= 255 + block_size; max_in_len++ )
-    {
-        mbedtls_test_set_step( max_in_len * 10000 );
-
-        /* Use allocated in buffer to catch overreads */
-        ASSERT_ALLOC( data, max_in_len );
-
-        min_in_len = max_in_len > 255 ? max_in_len - 255 : 0;
-        for( in_len = min_in_len; in_len <= max_in_len; in_len++ )
-        {
-            mbedtls_test_set_step( max_in_len * 10000 + in_len );
-
-            /* Set up dummy data and add_data */
-            rec_num++;
-            memset( add_data, rec_num, sizeof( add_data ) );
-            for( i = 0; i < in_len; i++ )
-                data[i] = ( i & 0xff ) ^ rec_num;
-
-            /* Get the function's result */
-            TEST_CF_SECRET( &in_len, sizeof( in_len ) );
-#if defined(MBEDTLS_USE_PSA_CRYPTO)
-            TEST_EQUAL( 0, mbedtls_ct_hmac( key, PSA_ALG_HMAC( alg ),
-                                            add_data, sizeof( add_data ),
-                                            data, in_len,
-                                            min_in_len, max_in_len,
-                                            out ) );
-#else
-            TEST_EQUAL( 0, mbedtls_ct_hmac( &ctx, add_data, sizeof( add_data ),
-                                            data, in_len,
-                                            min_in_len, max_in_len,
-                                            out ) );
-#endif /* MBEDTLS_USE_PSA_CRYPTO */
-            TEST_CF_PUBLIC( &in_len, sizeof( in_len ) );
-            TEST_CF_PUBLIC( out, out_len );
-
-#if defined(MBEDTLS_USE_PSA_CRYPTO)
-            TEST_EQUAL( PSA_SUCCESS, psa_mac_verify_setup( &operation,
-                                                           key, alg ) );
-            TEST_EQUAL( PSA_SUCCESS, psa_mac_update( &operation, add_data,
-                                                     sizeof( add_data ) ) );
-            TEST_EQUAL( PSA_SUCCESS, psa_mac_update( &operation,
-                                                     data, in_len ) );
-            TEST_EQUAL( PSA_SUCCESS, psa_mac_verify_finish( &operation,
-                                                            out, out_len ) );
-#else
-            /* Compute the reference result */
-            TEST_EQUAL( 0, mbedtls_md_hmac_update( &ref_ctx, add_data,
-                                                   sizeof( add_data ) ) );
-            TEST_EQUAL( 0, mbedtls_md_hmac_update( &ref_ctx, data, in_len ) );
-            TEST_EQUAL( 0, mbedtls_md_hmac_finish( &ref_ctx, ref_out ) );
-            TEST_EQUAL( 0, mbedtls_md_hmac_reset( &ref_ctx ) );
-
-            /* Compare */
-            ASSERT_COMPARE( out, out_len, ref_out, out_len );
-#endif /* MBEDTLS_USE_PSA_CRYPTO */
-        }
-
-        mbedtls_free( data );
-        data = NULL;
-    }
-
-exit:
-#if defined(MBEDTLS_USE_PSA_CRYPTO)
-    psa_mac_abort( &operation );
-    psa_destroy_key( key );
-#else
-    mbedtls_md_free( &ref_ctx );
-    mbedtls_md_free( &ctx );
-#endif /* MBEDTLS_USE_PSA_CRYPTO */
-
-    mbedtls_free( data );
-    mbedtls_free( out );
-
-    USE_PSA_DONE( );
-}
-/* END_CASE */
-
-/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC:MBEDTLS_TEST_HOOKS */
-void ssl_cf_memcpy_offset( int offset_min, int offset_max, int len )
-{
-    unsigned char *dst = NULL;
-    unsigned char *src = NULL;
-    size_t src_len = offset_max + len;
-    size_t secret;
-
-    ASSERT_ALLOC( dst, len );
-    ASSERT_ALLOC( src, src_len );
-
-    /* Fill src in a way that we can detect if we copied the right bytes */
-    mbedtls_test_rnd_std_rand( NULL, src, src_len );
-
-    for( secret = offset_min; secret <= (size_t) offset_max; secret++ )
-    {
-        mbedtls_test_set_step( (int) secret );
-
-        TEST_CF_SECRET( &secret, sizeof( secret ) );
-        mbedtls_ct_memcpy_offset( dst, src, secret,
-                                  offset_min, offset_max, len );
-        TEST_CF_PUBLIC( &secret, sizeof( secret ) );
-        TEST_CF_PUBLIC( dst, len );
-
-        ASSERT_COMPARE( dst, len, src + secret, len );
-    }
-
-exit:
-    mbedtls_free( dst );
-    mbedtls_free( src );
-}
-/* END_CASE */
-
 /* BEGIN_CASE depends_on:MBEDTLS_SSL_HANDSHAKE_WITH_PSK_ENABLED */
 void test_multiple_psks()
 {