Integrate p256-m as an example driver alongside Mbed TLS and write documentation for the example. (Reapplying changes as one commit on top of development post codestyle change instead of rewriting old branch) Signed-off-by: Aditya Deshpande <aditya.deshpande@arm.com>

commit: e41f7e457fe039b1ea8a05389460b3af07a9d3dd [log] [tgz]
author: Aditya Deshpande <aditya.deshpande@arm.com> Thu Jan 12 16:29:02 2023 +0000
committer: Aditya Deshpande <aditya.deshpande@arm.com> Fri Apr 28 17:54:09 2023 +0100
tree: c943d054dad5be35e8df5f74c66313e28c7d683f
parent: 14d6b1124bffbcefbacedeb6f6eba5c960abcc28 [diff]
diff --git a/3rdparty/Makefile.inc b/3rdparty/Makefile.inc
index 0ed85af..ea100f4 100644
--- a/3rdparty/Makefile.inc
+++ b/3rdparty/Makefile.inc

@@ -1,2 +1,3 @@
 THIRDPARTY_DIR = $(dir $(lastword $(MAKEFILE_LIST)))
 include $(THIRDPARTY_DIR)/everest/Makefile.inc
+include ../3rdparty/p256-m/Makefile.inc

diff --git a/3rdparty/p256-m/Makefile.inc b/3rdparty/p256-m/Makefile.inc
new file mode 100644
index 0000000..fc8f73b
--- /dev/null
+++ b/3rdparty/p256-m/Makefile.inc

@@ -0,0 +1,5 @@
+THIRDPARTY_INCLUDES+=-I../3rdparty/p256-m/p256-m/include -I../3rdparty/p256-m/p256-m/include/p256-m -I../3rdparty/p256-m/p256-m_driver_interface
+
+THIRDPARTY_CRYPTO_OBJECTS+= \
+	../3rdparty/p256-m//p256-m_driver_entrypoints.o \
+	../3rdparty/p256-m//p256-m/p256-m.o

diff --git a/3rdparty/p256-m/p256-m/apache-2.0.txt b/3rdparty/p256-m/p256-m/apache-2.0.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/3rdparty/p256-m/p256-m/apache-2.0.txt

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

diff --git a/3rdparty/p256-m/p256-m/p256-m.c b/3rdparty/p256-m/p256-m/p256-m.c
new file mode 100644
index 0000000..7f2f0f1
--- /dev/null
+++ b/3rdparty/p256-m/p256-m/p256-m.c

@@ -0,0 +1,1459 @@
+/*
+ * Implementation of curve P-256 (ECDH and ECDSA)
+ *
+ * Author: Manuel Pégourié-Gonnard.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "p256-m.h"
+#include <stdlib.h>
+
+/*
+ * Zeroize memory - this should not be optimized away
+ */
+static void zeroize(void *d, size_t n)
+{
+    volatile char *p = d;
+    while( n-- )
+        *p++ = 0;
+}
+
+/*
+ * Helpers to test constant-time behaviour with valgrind or MemSan.
+ *
+ * CT_POISON() is used for secret data. It marks the memory area as
+ * uninitialised, so that any branch or pointer dereference that depends on it
+ * (even indirectly) triggers a warning.
+ * CT_UNPOISON() is used for public data; it marks the area as initialised.
+ *
+ * These are macros in order to avoid interfering with origin tracking.
+ */
+#if defined(CT_MEMSAN)
+
+#include <sanitizer/msan_interface.h>
+#define CT_POISON   __msan_allocated_memory
+// void __msan_allocated_memory(const volatile void* data, size_t size);
+#define CT_UNPOISON __msan_unpoison
+// void __msan_unpoison(const volatile void *a, size_t size);
+
+#elif defined(CT_VALGRIND)
+
+#include <valgrind/memcheck.h>
+#define CT_POISON   VALGRIND_MAKE_MEM_UNDEFINED
+// VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr,_qzz_len)
+#define CT_UNPOISON VALGRIND_MAKE_MEM_DEFINED
+// VALGRIND_MAKE_MEM_DEFINED(_qzz_addr,_qzz_len)
+
+#else
+#define CT_POISON(p, sz)
+#define CT_UNPOISON(p, sz)
+#endif
+
+/**********************************************************************
+ *
+ * Operations on fixed-width unsigned integers
+ *
+ * Represented using 32-bit limbs, least significant limb first.
+ * That is: x = x[0] + 2^32 x[1] + ... + 2^224 x[7] for 256-bit.
+ *
+ **********************************************************************/
+
+/*
+ * 256-bit set to 32-bit value
+ *
+ * in: x in [0, 2^32)
+ * out: z = x
+ */
+static void u256_set32(uint32_t z[8], uint32_t x)
+{
+    z[0] = x;
+    for (unsigned i = 1; i < 8; i++) {
+        z[i] = 0;
+    }
+}
+
+/*
+ * 256-bit addition
+ *
+ * in: x, y in [0, 2^256)
+ * out: z = (x + y) mod 2^256
+ *      c = (x + y) div 2^256
+ * That is, z + c * 2^256 = x + y
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static uint32_t u256_add(uint32_t z[8],
+                         const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t carry = 0;
+
+    for (unsigned i = 0; i < 8; i++) {
+        uint64_t sum = (uint64_t) carry + x[i] + y[i];
+        z[i] = (uint32_t) sum;
+        carry = (uint32_t) (sum >> 32);
+    }
+
+    return carry;
+}
+
+/*
+ * 256-bit subtraction
+ *
+ * in: x, y in [0, 2^256)
+ * out: z = (x - y) mod 2^256
+ *      c = 0 if x >=y, 1 otherwise
+ * That is, z = c * 2^256 + x - y
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static uint32_t u256_sub(uint32_t z[8],
+                         const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t carry = 0;
+
+    for (unsigned i = 0; i < 8; i++) {
+        uint64_t diff = (uint64_t) x[i] - y[i] - carry;
+        z[i] = (uint32_t) diff;
+        carry = -(uint32_t) (diff >> 32);
+    }
+
+    return carry;
+}
+
+/*
+ * 256-bit conditional assignment
+ *
+ * in: x in [0, 2^256)
+ *     c in [0, 1]
+ * out: z = x if c == 1, z unchanged otherwise
+ *
+ * Note: as a memory area, z must be either equal to x, or not overlap.
+ */
+static void u256_cmov(uint32_t z[8], const uint32_t x[8], uint32_t c)
+{
+    const uint32_t x_mask = -c;
+    for (unsigned i = 0; i < 8; i++) {
+        z[i] = (z[i] & ~x_mask) | (x[i] & x_mask);
+    }
+}
+
+/*
+ * 256-bit compare for equality
+ *
+ * in: x in [0, 2^256)
+ *     y in [0, 2^256)
+ * out: 0 if x == y, unspecified non-zero otherwise
+ */
+static uint32_t u256_diff(const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t diff = 0;
+    for (unsigned i = 0; i < 8; i++) {
+        diff |= x[i] ^ y[i];
+    }
+    return diff;
+}
+
+/*
+ * 256-bit compare to zero
+ *
+ * in: x in [0, 2^256)
+ * out: 0 if x == 0, unspecified non-zero otherwise
+ */
+static uint32_t u256_diff0(const uint32_t x[8])
+{
+    uint32_t diff = 0;
+    for (unsigned i = 0; i < 8; i++) {
+        diff |= x[i];
+    }
+    return diff;
+}
+
+/*
+ * 32 x 32 -> 64-bit multiply-and-accumulate
+ *
+ * in: x, y, z, t in [0, 2^32)
+ * out: x * y + z + t in [0, 2^64)
+ *
+ * Note: this computation cannot overflow.
+ *
+ * Note: this function has two pure-C implementations (depending on whether
+ * MUL64_IS_CONSTANT_TIME), and possibly optimised asm implementations.
+ * Start with the potential asm definitions, and use the C definition only if
+ * we no have no asm for the current toolchain & CPU.
+ */
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t);
+
+/* This macro is used to mark whether an asm implentation is found */
+#undef MULADD64_ASM
+/* This macro is used to mark whether the implementation has a small
+ * code size (ie, it can be inlined even in an unrolled loop) */
+#undef MULADD64_SMALL
+
+/*
+ * Currently assembly optimisations are only supported with GCC/Clang for
+ * Arm's Cortex-A and Cortex-M lines of CPUs, which start with the v6-M and
+ * v7-M architectures. __ARM_ARCH_PROFILE is not defined for v6 and earlier.
+ */
+#if defined(__GNUC__) &&\
+    defined(__ARM_ARCH) && __ARM_ARCH >= 6 && defined(__ARM_ARCH_PROFILE) && \
+    ( __ARM_ARCH_PROFILE == 77 || __ARM_ARCH_PROFILE == 65 ) /* 'M' or 'A' */
+
+/*
+ * This set of CPUs is conveniently partitioned as follows:
+ *
+ * 1. Cores that have the DSP extension, which includes a 1-cycle UMAAL
+ *    instruction: M4, M7, M33, all A-class cores.
+ * 2. Cores that don't have the DSP extension, and also lack a constant-time
+ *    64-bit multiplication instruction:
+ *    - M0, M0+, M23: 32-bit multiplication only;
+ *    - M3: 64-bit multiplication is not constant-time.
+ */
+#if defined(__ARM_FEATURE_DSP)
+
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    __asm__(
+        /* UMAAL <RdLo>, <RdHi>, <Rn>, <Rm> */
+        "umaal   %[z], %[t], %[x], %[y]"
+        : [z] "+l" (z), [t] "+l" (t)
+        : [x] "l" (x), [y] "l" (y)
+    );
+    return ((uint64_t) t << 32) | z;
+}
+#define MULADD64_ASM
+#define MULADD64_SMALL
+
+#else /* __ARM_FEATURE_DSP */
+
+/*
+ * This implementation only uses 16x16->32 bit multiplication.
+ *
+ * It decomposes the multiplicands as:
+ *      x = xh:xl = 2^16 * xh + xl
+ *      y = yh:yl = 2^16 * yh + yl
+ * and computes their product as:
+ *      x*y = xl*yl + 2**16 (xh*yl + yl*yh) + 2**32 xh*yh
+ * then adds z and t to the result.
+ */
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    /* First compute x*y, using 3 temporary registers */
+    uint32_t tmp1, tmp2, tmp3;
+    __asm__(
+        ".syntax unified\n\t"
+        /* start by splitting the inputs into halves */
+        "lsrs    %[u], %[x], #16\n\t"
+        "lsrs    %[v], %[y], #16\n\t"
+        "uxth    %[x], %[x]\n\t"
+        "uxth    %[y], %[y]\n\t"
+        /* now we have %[x], %[y], %[u], %[v] = xl, yl, xh, yh */
+        /* let's compute the 4 products we can form with those */
+        "movs    %[w], %[v]\n\t"
+        "muls    %[w], %[u]\n\t"
+        "muls    %[v], %[x]\n\t"
+        "muls    %[x], %[y]\n\t"
+        "muls    %[y], %[u]\n\t"
+        /* now we have %[x], %[y], %[v], %[w] = xl*yl, xh*yl, xl*yh, xh*yh */
+        /* let's split and add the first middle product */
+        "lsls    %[u], %[y], #16\n\t"
+        "lsrs    %[y], %[y], #16\n\t"
+        "adds    %[x], %[u]\n\t"
+        "adcs    %[y], %[w]\n\t"
+        /* let's finish with the second middle product */
+        "lsls    %[u], %[v], #16\n\t"
+        "lsrs    %[v], %[v], #16\n\t"
+        "adds    %[x], %[u]\n\t"
+        "adcs    %[y], %[v]\n\t"
+        : [x] "+l" (x), [y] "+l" (y),
+          [u] "=&l" (tmp1), [v] "=&l" (tmp2), [w] "=&l" (tmp3)
+        : /* no read-only inputs */
+        : "cc"
+    );
+    (void) tmp1;
+    (void) tmp2;
+    (void) tmp3;
+
+    /* Add z and t, using one temporary register */
+    __asm__(
+        ".syntax unified\n\t"
+        "movs    %[u], #0\n\t"
+        "adds    %[x], %[z]\n\t"
+        "adcs    %[y], %[u]\n\t"
+        "adds    %[x], %[t]\n\t"
+        "adcs    %[y], %[u]\n\t"
+        : [x] "+l" (x), [y] "+l" (y), [u] "=&l" (tmp1)
+        : [z] "l" (z), [t] "l" (t)
+        : "cc"
+    );
+    (void) tmp1;
+
+    return ((uint64_t) y << 32) | x;
+}
+#define MULADD64_ASM
+
+#endif /* __ARM_FEATURE_DSP */
+
+#endif /* GCC/Clang with Cortex-M/A CPU */
+
+#if !defined(MULADD64_ASM)
+#if defined(MUL64_IS_CONSTANT_TIME)
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    return (uint64_t) x * y + z + t;
+}
+#define MULADD64_SMALL
+#else
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    /* x = xl + 2**16 xh, y = yl + 2**16 yh */
+    const uint16_t xl = (uint16_t) x;
+    const uint16_t yl = (uint16_t) y;
+    const uint16_t xh = x >> 16;
+    const uint16_t yh = y >> 16;
+
+    /* x*y = xl*yl + 2**16 (xh*yl + yl*yh) + 2**32 xh*yh
+     *     = lo    + 2**16 (m1    + m2   ) + 2**32 hi    */
+    const uint32_t lo = (uint32_t) xl * yl;
+    const uint32_t m1 = (uint32_t) xh * yl;
+    const uint32_t m2 = (uint32_t) xl * yh;
+    const uint32_t hi = (uint32_t) xh * yh;
+
+    uint64_t acc = lo + ((uint64_t) (hi + (m1 >> 16) + (m2 >> 16)) << 32);
+    acc += m1 << 16;
+    acc += m2 << 16;
+    acc += z;
+    acc += t;
+
+    return acc;
+}
+#endif /* MUL64_IS_CONSTANT_TIME */
+#endif /* MULADD64_ASM */
+
+/*
+ * 288 + 32 x 256 -> 288-bit multiply and add
+ *
+ * in: x in [0, 2^32)
+ *     y in [0, 2^256)
+ *     z in [0, 2^288)
+ * out: z_out = z_in + x * y mod 2^288
+ *      c     = z_in + x * y div 2^288
+ * That is, z_out + c * 2^288 = z_in + x * y
+ *
+ * Note: as a memory area, z must be either equal to y, or not overlap.
+ *
+ * This is a helper for Montgomery multiplication.
+ */
+static uint32_t u288_muladd(uint32_t z[9], uint32_t x, const uint32_t y[8])
+{
+    uint32_t carry = 0;
+
+#define U288_MULADD_STEP(i) \
+    do { \
+        uint64_t prod = u32_muladd64(x, y[i], z[i], carry); \
+        z[i] = (uint32_t) prod; \
+        carry = (uint32_t) (prod >> 32); \
+    } while( 0 )
+
+#if defined(MULADD64_SMALL)
+    U288_MULADD_STEP(0);
+    U288_MULADD_STEP(1);
+    U288_MULADD_STEP(2);
+    U288_MULADD_STEP(3);
+    U288_MULADD_STEP(4);
+    U288_MULADD_STEP(5);
+    U288_MULADD_STEP(6);
+    U288_MULADD_STEP(7);
+#else
+    for (unsigned i = 0; i < 8; i++) {
+        U288_MULADD_STEP(i);
+    }
+#endif
+
+    uint64_t sum = (uint64_t) z[8] + carry;
+    z[8] = (uint32_t) sum;
+    carry = (uint32_t) (sum >> 32);
+
+    return carry;
+}
+
+/*
+ * 288-bit in-place right shift by 32 bits
+ *
+ * in: z in [0, 2^288)
+ *     c in [0, 2^32)
+ * out: z_out = z_in div 2^32 + c * 2^256
+ *            = (z_in + c * 2^288) div 2^32
+ *
+ * This is a helper for Montgomery multiplication.
+ */
+static void u288_rshift32(uint32_t z[9], uint32_t c)
+{
+    for (unsigned i = 0; i < 8; i++) {
+        z[i] = z[i + 1];
+    }
+    z[8] = c;
+}
+
+/*
+ * 256-bit import from big-endian bytes
+ *
+ * in: p = p0, ..., p31
+ * out: z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
+ */
+static void u256_from_bytes(uint32_t z[8], const uint8_t p[32])
+{
+    for (unsigned i = 0; i < 8; i++) {
+        unsigned j = 4 * (7 - i);
+        z[i] = ((uint32_t) p[j + 0] << 24) |
+               ((uint32_t) p[j + 1] << 16) |
+               ((uint32_t) p[j + 2] <<  8) |
+               ((uint32_t) p[j + 3] <<  0);
+    }
+}
+
+/*
+ * 256-bit export to big-endian bytes
+ *
+ * in: z in [0, 2^256)
+ * out: p = p0, ..., p31 such that
+ *      z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
+ */
+static void u256_to_bytes(uint8_t p[32], const uint32_t z[8])
+{
+    for (unsigned i = 0; i < 8; i++) {
+        unsigned j = 4 * (7 - i);
+        p[j + 0] = (uint8_t) (z[i] >> 24);
+        p[j + 1] = (uint8_t) (z[i] >> 16);
+        p[j + 2] = (uint8_t) (z[i] >>  8);
+        p[j + 3] = (uint8_t) (z[i] >>  0);
+    }
+}
+
+/**********************************************************************
+ *
+ * Operations modulo a 256-bit prime m
+ *
+ * These are done in the Montgomery domain, that is x is represented by
+ *  x * 2^256 mod m
+ * Numbers need to be converted to that domain before computations,
+ * and back from it afterwards.
+ *
+ * Inversion is computed using Fermat's little theorem.
+ *
+ * Assumptions on m:
+ * - Montgomery operations require that m is odd.
+ * - Fermat's little theorem require it to be a prime.
+ * - m256_inv() further requires that m % 2^32 >= 2.
+ * - m256_inv() also assumes that the value of m is not a secret.
+ *
+ * In practice operations are done modulo the curve's p and n,
+ * both of which satisfy those assumptions.
+ *
+ **********************************************************************/
+
+/*
+ * Data associated to a modulus for Montgomery operations.
+ *
+ * m in [0, 2^256) - the modulus itself, must be odd
+ * R2 = 2^512 mod m
+ * ni = -m^-1 mod 2^32
+ */
+typedef struct {
+    uint32_t m[8];
+    uint32_t R2[8];
+    uint32_t ni;
+}
+m256_mod;
+
+/*
+ * Data for Montgomery operations modulo the curve's p
+ */
+static const m256_mod p256_p = {
+    {   /* the curve's p */
+        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
+        0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF,
+    },
+    {   /* 2^512 mod p */
+        0x00000003, 0x00000000, 0xffffffff, 0xfffffffb,
+        0xfffffffe, 0xffffffff, 0xfffffffd, 0x00000004,
+    },
+    0x00000001, /* -p^-1 mod 2^32 */
+};
+
+/*
+ * Data for Montgomery operations modulo the curve's n
+ */
+static const m256_mod p256_n = {
+    {   /* the curve's n */
+        0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD,
+        0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF,
+    },
+    {   /* 2^512 mod n */
+        0xbe79eea2, 0x83244c95, 0x49bd6fa6, 0x4699799c,
+        0x2b6bec59, 0x2845b239, 0xf3d95620, 0x66e12d94,
+    },
+    0xee00bc4f, /* -n^-1 mod 2^32 */
+};
+
+/*
+ * Modular addition
+ *
+ * in: x, y in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z = (x + y) mod m, in [0, m)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_add(uint32_t z[8],
+                     const uint32_t x[8], const uint32_t y[8],
+                     const m256_mod *mod)
+{
+    uint32_t r[8];
+    uint32_t carry_add = u256_add(z, x, y);
+    uint32_t carry_sub = u256_sub(r, z, mod->m);
+    /* Need to subract m if:
+     *      x+y >= 2^256 > m (that is, carry_add == 1)
+     *   OR z >= m (that is, carry_sub == 0) */
+    uint32_t use_sub = carry_add | (1 - carry_sub);
+    u256_cmov(z, r, use_sub);
+}
+
+/*
+ * Modular addition mod p
+ *
+ * in: x, y in [0, p)
+ * out: z = (x + y) mod p, in [0, p)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_add_p(uint32_t z[8],
+                       const uint32_t x[8], const uint32_t y[8])
+{
+    m256_add(z, x, y, &p256_p);
+}
+
+/*
+ * Modular subtraction
+ *
+ * in: x, y in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z = (x - y) mod m, in [0, m)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_sub(uint32_t z[8],
+                     const uint32_t x[8], const uint32_t y[8],
+                     const m256_mod *mod)
+{
+    uint32_t r[8];
+    uint32_t carry = u256_sub(z, x, y);
+    (void) u256_add(r, z, mod->m);
+    /* Need to add m if and only if x < y, that is carry == 1.
+     * In that case z is in [2^256 - m + 1, 2^256 - 1], so the
+     * addition will have a carry as well, which cancels out. */
+    u256_cmov(z, r, carry);
+}
+
+/*
+ * Modular subtraction mod p
+ *
+ * in: x, y in [0, p)
+ * out: z = (x + y) mod p, in [0, p)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_sub_p(uint32_t z[8],
+                       const uint32_t x[8], const uint32_t y[8])
+{
+    m256_sub(z, x, y, &p256_p);
+}
+
+/*
+ * Montgomery modular multiplication
+ *
+ * in: x, y in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z = (x * y) / 2^256 mod m, in [0, m)
+ *
+ * Note: as a memory area, z may overlap with x or y.
+ */
+static void m256_mul(uint32_t z[8],
+                     const uint32_t x[8], const uint32_t y[8],
+                     const m256_mod *mod)
+{
+    /*
+     * Algorithm 14.36 in Handbook of Applied Cryptography with:
+     * b = 2^32, n = 8, R = 2^256
+     */
+    uint32_t m_prime = mod->ni;
+    uint32_t a[9];
+
+    for (unsigned i = 0; i < 9; i++) {
+        a[i] = 0;
+    }
+
+    for (unsigned i = 0; i < 8; i++) {
+        /* the "mod 2^32" is implicit from the type */
+        uint32_t u = (a[0] + x[i] * y[0]) * m_prime;
+
+        /* a = (a + x[i] * y + u * m) div b */
+        uint32_t c = u288_muladd(a, x[i], y);
+        c += u288_muladd(a, u, mod->m);
+        u288_rshift32(a, c);
+    }
+
+    /* a = a > m ? a - m : a */
+    uint32_t carry_add = a[8];  // 0 or 1 since a < 2m, see HAC Note 14.37
+    uint32_t carry_sub = u256_sub(z, a, mod->m);
+    uint32_t use_sub = carry_add | (1 - carry_sub);     // see m256_add()
+    u256_cmov(z, a, 1 - use_sub);
+}
+
+/*
+ * Montgomery modular multiplication modulo p.
+ *
+ * in: x, y in [0, p)
+ * out: z = (x * y) / 2^256 mod p, in [0, p)
+ *
+ * Note: as a memory area, z may overlap with x or y.
+ */
+static void m256_mul_p(uint32_t z[8],
+                       const uint32_t x[8], const uint32_t y[8])
+{
+    m256_mul(z, x, y, &p256_p);
+}
+
+/*
+ * In-place conversion to Montgomery form
+ *
+ * in: z in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z_out = z_in * 2^256 mod m, in [0, m)
+ */
+static void m256_prep(uint32_t z[8], const m256_mod *mod)
+{
+    m256_mul(z, z, mod->R2, mod);
+}
+
+/*
+ * In-place conversion from Montgomery form
+ *
+ * in: z in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z_out = z_in / 2^256 mod m, in [0, m)
+ * That is, z_in was z_actual * 2^256 mod m, and z_out is z_actual
+ */
+static void m256_done(uint32_t z[8], const m256_mod *mod)
+{
+    uint32_t one[8];
+    u256_set32(one, 1);
+    m256_mul(z, z, one, mod);
+}
+
+/*
+ * Set to 32-bit value
+ *
+ * in: x in [0, 2^32)
+ *     mod must point to a valid m256_mod structure
+ * out: z = x * 2^256 mod m, in [0, m)
+ * That is, z is set to the image of x in the Montgomery domain.
+ */
+static void m256_set32(uint32_t z[8], uint32_t x, const m256_mod *mod)
+{
+    u256_set32(z, x);
+    m256_prep(z, mod);
+}
+
+/*
+ * Modular inversion in Montgomery form
+ *
+ * in: x in [0, m)
+ *     mod must point to a valid m256_mod structure
+ *     such that mod->m % 2^32 >= 2, assumed to be public.
+ * out: z = x^-1 * 2^512 mod m if x != 0,
+ *      z = 0 if x == 0
+ * That is, if x = x_actual    * 2^256 mod m, then
+ *             z = x_actual^-1 * 2^256 mod m
+ *
+ * Note: as a memory area, z may overlap with x.
+ */
+static void m256_inv(uint32_t z[8], const uint32_t x[8],
+                     const m256_mod *mod)
+{
+    /*
+     * Use Fermat's little theorem to compute x^-1 as x^(m-2).
+     *
+     * Take advantage of the fact that both p's and n's least significant limb
+     * is at least 2 to perform the subtraction on the flight (no carry).
+     *
+     * Use plain right-to-left binary exponentiation;
+     * branches are OK as the exponent is not a secret.
+     */
+    uint32_t bitval[8];
+    u256_cmov(bitval, x, 1);    /* copy x before writing to z */
+
+    m256_set32(z, 1, mod);
+
+    unsigned i = 0;
+    uint32_t limb = mod->m[i] - 2;
+    while (1) {
+        for (unsigned j = 0; j < 32; j++) {
+            if ((limb & 1) != 0) {
+                m256_mul(z, z, bitval, mod);
+            }
+            m256_mul(bitval, bitval, bitval, mod);
+            limb >>= 1;
+        }
+
+        if (i == 7)
+            break;
+
+        i++;
+        limb = mod->m[i];
+    }
+}
+
+/*
+ * Import modular integer from bytes to Montgomery domain
+ *
+ * in: p = p0, ..., p32
+ *     mod must point to a valid m256_mod structure
+ * out: z = (p0 * 2^248 + ... + p31) * 2^256 mod m, in [0, m)
+ *      return 0 if the number was already in [0, m), or -1.
+ *      z may be incorrect and must be discared when -1 is returned.
+ */
+static int m256_from_bytes(uint32_t z[8],
+                           const uint8_t p[32], const m256_mod *mod)
+{
+    u256_from_bytes(z, p);
+
+    uint32_t t[8];
+    uint32_t lt_m = u256_sub(t, z, mod->m);
+    if (lt_m != 1)
+        return -1;
+
+    m256_prep(z, mod);
+    return 0;
+}
+
+/*
+ * Export modular integer from Montgomery domain to bytes
+ *
+ * in: z in [0, 2^256)
+ *     mod must point to a valid m256_mod structure
+ * out: p = p0, ..., p31 such that
+ *      z = (p0 * 2^248 + ... + p31) * 2^256 mod m
+ */
+static void m256_to_bytes(uint8_t p[32],
+                          const uint32_t z[8], const m256_mod *mod)
+{
+    uint32_t zi[8];
+    u256_cmov(zi, z, 1);
+    m256_done(zi, mod);
+
+    u256_to_bytes(p, zi);
+}
+
+/**********************************************************************
+ *
+ * Operations on curve points
+ *
+ * Points are represented in two coordinates system:
+ *  - affine (x, y) - extended to represent 0 (see below)
+ *  - jacobian (x:y:z)
+ * In either case, coordinates are integers modulo p256_p and
+ * are always represented in the Montgomery domain.
+ *
+ * For background on jacobian coordinates, see for example [GECC] 3.2.2:
+ * - conversions go (x, y) -> (x:y:1) and (x:y:z) -> (x/z^2, y/z^3)
+ * - the curve equation becomes y^2 = x^3 - 3 x z^4 + b z^6
+ * - 0 (aka the origin aka point at infinity) is (x:y:0) with y^2 = x^3.
+ * - point negation goes -(x:y:z) = (x:-y:z)
+ *
+ * Normally 0 (the point at infinity) can't be represented in affine
+ * coordinates. However we extend affine coordinates with the convention that
+ * (0, 0) (which is normally not a point on the curve) is interpreted as 0.
+ *
+ * References:
+ * - [GECC]: Guide to Elliptic Curve Cryptography; Hankerson, Menezes,
+ *   Vanstone; Springer, 2004.
+ * - [CMO98]: Efficient Elliptic Curve Exponentiation Using Mixed Coordinates;
+ *   Cohen, Miyaji, Ono; Springer, ASIACRYPT 1998.
+ *   https://link.springer.com/content/pdf/10.1007/3-540-49649-1_6.pdf
+ * - [RCB15]: Complete addition formulas for prime order elliptic curves;
+ *   Renes, Costello, Batina; IACR e-print 2015-1060.
+ *   https://eprint.iacr.org/2015/1060.pdf
+ *
+ **********************************************************************/
+
+/*
+ * The curve's b parameter in the Short Weierstrass equation
+ *  y^2 = x^3 - 3*x + b
+ * Compared to the standard, this is converted to the Montgomery domain.
+ */
+static const uint32_t p256_b[8] = { /* b * 2^256 mod p */
+    0x29c4bddf, 0xd89cdf62, 0x78843090, 0xacf005cd,
+    0xf7212ed6, 0xe5a220ab, 0x04874834, 0xdc30061d,
+};
+
+/*
+ * The curve's conventional base point G.
+ * Compared to the standard, coordinates converted to the Montgomery domain.
+ */
+static const uint32_t p256_gx[8] = { /* G_x * 2^256 mod p */
+    0x18a9143c, 0x79e730d4, 0x5fedb601, 0x75ba95fc,
+    0x77622510, 0x79fb732b, 0xa53755c6, 0x18905f76,
+};
+static const uint32_t p256_gy[8] = { /* G_y * 2^256 mod p */
+    0xce95560a, 0xddf25357, 0xba19e45c, 0x8b4ab8e4,
+    0xdd21f325, 0xd2e88688, 0x25885d85, 0x8571ff18,
+};
+
+/*
+ * Point-on-curve check - do the coordinates satisfy the curve's equation?
+ *
+ * in: x, y in [0, p)   (Montgomery domain)
+ * out: 0 if the point lies on the curve and is not 0,
+ *      unspecified non-zero otherwise
+ */
+static uint32_t point_check(const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t lhs[8], rhs[8];
+
+    /* lhs = y^2 */
+    m256_mul_p(lhs, y, y);
+
+    /* rhs = x^3 - 3x + b */
+    m256_mul_p(rhs, x,   x);      /* x^2 */
+    m256_mul_p(rhs, rhs, x);      /* x^3 */
+    for (unsigned i = 0; i < 3; i++)
+        m256_sub_p(rhs, rhs, x);  /* x^3 - 3x */
+    m256_add_p(rhs, rhs, p256_b); /* x^3 - 3x + b */
+
+    return u256_diff(lhs, rhs);
+}
+
+/*
+ * In-place jacobian to affine coordinate conversion
+ *
+ * in: (x:y:z) must be on the curve (coordinates in Montegomery domain)
+ * out: x_out = x_in / z_in^2   (Montgomery domain)
+ *      y_out = y_in / z_in^3   (Montgomery domain)
+ *      z_out unspecified, must be disregarded
+ *
+ * Note: if z is 0 (that is, the input point is 0), x_out = y_out = 0.
+ */
+static void point_to_affine(uint32_t x[8], uint32_t y[8], uint32_t z[8])
+{
+    uint32_t t[8];
+
+    m256_inv(z, z, &p256_p);    /* z = z^-1 */
+
+    m256_mul_p(t, z, z);        /* t = z^-2 */
+    m256_mul_p(x, x, t);        /* x = x * z^-2 */
+
+    m256_mul_p(t, t, z);        /* t = z^-3 */
+    m256_mul_p(y, y, t);        /* y = y * z^-3 */
+}
+
+/*
+ * In-place point doubling in jacobian coordinates (Montgomery domain)
+ *
+ * in: P_in = (x:y:z), must be on the curve
+ * out: (x:y:z) = P_out = 2 * P_in
+ */
+static void point_double(uint32_t x[8], uint32_t y[8], uint32_t z[8])
+{
+    /*
+     * This is formula 6 from [CMO98], cited as complete in [RCB15] (table 1).
+     * Notations as in the paper, except u added and t ommited (it's x3).
+     */
+    uint32_t m[8], s[8], u[8];
+
+    /* m = 3 * x^2 + a * z^4 = 3 * (x + z^2) * (x - z^2) */
+    m256_mul_p(s, z, z);
+    m256_add_p(m, x, s);
+    m256_sub_p(u, x, s);
+    m256_mul_p(s, m, u);
+    m256_add_p(m, s, s);
+    m256_add_p(m, m, s);
+
+    /* s = 4 * x * y^2 */
+    m256_mul_p(u, y, y);
+    m256_add_p(u, u, u); /* u = 2 * y^2 (used below) */
+    m256_mul_p(s, x, u);
+    m256_add_p(s, s, s);
+
+    /* u = 8 * y^4 (not named in the paper, first term of y3) */
+    m256_mul_p(u, u, u);
+    m256_add_p(u, u, u);
+
+    /* x3 = t = m^2 - 2 * s */
+    m256_mul_p(x, m, m);
+    m256_sub_p(x, x, s);
+    m256_sub_p(x, x, s);
+
+    /* z3 = 2 * y * z */
+    m256_mul_p(z, y, z);
+    m256_add_p(z, z, z);
+
+    /* y3 = -u + m * (s - t) */
+    m256_sub_p(y, s, x);
+    m256_mul_p(y, y, m);
+    m256_sub_p(y, y, u);
+}
+
+/*
+ * In-place point addition in jacobian-affine coordinates (Montgomery domain)
+ *
+ * in: P_in = (x1:y1:z1), must be on the curve and not 0
+ *     Q = (x2, y2), must be on the curve and not P_in or -P_in or 0
+ * out: P_out = (x1:y1:z1) = P_in + Q
+ */
+static void point_add(uint32_t x1[8], uint32_t y1[8], uint32_t z1[8],
+                      const uint32_t x2[8], const uint32_t y2[8])
+{
+    /*
+     * This is formula 5 from [CMO98], with z2 == 1 substituted. We use
+     * intermediates with neutral names, and names from the paper in comments.
+     */
+    uint32_t t1[8], t2[8], t3[8];
+
+    /* u1 = x1 and s1 = y1 (no computations) */
+
+    /* t1 = u2 = x2 z1^2 */
+    m256_mul_p(t1, z1, z1);
+    m256_mul_p(t2, t1, z1);
+    m256_mul_p(t1, t1, x2);
+
+    /* t2 = s2 = y2 z1^3 */
+    m256_mul_p(t2, t2, y2);
+
+    /* t1 = h = u2 - u1 */
+    m256_sub_p(t1, t1, x1); /* t1 = x2 * z1^2 - x1 */
+
+    /* t2 = r = s2 - s1 */
+    m256_sub_p(t2, t2, y1);
+
+    /* z3 = z1 * h */
+    m256_mul_p(z1, z1, t1);
+
+    /* t1 = h^3 */
+    m256_mul_p(t3, t1, t1);
+    m256_mul_p(t1, t3, t1);
+
+    /* t3 = x1 * h^2 */
+    m256_mul_p(t3, t3, x1);
+
+    /* x3 = r^2 - 2 * x1 * h^2 - h^3 */
+    m256_mul_p(x1, t2, t2);
+    m256_sub_p(x1, x1, t3);
+    m256_sub_p(x1, x1, t3);
+    m256_sub_p(x1, x1, t1);
+
+    /* y3 = r * (x1 * h^2 - x3) - y1 h^3 */
+    m256_sub_p(t3, t3, x1);
+    m256_mul_p(t3, t3, t2);
+    m256_mul_p(t1, t1, y1);
+    m256_sub_p(y1, t3, t1);
+}
+
+/*
+ * Point addition or doubling (affine to jacobian, Montgomery domain)
+ *
+ * in: P = (x1, y1) - must be on the curve and not 0
+ *     Q = (x2, y2) - must be on the curve and not 0
+ * out: (x3, y3) = R = P + Q
+ *
+ * Note: unlike point_add(), this function works if P = +- Q;
+ * however it leaks information on its input through timing,
+ * branches taken and memory access patterns (if observable).
+ */
+static void point_add_or_double_leaky(
+                        uint32_t x3[8], uint32_t y3[8],
+                        const uint32_t x1[8], const uint32_t y1[8],
+                        const uint32_t x2[8], const uint32_t y2[8])
+{
+
+    uint32_t z3[8];
+    u256_cmov(x3, x1, 1);
+    u256_cmov(y3, y1, 1);
+    m256_set32(z3, 1, &p256_p);
+
+    if (u256_diff(x1, x2) != 0) {
+        // P != +- Q -> generic addition
+        point_add(x3, y3, z3, x2, y2);
+        point_to_affine(x3, y3, z3);
+    }
+    else if (u256_diff(y1, y2) == 0) {
+        // P == Q -> double
+        point_double(x3, y3, z3);
+        point_to_affine(x3, y3, z3);
+    } else {
+        // P == -Q -> zero
+        m256_set32(x3, 0, &p256_p);
+        m256_set32(y3, 0, &p256_p);
+    }
+}
+
+/*
+ * Import curve point from bytes
+ *
+ * in: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
+ * out: x, y in Mongomery domain
+ *      return 0 if x and y are both in [0, p)
+ *                  and (x, y) is on the curve and not 0
+ *             unspecified non-zero otherwise.
+ *      x and y are unspecified and must be discarded if returning non-zero.
+ */
+static int point_from_bytes(uint32_t x[8], uint32_t y[8], const uint8_t p[64])
+{
+    int ret;
+
+    ret = m256_from_bytes(x, p, &p256_p);
+    if (ret != 0)
+        return ret;
+
+    ret = m256_from_bytes(y, p + 32, &p256_p);
+    if (ret != 0)
+        return ret;
+
+    return (int) point_check(x, y);
+}
+
+/*
+ * Export curve point to bytes
+ *
+ * in: x, y affine coordinates of a point (Montgomery domain)
+ *     must be on the curve and not 0
+ * out: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
+ */
+static void point_to_bytes(uint8_t p[64],
+                           const uint32_t x[8], const uint32_t y[8])
+{
+    m256_to_bytes(p,        x, &p256_p);
+    m256_to_bytes(p + 32,   y, &p256_p);
+}
+
+/**********************************************************************
+ *
+ * Scalar multiplication and other scalar-related operations
+ *
+ **********************************************************************/
+
+/*
+ * Scalar multiplication
+ *
+ * in: P = (px, py), affine (Montgomery), must be on the curve and not 0
+ *     s in [1, n-1]
+ * out: R = s * P = (rx, ry), affine coordinates (Montgomery).
+ *
+ * Note: as memory areas, none of the parameters may overlap.
+ */
+static void scalar_mult(uint32_t rx[8], uint32_t ry[8],
+                        const uint32_t px[8], const uint32_t py[8],
+                        const uint32_t s[8])
+{
+    /*
+     * We use a signed binary ladder, see for example slides 10-14 of
+     * http://ecc2015.math.u-bordeaux1.fr/documents/hamburg.pdf but with
+     * implicit recoding, and a different loop initialisation to avoid feeding
+     * 0 to our addition formulas, as they don't support it.
+     */
+    uint32_t s_odd[8], py_neg[8], py_use[8], rz[8];
+
+    /*
+     * Make s odd by replacing it with n - s if necessary.
+     *
+     * If s was odd, we'll have s_odd = s, and define P' = P.
+     * Otherwise, we'll have s_odd = n - s and define P' = -P.
+     *
+     * Either way, we can compute s * P as s_odd * P'.
+     */
+    u256_sub(s_odd, p256_n.m, s); /* no carry, result still in [1, n-1] */
+    uint32_t negate = ~s[0] & 1;
+    u256_cmov(s_odd, s, 1 - negate);
+
+    /* Compute py_neg = - py mod p (that's the y coordinate of -P) */
+    u256_set32(py_use, 0);
+    m256_sub_p(py_neg, py_use, py);
+
+    /* Initialize R = P' = (x:(-1)^negate * y:1) */
+    u256_cmov(rx, px, 1);
+    u256_cmov(ry, py, 1);
+    m256_set32(rz, 1, &p256_p);
+    u256_cmov(ry, py_neg, negate);
+
+    /*
+     * For any odd number s_odd = b255 ... b1 1, we have
+     *      s_odd = 2^255 + 2^254 sbit(b255) + ... + 2 sbit(b2) + sbit(b1)
+     * writing
+     *      sbit(b) = 2 * b - 1 = b ? 1 : -1
+     *
+     * Use that to compute s_odd * P' by repeating R = 2 * R +- P':
+     *      s_odd * P' = 2 * ( ... (2 * P' + sbit(b255) P') ... ) + sbit(b1) P'
+     *
+     * The loop invariant is that when beginning an iteration we have
+     *      R = s_i P'
+     * with
+     *      s_i = 2^(255-i) + 2^(254-i) sbit(b_255) + ...
+     * where the sum has 256 - i terms.
+     *
+     * When updating R we need to make sure the input to point_add() is
+     * neither 0 not +-P'. Since that input is 2 s_i P', it is sufficient to
+     * see that 1 < 2 s_i < n-1. The lower bound is obvious since s_i is a
+     * positive integer, and for the upper bound we distinguish three cases.
+     *
+     * If i > 1, then s_i < 2^254, so 2 s_i < 2^255 < n-1.
+     * Otherwise, i == 1 and we have 2 s_i = s_odd - sbit(b1).
+     *      If s_odd <= n-4, then 2 s_1 <= n-3.
+     *      Otherwise, s_odd = n-2, and for this curve's value of n,
+     *      we have b1 == 1, so sbit(b1) = 1 and 2 s_1 <= n-3.
+     */
+    for (unsigned i = 255; i > 0; i--) {
+        uint32_t bit = (s_odd[i / 32] >> i % 32) & 1;
+
+        /* set (px, py_use) = sbit(bit) P' = sbit(bit) * (-1)^negate P */
+        u256_cmov(py_use, py, bit ^ negate);
+        u256_cmov(py_use, py_neg, (1 - bit) ^ negate);
+
+        /* Update R = 2 * R +- P' */
+        point_double(rx, ry, rz);
+        point_add(rx, ry, rz, px, py_use);
+    }
+
+    point_to_affine(rx, ry, rz);
+}
+
+/*
+ * Scalar import from big-endian bytes
+ *
+ * in: p = p0, ..., p31
+ * out: s = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
+ *      return 0 if s in [1, n-1],
+ *            -1 otherwise.
+ */
+static int scalar_from_bytes(uint32_t s[8], const uint8_t p[32])
+{
+    u256_from_bytes(s, p);
+
+    uint32_t r[8];
+    uint32_t lt_n = u256_sub(r, s, p256_n.m);
+
+    u256_set32(r, 1);
+    uint32_t lt_1 = u256_sub(r, s, r);
+
+    if (lt_n && !lt_1)
+        return 0;
+
+    return -1;
+}
+
+/* test version based on stdlib - never do this in production! */
+int p256_generate_random(uint8_t *output, unsigned output_size)
+{
+    for (unsigned i = 0; i < output_size; i++) {
+        output[i] = (uint8_t) rand();
+    }
+
+    return 0;
+}
+
+/*
+ * Scalar generation, with public key
+ *
+ * out: sbytes the big-endian bytes representation of the scalar
+ *      s its u256 representation
+ *      x, y the affine coordinates of s * G (Montgomery domain)
+ *      return 0 if OK, -1 on failure
+ *      sbytes, s, x, y must be discarded when returning non-zero.
+ */
+static int scalar_gen_with_pub(uint8_t sbytes[32], uint32_t s[8],
+                               uint32_t x[8], uint32_t y[8])
+{
+    /* generate a random valid scalar */
+    int ret;
+    unsigned nb_tried = 0;
+    do {
+        if (nb_tried++ >= 4)
+            return -1;
+
+        ret = p256_generate_random(sbytes, 32);
+        CT_POISON(sbytes, 32);
+        if (ret != 0)
+            return -1;
+
+        ret = scalar_from_bytes(s, sbytes);
+        CT_UNPOISON(&ret, sizeof ret);
+    }
+    while (ret != 0);
+
+    /* compute and ouput the associated public key */
+    scalar_mult(x, y, p256_gx, p256_gy, s);
+
+    /* the associated public key is not a secret */
+    CT_UNPOISON(x, 32);
+    CT_UNPOISON(y, 32);
+
+    return 0;
+}
+
+/*
+ * ECDH/ECDSA generate pair
+ */
+int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64])
+{
+    uint32_t s[8], x[8], y[8];
+    int ret = scalar_gen_with_pub(priv, s, x, y);
+    zeroize(s, sizeof s);
+    if (ret != 0)
+        return P256_RANDOM_FAILED;
+
+    point_to_bytes(pub, x, y);
+    return 0;
+}
+
+/**********************************************************************
+ *
+ * ECDH
+ *
+ **********************************************************************/
+
+/*
+ * ECDH compute shared secret
+ */
+int p256_ecdh_shared_secret(uint8_t secret[32],
+                            const uint8_t priv[32], const uint8_t peer[64])
+{
+    CT_POISON(priv, 32);
+
+    uint32_t s[8], px[8], py[8], x[8], y[8];
+    int ret;
+
+    ret = scalar_from_bytes(s, priv);
+    CT_UNPOISON(&ret, sizeof ret);
+    if (ret != 0) {
+        ret = P256_INVALID_PRIVKEY;
+        goto cleanup;
+    }
+
+    ret = point_from_bytes(px, py, peer);
+    if (ret != 0) {
+        ret = P256_INVALID_PUBKEY;
+        goto cleanup;
+    }
+
+    scalar_mult(x, y, px, py, s);
+
+    m256_to_bytes(secret, x, &p256_p);
+    CT_UNPOISON(secret, 32);
+
+cleanup:
+    zeroize(s, sizeof s);
+    return ret;
+}
+
+/**********************************************************************
+ *
+ * ECDSA
+ *
+ * Reference:
+ * [SEC1] SEC 1: Elliptic Curve Cryptography, Certicom research, 2009.
+ *        http://www.secg.org/sec1-v2.pdf
+ **********************************************************************/
+
+/*
+ * Reduction mod n of a small number
+ *
+ * in: x in [0, 2^256)
+ * out: x_out = x_in mod n in [0, n)
+ */
+static void ecdsa_m256_mod_n(uint32_t x[8])
+{
+    uint32_t t[8];
+    uint32_t c = u256_sub(t, x, p256_n.m);
+    u256_cmov(x, t, 1 - c);
+}
+
+/*
+ * Import integer mod n (Montgomery domain) from hash
+ *
+ * in: h = h0, ..., h_hlen
+ *     hlen the length of h in bytes
+ * out: z = (h0 * 2^l-8 + ... + h_l) * 2^256 mod n
+ *      with l = min(32, hlen)
+ *
+ * Note: in [SEC1] this is step 5 of 4.1.3 (sign) or step 3 or 4.1.4 (verify),
+ * with obvious simplications since n's bit-length is a multiple of 8.
+ */
+static void ecdsa_m256_from_hash(uint32_t z[8],
+                                 const uint8_t *h, size_t hlen)
+{
+    /* convert from h (big-endian) */
+    /* hlen is public data so it's OK to branch on it */
+    if (hlen < 32) {
+        uint8_t p[32];
+        for (unsigned i = 0; i < 32; i++)
+            p[i] = 0;
+        for (unsigned i = 0; i < hlen; i++)
+            p[32 - hlen + i] = h[i];
+        u256_from_bytes(z, p);
+    } else {
+        u256_from_bytes(z, h);
+    }
+
+    /* ensure the result is in [0, n) */
+    ecdsa_m256_mod_n(z);
+
+    /* map to Montgomery domain */
+    m256_prep(z, &p256_n);
+}
+
+/*
+ * ECDSA sign
+ */
+int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
+                    const uint8_t *hash, size_t hlen)
+{
+    CT_POISON(priv, 32);
+
+    /*
+     * Steps and notations from [SEC1] 4.1.3
+     *
+     * Instead of retrying on r == 0 or s == 0, just abort,
+     * as those events have negligible probability.
+     */
+    int ret;
+
+    /* Temporary buffers - the first two are mostly stable, so have names */
+    uint32_t xr[8], k[8], t3[8], t4[8];
+
+    /* 1. Set ephemeral keypair */
+    uint8_t *kb = (uint8_t *) t4;
+    /* kb will be erased by re-using t4 for dU - if we exit before that, we
+     * haven't read the private key yet so we kb isn't sensitive yet */
+    ret = scalar_gen_with_pub(kb, k, xr, t3);   /* xr = x_coord(k * G) */
+    if (ret != 0)
+        return P256_RANDOM_FAILED;
+    m256_prep(k, &p256_n);
+
+    /* 2. Convert xr to an integer */
+    m256_done(xr, &p256_p);
+
+    /* 3. Reduce xr mod n (extra: output it while at it) */
+    ecdsa_m256_mod_n(xr);    /* xr = int(xr) mod n */
+
+    /* xr is public data so it's OK to use a branch */
+    if (u256_diff0(xr) == 0)
+        return P256_RANDOM_FAILED;
+
+    u256_to_bytes(sig, xr);
+
+    m256_prep(xr, &p256_n);
+
+    /* 4. Skipped - we take the hash as an input, not the message */
+
+    /* 5. Derive an integer from the hash */
+    ecdsa_m256_from_hash(t3, hash, hlen);   /* t3 = e */
+
+    /* 6. Compute s = k^-1 * (e + r * dU) */
+
+    /* Note: dU will be erased by re-using t4 for the value of s (public) */
+    ret = scalar_from_bytes(t4, priv);   /* t4 = dU (integer domain) */
+    CT_UNPOISON(&ret, sizeof ret); /* Result of input validation */
+    if (ret != 0)
+        return P256_INVALID_PRIVKEY;
+    m256_prep(t4, &p256_n);         /* t4 = dU (Montgomery domain) */
+
+    m256_inv(k, k, &p256_n);        /* k^-1 */
+    m256_mul(t4, xr, t4, &p256_n);  /* t4 = r * dU */
+    m256_add(t4, t3, t4, &p256_n);  /* t4 = e + r * dU */
+    m256_mul(t4, k, t4, &p256_n);   /* t4 = s = k^-1 * (e + r * dU) */
+    zeroize(k, sizeof k);
+
+    /* 7. Output s (r already outputed at step 3) */
+    CT_UNPOISON(t4, 32);
+    if (u256_diff0(t4) == 0) {
+        /* undo early output of r */
+        u256_to_bytes(sig, t4);
+        return P256_RANDOM_FAILED;
+    }
+    m256_to_bytes(sig + 32, t4, &p256_n);
+
+    return P256_SUCCESS;
+}
+
+/*
+ * ECDSA verify
+ */
+int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
+                      const uint8_t *hash, size_t hlen)
+{
+    /*
+     * Steps and notations from [SEC1] 4.1.3
+     *
+     * Note: we're using public data only, so branches are OK
+     */
+    int ret;
+
+    /* 1. Validate range of r and s : [1, n-1] */
+    uint32_t r[8], s[8];
+    ret = scalar_from_bytes(r, sig);
+    if (ret != 0)
+        return P256_INVALID_SIGNATURE;
+    ret = scalar_from_bytes(s, sig + 32);
+    if (ret != 0)
+        return P256_INVALID_SIGNATURE;
+
+    /* 2. Skipped - we take the hash as an input, not the message */
+
+    /* 3. Derive an integer from the hash */
+    uint32_t e[8];
+    ecdsa_m256_from_hash(e, hash, hlen);
+
+    /* 4. Compute u1 = e * s^-1 and u2 = r * s^-1 */
+    uint32_t u1[8], u2[8];
+    m256_prep(s, &p256_n);           /* s in Montgomery domain */
+    m256_inv(s, s, &p256_n);         /* s = s^-1 mod n */
+    m256_mul(u1, e, s, &p256_n);     /* u1 = e * s^-1 mod n */
+    m256_done(u1, &p256_n);          /* u1 out of Montgomery domain */
+
+    u256_cmov(u2, r, 1);
+    m256_prep(u2, &p256_n);          /* r in Montgomery domain */
+    m256_mul(u2, u2, s, &p256_n);    /* u2 = r * s^-1 mod n */
+    m256_done(u2, &p256_n);          /* u2 out of Montgomery domain */
+
+    /* 5. Compute R (and re-use (u1, u2) to store its coordinates */
+    uint32_t px[8], py[8];
+    ret = point_from_bytes(px, py, pub);
+    if (ret != 0)
+        return P256_INVALID_PUBKEY;
+
+    scalar_mult(e, s, px, py, u2);      /* (e, s) = R2 = u2 * Qu */
+
+    if (u256_diff0(u1) == 0) {
+        /* u1 out of range for scalar_mult() - just skip it */
+        u256_cmov(u1, e, 1);
+        /* we don't care about the y coordinate */
+    } else {
+        scalar_mult(px, py, p256_gx, p256_gy, u1); /* (px, py) = R1 = u1 * G */
+
+        /* (u1, u2) = R = R1 + R2 */
+        point_add_or_double_leaky(u1, u2, px, py, e, s);
+        /* No need to check if R == 0 here: if that's the case, it will be
+         * caught when comparating rx (which will be 0) to r (which isn't). */
+    }
+
+    /* 6. Convert xR to an integer */
+    m256_done(u1, &p256_p);
+
+    /* 7. Reduce xR mod n */
+    ecdsa_m256_mod_n(u1);
+
+    /* 8. Compare xR mod n to r */
+    uint32_t diff = u256_diff(u1, r);
+    if (diff == 0)
+        return P256_SUCCESS;
+
+    return P256_INVALID_SIGNATURE;
+}

diff --git a/3rdparty/p256-m/p256-m/p256-m.h b/3rdparty/p256-m/p256-m/p256-m.h
new file mode 100644
index 0000000..f455cf1
--- /dev/null
+++ b/3rdparty/p256-m/p256-m/p256-m.h

@@ -0,0 +1,95 @@
+/*
+ * Interface of curve P-256 (ECDH and ECDSA)
+ *
+ * Author: Manuel Pégourié-Gonnard.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef P256_M_H
+#define P256_M_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Status codes */
+#define P256_SUCCESS            0
+#define P256_RANDOM_FAILED      -1
+#define P256_INVALID_PUBKEY     -2
+#define P256_INVALID_PRIVKEY    -3
+#define P256_INVALID_SIGNATURE  -4
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * RNG function - must be provided externally and be cryptographically secure.
+ *
+ * in: output - must point to a writable buffer of at least output_size bytes.
+ *     output_size - the number of random bytes to write to output.
+ * out: output is filled with output_size random bytes.
+ *      return 0 on success, non-zero on errors.
+ */
+extern int p256_generate_random(uint8_t * output, unsigned output_size);
+
+/*
+ * ECDH/ECDSA generate key pair
+ *
+ * [in] draws from p256_generate_random()
+ * [out] priv: on success, holds the private key, as a big-endian integer
+ * [out] pub: on success, holds the public key, as two big-endian integers
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_RANDOM_FAILED on failure
+ */
+int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64]);
+
+/*
+ * ECDH compute shared secret
+ *
+ * [out] secret: on success, holds the shared secret, as a big-endian integer
+ * [in] priv: our private key as a big-endian integer
+ * [in] pub: the peer's public key, as two big-endian integers
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_INVALID_PRIVKEY if priv is invalid
+ *          P256_INVALID_PUBKEY if pub is invalid
+ */
+int p256_ecdh_shared_secret(uint8_t secret[32],
+                            const uint8_t priv[32], const uint8_t pub[64]);
+
+/*
+ * ECDSA sign
+ *
+ * [in] draws from p256_generate_random()
+ * [out] sig: on success, holds the signature, as two big-endian integers
+ * [in] priv: our private key as a big-endian integer
+ * [in] hash: the hash of the message to be signed
+ * [in] hlen: the size of hash in bytes
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_RANDOM_FAILED on failure
+ *          P256_INVALID_PRIVKEY if priv is invalid
+ */
+int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
+                    const uint8_t *hash, size_t hlen);
+
+/*
+ * ECDSA verify
+ *
+ * [in] sig: the signature to be verified, as two big-endian integers
+ * [in] pub: the associated public key, as two big-endian integers
+ * [in] hash: the hash of the message that was signed
+ * [in] hlen: the size of hash in bytes
+ *
+ * return:  P256_SUCCESS on success - the signature was verified as valid
+ *          P256_INVALID_PUBKEY if pub is invalid
+ *          P256_INVALID_SIGNATURE if the signature was found to be invalid
+ */
+int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
+                      const uint8_t *hash, size_t hlen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* P256_M_H */

diff --git a/3rdparty/p256-m/p256-m_driver_entrypoints.c b/3rdparty/p256-m/p256-m_driver_entrypoints.c
new file mode 100644
index 0000000..c0f8fa1
--- /dev/null
+++ b/3rdparty/p256-m/p256-m_driver_entrypoints.c

@@ -0,0 +1,204 @@
+#include "mbedtls/platform.h"
+#include "p256-m_driver_entrypoints.h"
+#include "p256-m/p256-m.h"
+#include "psa/crypto.h"
+#include "psa_crypto_driver_wrappers.h"
+
+#if defined(MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+
+psa_status_t p256m_to_psa_error( int ret )
+{
+    switch( ret )
+    {
+        case P256_SUCCESS:
+            return( PSA_SUCCESS );
+        case P256_INVALID_PUBKEY:
+        case P256_INVALID_PRIVKEY:
+            return( PSA_ERROR_INVALID_ARGUMENT );
+        case P256_INVALID_SIGNATURE:
+            return( PSA_ERROR_INVALID_SIGNATURE );
+        case P256_RANDOM_FAILED:
+        default:
+            return( PSA_ERROR_GENERIC_ERROR );
+    }
+}
+
+psa_status_t p256m_transparent_generate_key(
+    const psa_key_attributes_t *attributes,
+    uint8_t *key_buffer,
+    size_t key_buffer_size,
+    size_t *key_buffer_length )
+{
+    /* We don't use this argument, but the specification mandates the signature
+     * of driver entry-points. (void) used to avoid compiler warning. */
+    (void) attributes;
+
+    psa_status_t status = PSA_ERROR_NOT_SUPPORTED;
+
+    /*
+     *  p256-m generates a 32 byte private key, and expects to write to a buffer
+     *   that is of that size. */
+    if( key_buffer_size != 32 )
+        return( status );
+
+    /*
+     *  p256-m's keypair generation function outputs both public and private
+     *  keys. Allocate a buffer to which the public key will be written. The
+     *  private key will be written to key_buffer, which is passed to this
+     *  function as an argument. */
+    uint8_t *public_key_buffer = NULL;
+    public_key_buffer = mbedtls_calloc( 1, 64);
+    if( public_key_buffer == NULL)
+        return( PSA_ERROR_INSUFFICIENT_MEMORY );
+
+    status = p256m_to_psa_error(
+                p256_gen_keypair(key_buffer, public_key_buffer) );
+    if( status == PSA_SUCCESS )
+        *key_buffer_length = 32;
+
+    /*
+     *  The storage format for a SECP256R1 keypair is just the private key, so
+     *  the public key does not need to be passed back to the caller. Therefore
+     *  the buffer containing it can be freed. */
+    free( public_key_buffer );
+
+    return status;
+}
+
+psa_status_t p256m_transparent_key_agreement(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *peer_key,
+    size_t peer_key_length,
+    uint8_t *shared_secret,
+    size_t shared_secret_size,
+    size_t *shared_secret_length )
+{
+    /* We don't use these arguments, but the specification mandates the
+     * sginature of driver entry-points. (void) used to avoid compiler
+     * warning. */
+    (void) attributes;
+    (void) alg;
+
+    /*
+     *  Check that private key = 32 bytes, peer public key = 65 bytes,
+     *  and that the shared secret buffer is big enough. */
+    psa_status_t status = PSA_ERROR_NOT_SUPPORTED;
+    if( key_buffer_size != 32 || shared_secret_size < 32 ||
+        peer_key_length != 65 )
+        return ( status );
+
+    status = p256m_to_psa_error(
+                p256_ecdh_shared_secret(shared_secret, key_buffer, peer_key+1) );
+    if( status == PSA_SUCCESS )
+        *shared_secret_length = 32;
+
+    return status;
+}
+
+psa_status_t p256m_transparent_sign_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    uint8_t *signature,
+    size_t signature_size,
+    size_t *signature_length )
+{
+    /* We don't use these arguments, but the specification mandates the
+     * sginature of driver entry-points. (void) used to avoid compiler
+     * warning. */
+    (void) attributes;
+    (void) alg;
+
+    psa_status_t status = PSA_ERROR_NOT_SUPPORTED;
+    if( key_buffer_size != 32 || signature_size != 64)
+        return( status );
+
+    status = p256m_to_psa_error(
+            p256_ecdsa_sign(signature, key_buffer, hash, hash_length) );
+    if( status == PSA_SUCCESS )
+        *signature_length = 64;
+
+    return status;
+}
+
+/*  This function expects the key buffer to contain a 65 byte public key,
+ *  as exported by psa_export_public_key() */
+static psa_status_t p256m_verify_hash_with_public_key(
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    const uint8_t *hash,
+    size_t hash_length,
+    const uint8_t *signature,
+    size_t signature_length )
+{
+    psa_status_t status = PSA_ERROR_NOT_SUPPORTED;
+    if( key_buffer_size != 65 || signature_length != 64 || *key_buffer != 0x04 )
+        return status;
+
+    const uint8_t *public_key_buffer = key_buffer + 1;
+    status = p256m_to_psa_error(
+            p256_ecdsa_verify( signature, public_key_buffer, hash, hash_length) );
+
+    return status;
+}
+
+psa_status_t p256m_transparent_verify_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    const uint8_t *signature,
+    size_t signature_length )
+{
+    /* We don't use this argument, but the specification mandates the signature
+     * of driver entry-points. (void) used to avoid compiler warning. */
+    (void) alg;
+
+    psa_status_t status;
+    uint8_t *public_key_buffer = NULL;
+    size_t public_key_buffer_size = 65;
+    public_key_buffer = mbedtls_calloc( 1, public_key_buffer_size);
+    if( public_key_buffer == NULL)
+        return( PSA_ERROR_INSUFFICIENT_MEMORY );
+    size_t *public_key_length = NULL;
+    public_key_length = mbedtls_calloc( 1, sizeof(size_t) );
+    if( public_key_length == NULL)
+        return( PSA_ERROR_INSUFFICIENT_MEMORY );
+    *public_key_length = 65;
+
+    /*  The contents of key_buffer may either be the 32 byte private key
+     *  (keypair representation), or the 65 byte public key. To ensure the
+     *  latter is obtained, the public key is exported. */
+    status = psa_driver_wrapper_export_public_key(
+                attributes,
+                key_buffer,
+                key_buffer_size,
+                public_key_buffer,
+                public_key_buffer_size,
+                public_key_length );
+    if( status != PSA_SUCCESS )
+        goto exit;
+
+    status = p256m_verify_hash_with_public_key(
+                public_key_buffer,
+                public_key_buffer_size,
+                hash,
+                hash_length,
+                signature,
+                signature_length );
+
+exit:
+    free( public_key_buffer );
+    free( public_key_length );
+    return ( status );
+}
+
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */

diff --git a/3rdparty/p256-m/p256-m_driver_entrypoints.h b/3rdparty/p256-m/p256-m_driver_entrypoints.h
new file mode 100644
index 0000000..bf6b18d
--- /dev/null
+++ b/3rdparty/p256-m/p256-m_driver_entrypoints.h

@@ -0,0 +1,155 @@
+#ifndef P256M_DRIVER_ENTRYPOINTS_H
+#define P256M_DRIVER_ENTRYPOINTS_H
+
+#if defined(MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+#ifndef PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT
+#define PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT
+#endif /* PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT */
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */
+
+#include "psa/crypto_types.h"
+
+/** Convert an internal p256-m error code to a PSA error code
+ *
+ * \param ret           An error code thrown by p256-m
+ *
+ * \return              The corresponding PSA error code
+ */
+ //no-check-names
+psa_status_t p256m_to_psa_error( int ret );
+
+
+/** Generate SECP256R1 ECC Key Pair.
+ *  Interface function which calls the p256-m key generation function and
+ *  places it in the key buffer provided by the caller (mbed TLS) in the
+ *  correct format. For a SECP256R1 curve this is the 32 bit private key.
+ *
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[out]  key_buffer          The buffer to contain the key data in
+ *                                  output format upon successful return.
+ * \param[in]   key_buffer_size     Size of the \p key_buffer buffer in bytes.
+ * \param[out]  key_buffer_length   The length of the data written in \p
+ *                                  key_buffer in bytes.
+ *
+ * \retval  #PSA_SUCCESS
+ *          Success. Keypair generated and stored in buffer.
+ * \retval #PSA_ERROR_NOT_SUPPORTED
+ * \retval #PSA_ERROR_GENERIC_ERROR
+ * \retval #PSA_ERROR_INSUFFICIENT_MEMORY
+ */
+ //no-check-names
+psa_status_t p256m_transparent_generate_key(
+    const psa_key_attributes_t *attributes,
+    uint8_t *key_buffer,
+    size_t key_buffer_size,
+    size_t *key_buffer_length );
+
+/** Perform raw key agreement using p256-m's ECDH implementation
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[in]  key_buffer           The buffer containing the private key
+ *                                  in the format specified by PSA.
+ * \param[in]  key_buffer_size      Size of the \p key_buffer buffer in bytes.
+ * \param[in]  alg                  A key agreement algorithm that is
+ *                                  compatible with the type of the key.
+ * \param[in]  peer_key             The buffer containing the peer's public
+ *                                  key in format specified by PSA.
+ * \param[in]  peer_key_length      Size of the \p peer_key buffer in
+ *                                  bytes.
+ * \param[out] shared_secret        The buffer to which the shared secret
+ *                                  is to be written.
+ * \param[in]  shared_secret_size   Size of the \p shared_secret buffer in
+ *                                  bytes.
+ * \param[out] shared_secret_length On success, the number of bytes that
+ *                                  make up the returned shared secret.
+ * \retval #PSA_SUCCESS
+ *         Success. Shared secret successfully calculated.
+ * \retval #PSA_ERROR_NOT_SUPPORTED
+ */
+ //no-check-names
+psa_status_t p256m_transparent_key_agreement(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *peer_key,
+    size_t peer_key_length,
+    uint8_t *shared_secret,
+    size_t shared_secret_size,
+    size_t *shared_secret_length );
+
+/** Sign an already-calculated hash with a private key using p256-m's ECDSA
+ *  implementation
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[in]  key_buffer           The buffer containing the private key
+ *                                  in the format specified by PSA.
+ * \param[in]  key_buffer_size      Size of the \p key_buffer buffer in bytes.
+ * \param[in]  alg                  A signature algorithm that is compatible
+ *                                  with the type of the key.
+ * \param[in]  hash                 The hash to sign.
+ * \param[in]  hash_length          Size of the \p hash buffer in bytes.
+ * \param[out] signature            Buffer where signature is to be written.
+ * \param[in]  signature_size       Size of the \p signature buffer in bytes.
+ * \param[out] signature_length     On success, the number of bytes
+ *                                  that make up the returned signature value.
+ *
+ * \retval #PSA_SUCCESS
+ *          Success. Hash was signed successfully.
+ *         respectively of the key.
+ * \retval #PSA_ERROR_NOT_SUPPORTED
+ */
+//no-check-names
+psa_status_t p256m_transparent_sign_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    uint8_t *signature,
+    size_t signature_size,
+    size_t *signature_length );
+
+/** Verify the signature of a hash using a SECP256R1 public key using p256-m's
+ *  ECDSA implementation.
+ *
+ * \note p256-m expects a 64 byte public key, but the contents of the key
+         buffer may be the 32 byte keypair representation or the 65 byte
+         public key representation. As a result, this function calls
+         psa_driver_wrapper_export_public_key() to ensure the public key
+         can be passed to p256-m.
+ *
+ * \param[in]  attributes       The attributes of the key to use for the
+ *                              operation.
+ *
+ * \param[in]  key_buffer       The buffer containing the key
+ *                              in the format specified by PSA.
+ * \param[in]  key_buffer_size  Size of the \p key_buffer buffer in bytes.
+ * \param[in]  alg              A signature algorithm that is compatible with
+ *                              the type of the key.
+ * \param[in]  hash             The hash whose signature is to be
+ *                              verified.
+ * \param[in]  hash_length      Size of the \p hash buffer in bytes.
+ * \param[in]  signature        Buffer containing the signature to verify.
+ * \param[in]  signature_length Size of the \p signature buffer in bytes.
+ *
+ * \retval #PSA_SUCCESS
+ *         The signature is valid.
+ * \retval #PSA_ERROR_INVALID_SIGNATURE
+ *         The calculation was performed successfully, but the passed
+ *         signature is not a valid signature.
+ * \retval #PSA_ERROR_NOT_SUPPORTED
+ */
+psa_status_t p256m_transparent_verify_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    const uint8_t *signature,
+    size_t signature_length );
+
+#endif /* P256M_DRIVER_ENTRYPOINTS_H */

diff --git a/docs/psa-driver-example-and-guide.md b/docs/psa-driver-example-and-guide.md
new file mode 100644
index 0000000..d952282
--- /dev/null
+++ b/docs/psa-driver-example-and-guide.md

@@ -0,0 +1,74 @@
+# PSA Cryptoprocessor driver development examples
+
+As of Mbed TLS 3.3.0, the PSA Driver Interface has only been partially implemented. As a result, the deliverables for writing a driver and the method for integrating a driver with Mbed TLS will vary depending on the operation being accelerated. This document describes how to write and integrate cryptoprocessor drivers depending on which operation or driver type is being implemented. 
+
+The `docs/proposed/` directory contains three documents which pertain to the proposed, work-in-progress driver system. The [PSA Driver Interface](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-interface.md) describes how drivers will interface with Mbed TLS in the future, as well as driver types, operation types, and entry points. As many key terms and concepts used in the examples in this document are defined in the PSA Driver Interface, it is recommended that developers read it prior to starting work on implementing drivers. 
+The PSA Driver [Developer](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-developer-guide.md) Guide describes the deliverables for writing a driver that can be used with Mbed TLS, and the PSA Driver [Integration](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-integration-guide.md) Guide describes how a driver can be built alongside Mbed TLS.
+
+## Background on how Mbed TLS calls drivers
+
+The PSA Driver Interface specification specifies which cryptographic operations can be accelerated by third-party drivers. Operations that are completed within one step (one function call), such as verifying a signature, are called *Single-Part Operations*. On the other hand, operations that consist of multiple steps implemented by different functions called sequentially are called *Multi-Part Operations*. Single-part operations implemented by a driver will have one entry point, while multi-part operations will have multiple: one for each step.
+
+There are two types of drivers: *transparent* or *opaque*. See below an excerpt from the PSA Driver Interface specification defining them:
+* **Transparent** drivers implement cryptographic operations on keys that are provided in cleartext at the beginning of each operation. They are typically used for hardware **accelerators**. When a transparent driver is available for a particular combination of parameters (cryptographic algorithm, key type and size, etc.), it is used instead of the default software implementation. Transparent drivers can also be pure software implementations that are distributed as plug-ins to a PSA Cryptography implementation (for example, an alternative implementation with different performance characteristics, or a certified implementation).
+* **Opaque** drivers implement cryptographic operations on keys that can only be used inside a protected environment such as a **secure element**, a hardware security module, a smartcard, a secure enclave, etc. An opaque driver is invoked for the specific [key location](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-interface.md#lifetimes-and-locations) that the driver is registered for: the dispatch is based on the key's lifetime.
+
+Mbed TLS contains a **driver dispatch layer** (also called a driver wrapper layer). For each cryptographic operation that supports driver acceleration (or sub-part of a multi-part operation), the library calls the corresponding function in the driver wrapper. Using flags set at compile time, the driver wrapper ascertains whether any present drivers support the operation. When no such driver is present, the built-in library implementation is called as a fallback (if allowed). When a compatible driver is present, the driver wrapper calls the driver entry point function provided by the driver author.
+
+The long-term goal is for the driver dispatch layer to be auto-generated using a JSON driver description file provided by the driver author.
+For some cryptographic operations, this auto-generation logic has already been implemented. When accelerating these operations, the instructions in the above documents can be followed. For the remaining operations which do not yet support auto-generation of the driver wrapper, developers will have to manually edit the driver dispatch layer and call their driver's entry point functions from there.
+
+Auto-generation of the driver wrapper is supported for the operation entry points specified in the table below. Certain operations are only permitted for opaque drivers. All other operation entry points do not support auto-generation of the driver wrapper.
+
+| Transparent Driver  | Opaque Driver       |
+|---------------------|---------------------|
+| `import_key`        | `import_key`        |
+| `export_key`        | `export_key`        |
+| `export_public_key` | `export_public_key` |
+|                     | `copy_key`          |
+|                     | `get_builtin_key`   |
+
+### Process for Entry Points where auto-generation is implemented
+
+If the driver is accelerating operations whose entry points are in the above table, the instructions in the driver [developer](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-developer-guide.md) and [integration](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-integration-guide.md) guides should be followed. 
+
+**TODO: Provide brief summary of the method using the Mbed TLS test driver as an example**
+
+
+### Process for Entry Points where auto-generation is not implemented
+
+If the driver is accelerating operations whose entry points are not present in the table, a different process is followed where the developer manually edits the driver dispatch layer. In general, the following steps must be taken **for each single-part operation** or **for each sub-part of a multi-part operation**:
+
+**1. Choose a driver prefix and a macro name that indicates whether the driver is enabled** \
+A driver prefix is simply a word (often the name of the driver) that all functions/macros associated with the driver should begin with. This is similar to how most functions/macros in Mbed TLS begin with `PSA_XXX/psa_xx` or `MBEDTLS_XXX/mbedtls_xxx`. The macro name can follow the form `DRIVER_PREFIX_ENABLED` or something similar; it will be used to indicate the driver is available to be called. When building with the driver present, define this macro at compile time. For example, when using `make`, this is done using the `-D` flag. 
+
+**2. Locate the function in the driver dispatch layer that corresponds to the entry point of the operation being accelerated.** \
+The file `psa_crypto_driver_wrappers.c.jinja` contains the driver wrapper functions. For the entry points that have driver wrapper auto-generation implemented, the functions have been replaced with `jinja` templating logic. While the file has a `.jinja` extension, the driver wrapper functions for the remaining entry points are simple C functions. The names of these functions are of the form `psa_driver_wrapper` followed by the entry point name. So, for example, the function `psa_driver_wrapper_sign_hash()` corresponds to the `sign_hash` entry point.
+
+**3. If a driver entry point function has been provided then ensure it has the same signature as the driver wrapper function.** \
+If one has not been provided then write one. Its name should begin with the driver prefix, followed by transparent/opaque (depending on driver type), and end with the entry point name. It should have the same signature as the driver wrapper function. The purpose of the entry point function is to take arguments in PSA format for the implemented operation and return outputs/status codes in PSA format.
+
+**4. Include the following in one of the driver header files:** 
+```
+#if defined(DRIVER_PREFIX_ENABLED)
+#ifndef PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT
+#define PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT
+#endif
+```
+
+**5. Conditionally include header files required by the driver**
+Include any header files required by the driver in `psa_crypto_driver_wrappers.h`, placing the `#include` statements within an `#if defined` block which checks if the driver is available:
+```
+#if defined(DRIVER_PREFIX_ENABLED)
+#include ...
+#endif
+```
+
+**6. Modify the driver wrapper function** \
+Each driver wrapper function contains a `switch` statement which checks the location of the key. If the key is stored in local storage, then operations are performed by a transparent driver. If it is stored elsewhere, then operations are performed by an opaque driver.
+ * **Transparent drivers:** Calls to drivers go under `case PSA_KEY_LOCATION_LOCAL_STORAGE`
+
+
+
+
+<!-- the developer must manually edit the driver dispatch layer such that it first checks for the presence of the driver, and its compatibility with operation parameters (such as key type, algorithm type etc.). If the checks are passed, the driver's entry point function for that operation is called. The specification for the signature of entry point functions can be found [here](https://github.com/Mbed-TLS/mbedtls/blob/development/docs/proposed/psa-driver-interface.md#overview-of-driver-entry-points), but as a rule of thumb the signature for the driver entry point for an operation will be the same as the signature of its driver wrapper function. -->

diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 6158850..84b6531 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h

@@ -3919,4 +3919,16 @@
  */
 //#define MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED
 
+/**
+ * Uncomment to enable p256-m, which implements ECC key generation, ECDH,
+ * and ECDSA for SECP256R1 curves. This driver is used as an example to
+ * document how a third-party driver or software accelerator can be integrated
+ * to work alongside Mbed TLS.
+ *
+ * \warning As of now, the built-in RNG for p256-m depends on rand(). This is
+ * fine for examples, but not in production.
+ * DO NOT ENABLE/USE THIS MACRO IN PRODUCTION BUILDS!
+ */
+//#define MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED
+
 /** \} name SECTION: Module configuration options */

diff --git a/library/psa_crypto_driver_wrappers.h b/library/psa_crypto_driver_wrappers.h
index 7d672d6..cf8fe69 100644
--- a/library/psa_crypto_driver_wrappers.h
+++ b/library/psa_crypto_driver_wrappers.h

@@ -24,6 +24,10 @@
 #include "psa/crypto.h"
 #include "psa/crypto_driver_common.h"
 
+#if defined(MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+#include "../3rdparty/p256-m/p256-m_driver_entrypoints.h"
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */
+
 /*
  * Initialization and termination functions
  */

diff --git a/scripts/config.py b/scripts/config.py
index ac5f77c..92a4aad 100755
--- a/scripts/config.py
+++ b/scripts/config.py

@@ -216,6 +216,7 @@
     'MBEDTLS_TEST_CONSTANT_FLOW_VALGRIND', # build dependency (valgrind headers)
     'MBEDTLS_X509_REMOVE_INFO', # removes a feature
     'MBEDTLS_SSL_RECORD_SIZE_LIMIT', # in development, currently breaks other tests
+    'MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED' # influences SECP256R1 KeyGen/ECDH/ECDSA
 ])
 
 def is_seamless_alt(name):

diff --git a/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja b/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja
index 32e6bfe..f0979f3 100644
--- a/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja
+++ b/scripts/data_files/driver_templates/psa_crypto_driver_wrappers.c.jinja

@@ -316,6 +316,26 @@
             if( status != PSA_ERROR_NOT_SUPPORTED )
                 return( status );
 #endif /* PSA_CRYPTO_DRIVER_TEST */
+#if defined (MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+            if( PSA_KEY_TYPE_IS_ECC( attributes->core.type ) &&
+                PSA_ALG_IS_ECDSA(alg) &&
+                !PSA_ALG_ECDSA_IS_DETERMINISTIC( alg ) &&
+                PSA_KEY_TYPE_ECC_GET_FAMILY(attributes->core.type) == PSA_ECC_FAMILY_SECP_R1 &&
+                attributes->core.bits == 256 )
+            {
+                status = p256m_transparent_sign_hash( attributes,
+                                                      key_buffer,
+                                                      key_buffer_size,
+                                                      alg,
+                                                      hash,
+                                                      hash_length,
+                                                      signature,
+                                                      signature_size,
+                                                      signature_length );
+                if( status != PSA_ERROR_NOT_SUPPORTED )
+                return( status );
+            }
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */
 #endif /* PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT */
             /* Fell through, meaning no accelerator supports this operation */
             return( psa_sign_hash_builtin( attributes,
@@ -400,6 +420,25 @@
             if( status != PSA_ERROR_NOT_SUPPORTED )
                 return( status );
 #endif /* PSA_CRYPTO_DRIVER_TEST */
+#if defined (MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+            if( PSA_KEY_TYPE_IS_ECC( attributes->core.type ) &&
+                PSA_ALG_IS_ECDSA(alg) &&
+                !PSA_ALG_ECDSA_IS_DETERMINISTIC( alg ) &&
+                PSA_KEY_TYPE_ECC_GET_FAMILY(attributes->core.type) == PSA_ECC_FAMILY_SECP_R1 &&
+                attributes->core.bits == 256 )
+            {
+                status = p256m_transparent_verify_hash( attributes,
+                                                        key_buffer,
+                                                        key_buffer_size,
+                                                        alg,
+                                                        hash,
+                                                        hash_length,
+                                                        signature,
+                                                        signature_length );
+                if( status != PSA_ERROR_NOT_SUPPORTED )
+                return( status );
+            }
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */
 #endif /* PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT */
 
             return( psa_verify_hash_builtin( attributes,
@@ -814,6 +853,20 @@
                 if( status != PSA_ERROR_NOT_SUPPORTED )
                     break;
 #endif /* PSA_CRYPTO_DRIVER_TEST */
+#if defined(MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+                if( PSA_KEY_TYPE_IS_ECC( attributes->core.type ) &&
+                    attributes->core.type == PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1) &&
+                    attributes->core.bits == 256 )
+                {
+                    status = p256m_transparent_generate_key( attributes,
+                                                             key_buffer,
+                                                             key_buffer_size,
+                                                             key_buffer_length );
+                    if( status != PSA_ERROR_NOT_SUPPORTED )
+                        break;
+                }
+
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */
             }
 #endif /* PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT */
 
@@ -2752,6 +2805,25 @@
             if( status != PSA_ERROR_NOT_SUPPORTED )
                 return( status );
 #endif /* PSA_CRYPTO_DRIVER_TEST */
+#if defined(MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED)
+            if( PSA_KEY_TYPE_IS_ECC( attributes->core.type ) &&
+                PSA_ALG_IS_ECDH(alg) &&
+                PSA_KEY_TYPE_ECC_GET_FAMILY(attributes->core.type) == PSA_ECC_FAMILY_SECP_R1 &&
+                attributes->core.bits == 256 )
+            {
+                status = p256m_transparent_key_agreement( attributes,
+                                                          key_buffer,
+                                                          key_buffer_size,
+                                                          alg,
+                                                          peer_key,
+                                                          peer_key_length,
+                                                          shared_secret,
+                                                          shared_secret_size,
+                                                          shared_secret_length );
+                if( status != PSA_ERROR_NOT_SUPPORTED)
+                    return( status );
+            }
+#endif /* MBEDTLS_P256M_EXAMPLE_DRIVER_ENABLED */
 #endif /* PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT */
 
             /* Software Fallback */
commit	e41f7e457fe039b1ea8a05389460b3af07a9d3dd	[log] [tgz]
author	Aditya Deshpande <aditya.deshpande@arm.com>	Thu Jan 12 16:29:02 2023 +0000
committer	Aditya Deshpande <aditya.deshpande@arm.com>	Fri Apr 28 17:54:09 2023 +0100
tree	c943d054dad5be35e8df5f74c66313e28c7d683f
parent	14d6b1124bffbcefbacedeb6f6eba5c960abcc28 [diff]