Add third_party/fiat from boringssl
This adds the boringssl version of fiat-crypto. The version bundled here
had most if its non-used code removed, like signing, X25519 curve
operations not required by verification, etc.
Under boringssl tree, fiat can be found in third_party/fiat. The version
included here comes from a boringssl tree where the last commit is
f109f2087349712d3ac717d15fab48e130618110.
Signed-off-by: Fabio Utzig <utzig@apache.org>
diff --git a/ext/fiat/src/curve25519.c b/ext/fiat/src/curve25519.c
new file mode 100644
index 0000000..0215caf
--- /dev/null
+++ b/ext/fiat/src/curve25519.c
@@ -0,0 +1,1104 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
+// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
+// public domain but parts have been replaced with code generated by Fiat
+// (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
+//
+// The field functions are shared by Ed25519 and X25519 where possible.
+
+#include <assert.h>
+#include <string.h>
+#include <stdint.h>
+
+#include <mbedtls/platform_util.h>
+#include <mbedtls/sha512.h>
+
+#include "curve25519.h"
+// Various pre-computed constants.
+#include "curve25519_tables.h"
+
+#define SHA512_DIGEST_LENGTH 64
+
+// Low-level intrinsic operations
+
+static uint64_t load_3(const uint8_t *in) {
+ uint64_t result;
+ result = (uint64_t)in[0];
+ result |= ((uint64_t)in[1]) << 8;
+ result |= ((uint64_t)in[2]) << 16;
+ return result;
+}
+
+static uint64_t load_4(const uint8_t *in) {
+ uint64_t result;
+ result = (uint64_t)in[0];
+ result |= ((uint64_t)in[1]) << 8;
+ result |= ((uint64_t)in[2]) << 16;
+ result |= ((uint64_t)in[3]) << 24;
+ return result;
+}
+
+
+// Field operations.
+
+typedef uint32_t fe_limb_t;
+#define FE_NUM_LIMBS 10
+
+// assert_fe asserts that |f| satisfies bounds:
+//
+// [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
+// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
+// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
+// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
+// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
+//
+// See comments in curve25519_32.h for which functions use these bounds for
+// inputs or outputs.
+#define assert_fe(f) \
+ do { \
+ for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
+ assert(f[_assert_fe_i] <= \
+ ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \
+ } \
+ } while (0)
+
+// assert_fe_loose asserts that |f| satisfies bounds:
+//
+// [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
+// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
+// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
+// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
+// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
+//
+// See comments in curve25519_32.h for which functions use these bounds for
+// inputs or outputs.
+#define assert_fe_loose(f) \
+ do { \
+ for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
+ assert(f[_assert_fe_i] <= \
+ ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \
+ } \
+ } while (0)
+
+//FIXME: use Zephyr macro
+_Static_assert(sizeof(fe) == sizeof(fe_limb_t) * FE_NUM_LIMBS,
+ "fe_limb_t[FE_NUM_LIMBS] is inconsistent with fe");
+
+static void fe_frombytes_strict(fe *h, const uint8_t s[32]) {
+ // |fiat_25519_from_bytes| requires the top-most bit be clear.
+ assert((s[31] & 0x80) == 0);
+ fiat_25519_from_bytes(h->v, s);
+ assert_fe(h->v);
+}
+
+static void fe_frombytes(fe *h, const uint8_t s[32]) {
+ uint8_t s_copy[32];
+ memcpy(s_copy, s, 32);
+ s_copy[31] &= 0x7f;
+ fe_frombytes_strict(h, s_copy);
+}
+
+static void fe_tobytes(uint8_t s[32], const fe *f) {
+ assert_fe(f->v);
+ fiat_25519_to_bytes(s, f->v);
+}
+
+// h = 0
+static void fe_0(fe *h) {
+ mbedtls_platform_zeroize(h, sizeof(fe));
+}
+
+// h = 1
+static void fe_1(fe *h) {
+ mbedtls_platform_zeroize(h, sizeof(fe));
+ h->v[0] = 1;
+}
+
+// h = f + g
+// Can overlap h with f or g.
+static void fe_add(fe_loose *h, const fe *f, const fe *g) {
+ assert_fe(f->v);
+ assert_fe(g->v);
+ fiat_25519_add(h->v, f->v, g->v);
+ assert_fe_loose(h->v);
+}
+
+// h = f - g
+// Can overlap h with f or g.
+static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
+ assert_fe(f->v);
+ assert_fe(g->v);
+ fiat_25519_sub(h->v, f->v, g->v);
+ assert_fe_loose(h->v);
+}
+
+static void fe_carry(fe *h, const fe_loose* f) {
+ assert_fe_loose(f->v);
+ fiat_25519_carry(h->v, f->v);
+ assert_fe(h->v);
+}
+
+static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],
+ const fe_limb_t in1[FE_NUM_LIMBS],
+ const fe_limb_t in2[FE_NUM_LIMBS]) {
+ assert_fe_loose(in1);
+ assert_fe_loose(in2);
+ fiat_25519_carry_mul(out, in1, in2);
+ assert_fe(out);
+}
+
+static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
+ fe_mul_impl(h->v, f->v, g->v);
+}
+
+static void fe_sq_tl(fe *h, const fe_loose *f) {
+ assert_fe_loose(f->v);
+ fiat_25519_carry_square(h->v, f->v);
+ assert_fe(h->v);
+}
+
+static void fe_sq_tt(fe *h, const fe *f) {
+ assert_fe_loose(f->v);
+ fiat_25519_carry_square(h->v, f->v);
+ assert_fe(h->v);
+}
+
+// h = -f
+static void fe_neg(fe_loose *h, const fe *f) {
+ assert_fe(f->v);
+ fiat_25519_opp(h->v, f->v);
+ assert_fe_loose(h->v);
+}
+
+// h = f
+static void fe_copy(fe *h, const fe *f) {
+ memmove(h, f, sizeof(fe));
+}
+
+static void fe_copy_lt(fe_loose *h, const fe *f) {
+ //FIXME: use Zephyr macro
+ _Static_assert(sizeof(fe_loose) == sizeof(fe), "fe and fe_loose mismatch");
+ memmove(h, f, sizeof(fe));
+}
+
+static void fe_loose_invert(fe *out, const fe_loose *z) {
+ fe t0;
+ fe t1;
+ fe t2;
+ fe t3;
+ int i;
+
+ fe_sq_tl(&t0, z);
+ fe_sq_tt(&t1, &t0);
+ for (i = 1; i < 2; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_tlt(&t1, z, &t1);
+ fe_mul_ttt(&t0, &t0, &t1);
+ fe_sq_tt(&t2, &t0);
+ fe_mul_ttt(&t1, &t1, &t2);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 5; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 10; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t2, &t2, &t1);
+ fe_sq_tt(&t3, &t2);
+ for (i = 1; i < 20; ++i) {
+ fe_sq_tt(&t3, &t3);
+ }
+ fe_mul_ttt(&t2, &t3, &t2);
+ fe_sq_tt(&t2, &t2);
+ for (i = 1; i < 10; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 50; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t2, &t2, &t1);
+ fe_sq_tt(&t3, &t2);
+ for (i = 1; i < 100; ++i) {
+ fe_sq_tt(&t3, &t3);
+ }
+ fe_mul_ttt(&t2, &t3, &t2);
+ fe_sq_tt(&t2, &t2);
+ for (i = 1; i < 50; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t1, &t1);
+ for (i = 1; i < 5; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(out, &t1, &t0);
+}
+
+static void fe_invert(fe *out, const fe *z) {
+ fe_loose l;
+ fe_copy_lt(&l, z);
+ fe_loose_invert(out, &l);
+}
+
+static int CRYPTO_memcmp(const void *in_a, const void *in_b, size_t len) {
+ const uint8_t *a = in_a;
+ const uint8_t *b = in_b;
+ uint8_t x = 0;
+
+ for (size_t i = 0; i < len; i++) {
+ x |= a[i] ^ b[i];
+ }
+
+ return x;
+}
+
+// return 0 if f == 0
+// return 1 if f != 0
+static int fe_isnonzero(const fe_loose *f) {
+ fe tight;
+ fe_carry(&tight, f);
+ uint8_t s[32];
+ fe_tobytes(s, &tight);
+
+ static const uint8_t zero[32] = {0};
+ return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;
+}
+
+// return 1 if f is in {1,3,5,...,q-2}
+// return 0 if f is in {0,2,4,...,q-1}
+static int fe_isnegative(const fe *f) {
+ uint8_t s[32];
+ fe_tobytes(s, f);
+ return s[0] & 1;
+}
+
+static void fe_sq2_tt(fe *h, const fe *f) {
+ // h = f^2
+ fe_sq_tt(h, f);
+
+ // h = h + h
+ fe_loose tmp;
+ fe_add(&tmp, h, h);
+ fe_carry(h, &tmp);
+}
+
+static void fe_pow22523(fe *out, const fe *z) {
+ fe t0;
+ fe t1;
+ fe t2;
+ int i;
+
+ fe_sq_tt(&t0, z);
+ fe_sq_tt(&t1, &t0);
+ for (i = 1; i < 2; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(&t1, z, &t1);
+ fe_mul_ttt(&t0, &t0, &t1);
+ fe_sq_tt(&t0, &t0);
+ fe_mul_ttt(&t0, &t1, &t0);
+ fe_sq_tt(&t1, &t0);
+ for (i = 1; i < 5; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(&t0, &t1, &t0);
+ fe_sq_tt(&t1, &t0);
+ for (i = 1; i < 10; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(&t1, &t1, &t0);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 20; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t1, &t1);
+ for (i = 1; i < 10; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(&t0, &t1, &t0);
+ fe_sq_tt(&t1, &t0);
+ for (i = 1; i < 50; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(&t1, &t1, &t0);
+ fe_sq_tt(&t2, &t1);
+ for (i = 1; i < 100; ++i) {
+ fe_sq_tt(&t2, &t2);
+ }
+ fe_mul_ttt(&t1, &t2, &t1);
+ fe_sq_tt(&t1, &t1);
+ for (i = 1; i < 50; ++i) {
+ fe_sq_tt(&t1, &t1);
+ }
+ fe_mul_ttt(&t0, &t1, &t0);
+ fe_sq_tt(&t0, &t0);
+ for (i = 1; i < 2; ++i) {
+ fe_sq_tt(&t0, &t0);
+ }
+ fe_mul_ttt(out, &t0, z);
+}
+
+
+// Group operations.
+
+void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
+ fe recip;
+ fe x;
+ fe y;
+
+ fe_invert(&recip, &h->Z);
+ fe_mul_ttt(&x, &h->X, &recip);
+ fe_mul_ttt(&y, &h->Y, &recip);
+ fe_tobytes(s, &y);
+ s[31] ^= fe_isnegative(&x) << 7;
+}
+
+int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) {
+ fe u;
+ fe_loose v;
+ fe v3;
+ fe vxx;
+ fe_loose check;
+
+ fe_frombytes(&h->Y, s);
+ fe_1(&h->Z);
+ fe_sq_tt(&v3, &h->Y);
+ fe_mul_ttt(&vxx, &v3, &d);
+ fe_sub(&v, &v3, &h->Z); // u = y^2-1
+ fe_carry(&u, &v);
+ fe_add(&v, &vxx, &h->Z); // v = dy^2+1
+
+ fe_sq_tl(&v3, &v);
+ fe_mul_ttl(&v3, &v3, &v); // v3 = v^3
+ fe_sq_tt(&h->X, &v3);
+ fe_mul_ttl(&h->X, &h->X, &v);
+ fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7
+
+ fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8)
+ fe_mul_ttt(&h->X, &h->X, &v3);
+ fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8)
+
+ fe_sq_tt(&vxx, &h->X);
+ fe_mul_ttl(&vxx, &vxx, &v);
+ fe_sub(&check, &vxx, &u);
+ if (fe_isnonzero(&check)) {
+ fe_add(&check, &vxx, &u);
+ if (fe_isnonzero(&check)) {
+ return 0;
+ }
+ fe_mul_ttt(&h->X, &h->X, &sqrtm1);
+ }
+
+ if (fe_isnegative(&h->X) != (s[31] >> 7)) {
+ fe_loose t;
+ fe_neg(&t, &h->X);
+ fe_carry(&h->X, &t);
+ }
+
+ fe_mul_ttt(&h->T, &h->X, &h->Y);
+ return 1;
+}
+
+static void ge_p2_0(ge_p2 *h) {
+ fe_0(&h->X);
+ fe_1(&h->Y);
+ fe_1(&h->Z);
+}
+
+// r = p
+static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
+ fe_copy(&r->X, &p->X);
+ fe_copy(&r->Y, &p->Y);
+ fe_copy(&r->Z, &p->Z);
+}
+
+// r = p
+void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
+ fe_add(&r->YplusX, &p->Y, &p->X);
+ fe_sub(&r->YminusX, &p->Y, &p->X);
+ fe_copy_lt(&r->Z, &p->Z);
+ fe_mul_ltt(&r->T2d, &p->T, &d2);
+}
+
+// r = p
+void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
+ fe_mul_tll(&r->X, &p->X, &p->T);
+ fe_mul_tll(&r->Y, &p->Y, &p->Z);
+ fe_mul_tll(&r->Z, &p->Z, &p->T);
+}
+
+// r = p
+void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
+ fe_mul_tll(&r->X, &p->X, &p->T);
+ fe_mul_tll(&r->Y, &p->Y, &p->Z);
+ fe_mul_tll(&r->Z, &p->Z, &p->T);
+ fe_mul_tll(&r->T, &p->X, &p->Y);
+}
+
+// r = 2 * p
+static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
+ fe trX, trZ, trT;
+ fe t0;
+
+ fe_sq_tt(&trX, &p->X);
+ fe_sq_tt(&trZ, &p->Y);
+ fe_sq2_tt(&trT, &p->Z);
+ fe_add(&r->Y, &p->X, &p->Y);
+ fe_sq_tl(&t0, &r->Y);
+
+ fe_add(&r->Y, &trZ, &trX);
+ fe_sub(&r->Z, &trZ, &trX);
+ fe_carry(&trZ, &r->Y);
+ fe_sub(&r->X, &t0, &trZ);
+ fe_carry(&trZ, &r->Z);
+ fe_sub(&r->T, &trT, &trZ);
+}
+
+// r = 2 * p
+static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
+ ge_p2 q;
+ ge_p3_to_p2(&q, p);
+ ge_p2_dbl(r, &q);
+}
+
+// r = p + q
+static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
+ fe trY, trZ, trT;
+
+ fe_add(&r->X, &p->Y, &p->X);
+ fe_sub(&r->Y, &p->Y, &p->X);
+ fe_mul_tll(&trZ, &r->X, &q->yplusx);
+ fe_mul_tll(&trY, &r->Y, &q->yminusx);
+ fe_mul_tlt(&trT, &q->xy2d, &p->T);
+ fe_add(&r->T, &p->Z, &p->Z);
+ fe_sub(&r->X, &trZ, &trY);
+ fe_add(&r->Y, &trZ, &trY);
+ fe_carry(&trZ, &r->T);
+ fe_add(&r->Z, &trZ, &trT);
+ fe_sub(&r->T, &trZ, &trT);
+}
+
+// r = p - q
+static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
+ fe trY, trZ, trT;
+
+ fe_add(&r->X, &p->Y, &p->X);
+ fe_sub(&r->Y, &p->Y, &p->X);
+ fe_mul_tll(&trZ, &r->X, &q->yminusx);
+ fe_mul_tll(&trY, &r->Y, &q->yplusx);
+ fe_mul_tlt(&trT, &q->xy2d, &p->T);
+ fe_add(&r->T, &p->Z, &p->Z);
+ fe_sub(&r->X, &trZ, &trY);
+ fe_add(&r->Y, &trZ, &trY);
+ fe_carry(&trZ, &r->T);
+ fe_sub(&r->Z, &trZ, &trT);
+ fe_add(&r->T, &trZ, &trT);
+}
+
+// r = p + q
+void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
+ fe trX, trY, trZ, trT;
+
+ fe_add(&r->X, &p->Y, &p->X);
+ fe_sub(&r->Y, &p->Y, &p->X);
+ fe_mul_tll(&trZ, &r->X, &q->YplusX);
+ fe_mul_tll(&trY, &r->Y, &q->YminusX);
+ fe_mul_tlt(&trT, &q->T2d, &p->T);
+ fe_mul_ttl(&trX, &p->Z, &q->Z);
+ fe_add(&r->T, &trX, &trX);
+ fe_sub(&r->X, &trZ, &trY);
+ fe_add(&r->Y, &trZ, &trY);
+ fe_carry(&trZ, &r->T);
+ fe_add(&r->Z, &trZ, &trT);
+ fe_sub(&r->T, &trZ, &trT);
+}
+
+// r = p - q
+void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
+ fe trX, trY, trZ, trT;
+
+ fe_add(&r->X, &p->Y, &p->X);
+ fe_sub(&r->Y, &p->Y, &p->X);
+ fe_mul_tll(&trZ, &r->X, &q->YminusX);
+ fe_mul_tll(&trY, &r->Y, &q->YplusX);
+ fe_mul_tlt(&trT, &q->T2d, &p->T);
+ fe_mul_ttl(&trX, &p->Z, &q->Z);
+ fe_add(&r->T, &trX, &trX);
+ fe_sub(&r->X, &trZ, &trY);
+ fe_add(&r->Y, &trZ, &trY);
+ fe_carry(&trZ, &r->T);
+ fe_sub(&r->Z, &trZ, &trT);
+ fe_add(&r->T, &trZ, &trT);
+}
+
+static void slide(signed char *r, const uint8_t *a) {
+ int i;
+ int b;
+ int k;
+
+ for (i = 0; i < 256; ++i) {
+ r[i] = 1 & (a[i >> 3] >> (i & 7));
+ }
+
+ for (i = 0; i < 256; ++i) {
+ if (r[i]) {
+ for (b = 1; b <= 6 && i + b < 256; ++b) {
+ if (r[i + b]) {
+ if (r[i] + (r[i + b] << b) <= 15) {
+ r[i] += r[i + b] << b;
+ r[i + b] = 0;
+ } else if (r[i] - (r[i + b] << b) >= -15) {
+ r[i] -= r[i + b] << b;
+ for (k = i + b; k < 256; ++k) {
+ if (!r[k]) {
+ r[k] = 1;
+ break;
+ }
+ r[k] = 0;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+// r = a * A + b * B
+// where a = a[0]+256*a[1]+...+256^31 a[31].
+// and b = b[0]+256*b[1]+...+256^31 b[31].
+// B is the Ed25519 base point (x,4/5) with x positive.
+static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
+ const ge_p3 *A, const uint8_t *b) {
+ signed char aslide[256];
+ signed char bslide[256];
+ ge_cached Ai[8]; // A,3A,5A,7A,9A,11A,13A,15A
+ ge_p1p1 t;
+ ge_p3 u;
+ ge_p3 A2;
+ int i;
+
+ slide(aslide, a);
+ slide(bslide, b);
+
+ x25519_ge_p3_to_cached(&Ai[0], A);
+ ge_p3_dbl(&t, A);
+ x25519_ge_p1p1_to_p3(&A2, &t);
+ x25519_ge_add(&t, &A2, &Ai[0]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[1], &u);
+ x25519_ge_add(&t, &A2, &Ai[1]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[2], &u);
+ x25519_ge_add(&t, &A2, &Ai[2]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[3], &u);
+ x25519_ge_add(&t, &A2, &Ai[3]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[4], &u);
+ x25519_ge_add(&t, &A2, &Ai[4]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[5], &u);
+ x25519_ge_add(&t, &A2, &Ai[5]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[6], &u);
+ x25519_ge_add(&t, &A2, &Ai[6]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[7], &u);
+
+ ge_p2_0(r);
+
+ for (i = 255; i >= 0; --i) {
+ if (aslide[i] || bslide[i]) {
+ break;
+ }
+ }
+
+ for (; i >= 0; --i) {
+ ge_p2_dbl(&t, r);
+
+ if (aslide[i] > 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]);
+ } else if (aslide[i] < 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
+ }
+
+ if (bslide[i] > 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ ge_madd(&t, &u, &Bi[bslide[i] / 2]);
+ } else if (bslide[i] < 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
+ }
+
+ x25519_ge_p1p1_to_p2(r, &t);
+ }
+}
+
+// int64_lshift21 returns |a << 21| but is defined when shifting bits into the
+// sign bit. This works around a language flaw in C.
+static inline int64_t int64_lshift21(int64_t a) {
+ return (int64_t)((uint64_t)a << 21);
+}
+
+// The set of scalars is \Z/l
+// where l = 2^252 + 27742317777372353535851937790883648493.
+
+// Input:
+// s[0]+256*s[1]+...+256^63*s[63] = s
+//
+// Output:
+// s[0]+256*s[1]+...+256^31*s[31] = s mod l
+// where l = 2^252 + 27742317777372353535851937790883648493.
+// Overwrites s in place.
+void x25519_sc_reduce(uint8_t s[64]) {
+ int64_t s0 = 2097151 & load_3(s);
+ int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
+ int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
+ int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
+ int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
+ int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
+ int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
+ int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
+ int64_t s8 = 2097151 & load_3(s + 21);
+ int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
+ int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
+ int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
+ int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
+ int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
+ int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
+ int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
+ int64_t s16 = 2097151 & load_3(s + 42);
+ int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
+ int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
+ int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
+ int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
+ int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
+ int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
+ int64_t s23 = (load_4(s + 60) >> 3);
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+ int64_t carry10;
+ int64_t carry11;
+ int64_t carry12;
+ int64_t carry13;
+ int64_t carry14;
+ int64_t carry15;
+ int64_t carry16;
+
+ s11 += s23 * 666643;
+ s12 += s23 * 470296;
+ s13 += s23 * 654183;
+ s14 -= s23 * 997805;
+ s15 += s23 * 136657;
+ s16 -= s23 * 683901;
+ s23 = 0;
+
+ s10 += s22 * 666643;
+ s11 += s22 * 470296;
+ s12 += s22 * 654183;
+ s13 -= s22 * 997805;
+ s14 += s22 * 136657;
+ s15 -= s22 * 683901;
+ s22 = 0;
+
+ s9 += s21 * 666643;
+ s10 += s21 * 470296;
+ s11 += s21 * 654183;
+ s12 -= s21 * 997805;
+ s13 += s21 * 136657;
+ s14 -= s21 * 683901;
+ s21 = 0;
+
+ s8 += s20 * 666643;
+ s9 += s20 * 470296;
+ s10 += s20 * 654183;
+ s11 -= s20 * 997805;
+ s12 += s20 * 136657;
+ s13 -= s20 * 683901;
+ s20 = 0;
+
+ s7 += s19 * 666643;
+ s8 += s19 * 470296;
+ s9 += s19 * 654183;
+ s10 -= s19 * 997805;
+ s11 += s19 * 136657;
+ s12 -= s19 * 683901;
+ s19 = 0;
+
+ s6 += s18 * 666643;
+ s7 += s18 * 470296;
+ s8 += s18 * 654183;
+ s9 -= s18 * 997805;
+ s10 += s18 * 136657;
+ s11 -= s18 * 683901;
+ s18 = 0;
+
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= int64_lshift21(carry6);
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= int64_lshift21(carry8);
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= int64_lshift21(carry10);
+ carry12 = (s12 + (1 << 20)) >> 21;
+ s13 += carry12;
+ s12 -= int64_lshift21(carry12);
+ carry14 = (s14 + (1 << 20)) >> 21;
+ s15 += carry14;
+ s14 -= int64_lshift21(carry14);
+ carry16 = (s16 + (1 << 20)) >> 21;
+ s17 += carry16;
+ s16 -= int64_lshift21(carry16);
+
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= int64_lshift21(carry7);
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= int64_lshift21(carry9);
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= int64_lshift21(carry11);
+ carry13 = (s13 + (1 << 20)) >> 21;
+ s14 += carry13;
+ s13 -= int64_lshift21(carry13);
+ carry15 = (s15 + (1 << 20)) >> 21;
+ s16 += carry15;
+ s15 -= int64_lshift21(carry15);
+
+ s5 += s17 * 666643;
+ s6 += s17 * 470296;
+ s7 += s17 * 654183;
+ s8 -= s17 * 997805;
+ s9 += s17 * 136657;
+ s10 -= s17 * 683901;
+ s17 = 0;
+
+ s4 += s16 * 666643;
+ s5 += s16 * 470296;
+ s6 += s16 * 654183;
+ s7 -= s16 * 997805;
+ s8 += s16 * 136657;
+ s9 -= s16 * 683901;
+ s16 = 0;
+
+ s3 += s15 * 666643;
+ s4 += s15 * 470296;
+ s5 += s15 * 654183;
+ s6 -= s15 * 997805;
+ s7 += s15 * 136657;
+ s8 -= s15 * 683901;
+ s15 = 0;
+
+ s2 += s14 * 666643;
+ s3 += s14 * 470296;
+ s4 += s14 * 654183;
+ s5 -= s14 * 997805;
+ s6 += s14 * 136657;
+ s7 -= s14 * 683901;
+ s14 = 0;
+
+ s1 += s13 * 666643;
+ s2 += s13 * 470296;
+ s3 += s13 * 654183;
+ s4 -= s13 * 997805;
+ s5 += s13 * 136657;
+ s6 -= s13 * 683901;
+ s13 = 0;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = (s0 + (1 << 20)) >> 21;
+ s1 += carry0;
+ s0 -= int64_lshift21(carry0);
+ carry2 = (s2 + (1 << 20)) >> 21;
+ s3 += carry2;
+ s2 -= int64_lshift21(carry2);
+ carry4 = (s4 + (1 << 20)) >> 21;
+ s5 += carry4;
+ s4 -= int64_lshift21(carry4);
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= int64_lshift21(carry6);
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= int64_lshift21(carry8);
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= int64_lshift21(carry10);
+
+ carry1 = (s1 + (1 << 20)) >> 21;
+ s2 += carry1;
+ s1 -= int64_lshift21(carry1);
+ carry3 = (s3 + (1 << 20)) >> 21;
+ s4 += carry3;
+ s3 -= int64_lshift21(carry3);
+ carry5 = (s5 + (1 << 20)) >> 21;
+ s6 += carry5;
+ s5 -= int64_lshift21(carry5);
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= int64_lshift21(carry7);
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= int64_lshift21(carry9);
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= int64_lshift21(carry11);
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = s0 >> 21;
+ s1 += carry0;
+ s0 -= int64_lshift21(carry0);
+ carry1 = s1 >> 21;
+ s2 += carry1;
+ s1 -= int64_lshift21(carry1);
+ carry2 = s2 >> 21;
+ s3 += carry2;
+ s2 -= int64_lshift21(carry2);
+ carry3 = s3 >> 21;
+ s4 += carry3;
+ s3 -= int64_lshift21(carry3);
+ carry4 = s4 >> 21;
+ s5 += carry4;
+ s4 -= int64_lshift21(carry4);
+ carry5 = s5 >> 21;
+ s6 += carry5;
+ s5 -= int64_lshift21(carry5);
+ carry6 = s6 >> 21;
+ s7 += carry6;
+ s6 -= int64_lshift21(carry6);
+ carry7 = s7 >> 21;
+ s8 += carry7;
+ s7 -= int64_lshift21(carry7);
+ carry8 = s8 >> 21;
+ s9 += carry8;
+ s8 -= int64_lshift21(carry8);
+ carry9 = s9 >> 21;
+ s10 += carry9;
+ s9 -= int64_lshift21(carry9);
+ carry10 = s10 >> 21;
+ s11 += carry10;
+ s10 -= int64_lshift21(carry10);
+ carry11 = s11 >> 21;
+ s12 += carry11;
+ s11 -= int64_lshift21(carry11);
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = s0 >> 21;
+ s1 += carry0;
+ s0 -= int64_lshift21(carry0);
+ carry1 = s1 >> 21;
+ s2 += carry1;
+ s1 -= int64_lshift21(carry1);
+ carry2 = s2 >> 21;
+ s3 += carry2;
+ s2 -= int64_lshift21(carry2);
+ carry3 = s3 >> 21;
+ s4 += carry3;
+ s3 -= int64_lshift21(carry3);
+ carry4 = s4 >> 21;
+ s5 += carry4;
+ s4 -= int64_lshift21(carry4);
+ carry5 = s5 >> 21;
+ s6 += carry5;
+ s5 -= int64_lshift21(carry5);
+ carry6 = s6 >> 21;
+ s7 += carry6;
+ s6 -= int64_lshift21(carry6);
+ carry7 = s7 >> 21;
+ s8 += carry7;
+ s7 -= int64_lshift21(carry7);
+ carry8 = s8 >> 21;
+ s9 += carry8;
+ s8 -= int64_lshift21(carry8);
+ carry9 = s9 >> 21;
+ s10 += carry9;
+ s9 -= int64_lshift21(carry9);
+ carry10 = s10 >> 21;
+ s11 += carry10;
+ s10 -= int64_lshift21(carry10);
+
+ s[0] = s0 >> 0;
+ s[1] = s0 >> 8;
+ s[2] = (s0 >> 16) | (s1 << 5);
+ s[3] = s1 >> 3;
+ s[4] = s1 >> 11;
+ s[5] = (s1 >> 19) | (s2 << 2);
+ s[6] = s2 >> 6;
+ s[7] = (s2 >> 14) | (s3 << 7);
+ s[8] = s3 >> 1;
+ s[9] = s3 >> 9;
+ s[10] = (s3 >> 17) | (s4 << 4);
+ s[11] = s4 >> 4;
+ s[12] = s4 >> 12;
+ s[13] = (s4 >> 20) | (s5 << 1);
+ s[14] = s5 >> 7;
+ s[15] = (s5 >> 15) | (s6 << 6);
+ s[16] = s6 >> 2;
+ s[17] = s6 >> 10;
+ s[18] = (s6 >> 18) | (s7 << 3);
+ s[19] = s7 >> 5;
+ s[20] = s7 >> 13;
+ s[21] = s8 >> 0;
+ s[22] = s8 >> 8;
+ s[23] = (s8 >> 16) | (s9 << 5);
+ s[24] = s9 >> 3;
+ s[25] = s9 >> 11;
+ s[26] = (s9 >> 19) | (s10 << 2);
+ s[27] = s10 >> 6;
+ s[28] = (s10 >> 14) | (s11 << 7);
+ s[29] = s11 >> 1;
+ s[30] = s11 >> 9;
+ s[31] = s11 >> 17;
+}
+
+int ED25519_verify(const uint8_t *message, size_t message_len,
+ const uint8_t signature[64], const uint8_t public_key[32]) {
+ ge_p3 A;
+ if ((signature[63] & 224) != 0 ||
+ !x25519_ge_frombytes_vartime(&A, public_key)) {
+ return 0;
+ }
+
+ fe_loose t;
+ fe_neg(&t, &A.X);
+ fe_carry(&A.X, &t);
+ fe_neg(&t, &A.T);
+ fe_carry(&A.T, &t);
+
+ uint8_t pkcopy[32];
+ memcpy(pkcopy, public_key, 32);
+ uint8_t rcopy[32];
+ memcpy(rcopy, signature, 32);
+ union {
+ uint64_t u64[4];
+ uint8_t u8[32];
+ } scopy;
+ memcpy(&scopy.u8[0], signature + 32, 32);
+
+ // https://tools.ietf.org/html/rfc8032#section-5.1.7 requires that s be in
+ // the range [0, order) in order to prevent signature malleability.
+
+ // kOrder is the order of Curve25519 in little-endian form.
+ static const uint64_t kOrder[4] = {
+ UINT64_C(0x5812631a5cf5d3ed),
+ UINT64_C(0x14def9dea2f79cd6),
+ 0,
+ UINT64_C(0x1000000000000000),
+ };
+ for (size_t i = 3;; i--) {
+ if (scopy.u64[i] > kOrder[i]) {
+ return 0;
+ } else if (scopy.u64[i] < kOrder[i]) {
+ break;
+ } else if (i == 0) {
+ return 0;
+ }
+ }
+
+ mbedtls_sha512_context ctx;
+ mbedtls_sha512_init(&ctx);
+ int ret;
+ ret = mbedtls_sha512_starts_ret(&ctx, 0);
+ assert(ret == 0);
+
+ ret = mbedtls_sha512_update_ret(&ctx, signature, 32);
+ assert(ret == 0);
+ ret = mbedtls_sha512_update_ret(&ctx, public_key, 32);
+ assert(ret == 0);
+ ret = mbedtls_sha512_update_ret(&ctx, message, message_len);
+ assert(ret == 0);
+
+ uint8_t h[SHA512_DIGEST_LENGTH];
+ ret = mbedtls_sha512_finish_ret(&ctx, h);
+ assert(ret == 0);
+ mbedtls_sha512_free(&ctx);
+
+ x25519_sc_reduce(h);
+
+ ge_p2 R;
+ ge_double_scalarmult_vartime(&R, h, &A, scopy.u8);
+
+ uint8_t rcheck[32];
+ x25519_ge_tobytes(rcheck, &R);
+
+ return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0;
+}