Blame - ext/fiat/src/curve25519.c - mirror/mcuboot

blob: f669570c4549e2cd74b0ecbb553a5f6152a2a279 [file] [log] [blame]

Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	1	// The MIT License (MIT)
				2	//
				3	// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
				4	//
				5	// Permission is hereby granted, free of charge, to any person obtaining a copy
				6	// of this software and associated documentation files (the "Software"), to deal
				7	// in the Software without restriction, including without limitation the rights
				8	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				9	// copies of the Software, and to permit persons to whom the Software is
				10	// furnished to do so, subject to the following conditions:
				11	//
				12	// The above copyright notice and this permission notice shall be included in all
				13	// copies or substantial portions of the Software.
				14	//
				15	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				18	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				20	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				21	// SOFTWARE.
				22
				23	// Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
				24	// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
				25	// public domain but parts have been replaced with code generated by Fiat
				26	// (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
				27	//
				28	// The field functions are shared by Ed25519 and X25519 where possible.
				29
				30	#include <assert.h>
				31	#include <string.h>
				32	#include <stdint.h>
				33
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	34	#include <mcuboot_config/mcuboot_config.h>
				35
				36	#if defined(MCUBOOT_USE_MBED_TLS)
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	37	#include <mbedtls/platform_util.h>
				38	#include <mbedtls/sha512.h>
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	39	#else
				40	#include <tinycrypt/constants.h>
				41	#include <tinycrypt/utils.h>
				42	#include <tinycrypt/sha512.h>
				43	#endif
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	44
				45	#include "curve25519.h"
				46	// Various pre-computed constants.
				47	#include "curve25519_tables.h"
				48
				49	#define SHA512_DIGEST_LENGTH 64
				50
				51	// Low-level intrinsic operations
				52
				53	static uint64_t load_3(const uint8_t *in) {
				54	uint64_t result;
				55	result = (uint64_t)in[0];
				56	result \|= ((uint64_t)in[1]) << 8;
				57	result \|= ((uint64_t)in[2]) << 16;
				58	return result;
				59	}
				60
				61	static uint64_t load_4(const uint8_t *in) {
				62	uint64_t result;
				63	result = (uint64_t)in[0];
				64	result \|= ((uint64_t)in[1]) << 8;
				65	result \|= ((uint64_t)in[2]) << 16;
				66	result \|= ((uint64_t)in[3]) << 24;
				67	return result;
				68	}
				69
				70
				71	// Field operations.
				72
				73	typedef uint32_t fe_limb_t;
				74	#define FE_NUM_LIMBS 10
				75
				76	// assert_fe asserts that \|f\| satisfies bounds:
				77	//
				78	// [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
				79	// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
				80	// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
				81	// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
				82	// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
				83	//
				84	// See comments in curve25519_32.h for which functions use these bounds for
				85	// inputs or outputs.
				86	#define assert_fe(f) \
				87	do { \
				88	for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
				89	assert(f[_assert_fe_i] <= \
				90	((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \
				91	} \
				92	} while (0)
				93
				94	// assert_fe_loose asserts that \|f\| satisfies bounds:
				95	//
				96	// [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
				97	// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
				98	// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
				99	// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
				100	// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
				101	//
				102	// See comments in curve25519_32.h for which functions use these bounds for
				103	// inputs or outputs.
				104	#define assert_fe_loose(f) \
				105	do { \
				106	for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
				107	assert(f[_assert_fe_i] <= \
				108	((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \
				109	} \
				110	} while (0)
				111
				112	//FIXME: use Zephyr macro
				113	_Static_assert(sizeof(fe) == sizeof(fe_limb_t) * FE_NUM_LIMBS,
				114	"fe_limb_t[FE_NUM_LIMBS] is inconsistent with fe");
				115
				116	static void fe_frombytes_strict(fe *h, const uint8_t s[32]) {
				117	// \|fiat_25519_from_bytes\| requires the top-most bit be clear.
				118	assert((s[31] & 0x80) == 0);
				119	fiat_25519_from_bytes(h->v, s);
				120	assert_fe(h->v);
				121	}
				122
				123	static void fe_frombytes(fe *h, const uint8_t s[32]) {
				124	uint8_t s_copy[32];
				125	memcpy(s_copy, s, 32);
				126	s_copy[31] &= 0x7f;
				127	fe_frombytes_strict(h, s_copy);
				128	}
				129
				130	static void fe_tobytes(uint8_t s[32], const fe *f) {
				131	assert_fe(f->v);
				132	fiat_25519_to_bytes(s, f->v);
				133	}
				134
				135	// h = 0
				136	static void fe_0(fe *h) {
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	137	#if defined(MCUBOOT_USE_MBED_TLS)
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	138	mbedtls_platform_zeroize(h, sizeof(fe));
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	139	#else
				140	_set(h, 0, sizeof(fe));
				141	#endif
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	142	}
				143
				144	// h = 1
				145	static void fe_1(fe *h) {
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	146	#if defined(MCUBOOT_USE_MBED_TLS)
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	147	mbedtls_platform_zeroize(h, sizeof(fe));
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	148	#else
				149	_set(h, 0, sizeof(fe));
				150	#endif
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	151	h->v[0] = 1;
				152	}
				153
				154	// h = f + g
				155	// Can overlap h with f or g.
				156	static void fe_add(fe_loose h, const fe f, const fe *g) {
				157	assert_fe(f->v);
				158	assert_fe(g->v);
				159	fiat_25519_add(h->v, f->v, g->v);
				160	assert_fe_loose(h->v);
				161	}
				162
				163	// h = f - g
				164	// Can overlap h with f or g.
				165	static void fe_sub(fe_loose h, const fe f, const fe *g) {
				166	assert_fe(f->v);
				167	assert_fe(g->v);
				168	fiat_25519_sub(h->v, f->v, g->v);
				169	assert_fe_loose(h->v);
				170	}
				171
				172	static void fe_carry(fe h, const fe_loose f) {
				173	assert_fe_loose(f->v);
				174	fiat_25519_carry(h->v, f->v);
				175	assert_fe(h->v);
				176	}
				177
				178	static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],
				179	const fe_limb_t in1[FE_NUM_LIMBS],
				180	const fe_limb_t in2[FE_NUM_LIMBS]) {
				181	assert_fe_loose(in1);
				182	assert_fe_loose(in2);
				183	fiat_25519_carry_mul(out, in1, in2);
				184	assert_fe(out);
				185	}
				186
				187	static void fe_mul_ltt(fe_loose h, const fe f, const fe *g) {
				188	fe_mul_impl(h->v, f->v, g->v);
				189	}
				190
				191	static void fe_mul_ttt(fe h, const fe f, const fe *g) {
				192	fe_mul_impl(h->v, f->v, g->v);
				193	}
				194
				195	static void fe_mul_tlt(fe h, const fe_loose f, const fe *g) {
				196	fe_mul_impl(h->v, f->v, g->v);
				197	}
				198
				199	static void fe_mul_ttl(fe h, const fe f, const fe_loose *g) {
				200	fe_mul_impl(h->v, f->v, g->v);
				201	}
				202
				203	static void fe_mul_tll(fe h, const fe_loose f, const fe_loose *g) {
				204	fe_mul_impl(h->v, f->v, g->v);
				205	}
				206
				207	static void fe_sq_tl(fe h, const fe_loose f) {
				208	assert_fe_loose(f->v);
				209	fiat_25519_carry_square(h->v, f->v);
				210	assert_fe(h->v);
				211	}
				212
				213	static void fe_sq_tt(fe h, const fe f) {
				214	assert_fe_loose(f->v);
				215	fiat_25519_carry_square(h->v, f->v);
				216	assert_fe(h->v);
				217	}
				218
				219	// h = -f
				220	static void fe_neg(fe_loose h, const fe f) {
				221	assert_fe(f->v);
				222	fiat_25519_opp(h->v, f->v);
				223	assert_fe_loose(h->v);
				224	}
				225
				226	// h = f
				227	static void fe_copy(fe h, const fe f) {
				228	memmove(h, f, sizeof(fe));
				229	}
				230
				231	static void fe_copy_lt(fe_loose h, const fe f) {
				232	//FIXME: use Zephyr macro
				233	_Static_assert(sizeof(fe_loose) == sizeof(fe), "fe and fe_loose mismatch");
				234	memmove(h, f, sizeof(fe));
				235	}
				236
				237	static void fe_loose_invert(fe out, const fe_loose z) {
				238	fe t0;
				239	fe t1;
				240	fe t2;
				241	fe t3;
				242	int i;
				243
				244	fe_sq_tl(&t0, z);
				245	fe_sq_tt(&t1, &t0);
				246	for (i = 1; i < 2; ++i) {
				247	fe_sq_tt(&t1, &t1);
				248	}
				249	fe_mul_tlt(&t1, z, &t1);
				250	fe_mul_ttt(&t0, &t0, &t1);
				251	fe_sq_tt(&t2, &t0);
				252	fe_mul_ttt(&t1, &t1, &t2);
				253	fe_sq_tt(&t2, &t1);
				254	for (i = 1; i < 5; ++i) {
				255	fe_sq_tt(&t2, &t2);
				256	}
				257	fe_mul_ttt(&t1, &t2, &t1);
				258	fe_sq_tt(&t2, &t1);
				259	for (i = 1; i < 10; ++i) {
				260	fe_sq_tt(&t2, &t2);
				261	}
				262	fe_mul_ttt(&t2, &t2, &t1);
				263	fe_sq_tt(&t3, &t2);
				264	for (i = 1; i < 20; ++i) {
				265	fe_sq_tt(&t3, &t3);
				266	}
				267	fe_mul_ttt(&t2, &t3, &t2);
				268	fe_sq_tt(&t2, &t2);
				269	for (i = 1; i < 10; ++i) {
				270	fe_sq_tt(&t2, &t2);
				271	}
				272	fe_mul_ttt(&t1, &t2, &t1);
				273	fe_sq_tt(&t2, &t1);
				274	for (i = 1; i < 50; ++i) {
				275	fe_sq_tt(&t2, &t2);
				276	}
				277	fe_mul_ttt(&t2, &t2, &t1);
				278	fe_sq_tt(&t3, &t2);
				279	for (i = 1; i < 100; ++i) {
				280	fe_sq_tt(&t3, &t3);
				281	}
				282	fe_mul_ttt(&t2, &t3, &t2);
				283	fe_sq_tt(&t2, &t2);
				284	for (i = 1; i < 50; ++i) {
				285	fe_sq_tt(&t2, &t2);
				286	}
				287	fe_mul_ttt(&t1, &t2, &t1);
				288	fe_sq_tt(&t1, &t1);
				289	for (i = 1; i < 5; ++i) {
				290	fe_sq_tt(&t1, &t1);
				291	}
				292	fe_mul_ttt(out, &t1, &t0);
				293	}
				294
				295	static void fe_invert(fe out, const fe z) {
				296	fe_loose l;
				297	fe_copy_lt(&l, z);
				298	fe_loose_invert(out, &l);
				299	}
				300
				301	static int CRYPTO_memcmp(const void in_a, const void in_b, size_t len) {
				302	const uint8_t *a = in_a;
				303	const uint8_t *b = in_b;
				304	uint8_t x = 0;
				305
				306	for (size_t i = 0; i < len; i++) {
				307	x \|= a[i] ^ b[i];
				308	}
				309
				310	return x;
				311	}
				312
				313	// return 0 if f == 0
				314	// return 1 if f != 0
				315	static int fe_isnonzero(const fe_loose *f) {
				316	fe tight;
				317	fe_carry(&tight, f);
				318	uint8_t s[32];
				319	fe_tobytes(s, &tight);
				320
				321	static const uint8_t zero[32] = {0};
				322	return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;
				323	}
				324
				325	// return 1 if f is in {1,3,5,...,q-2}
				326	// return 0 if f is in {0,2,4,...,q-1}
				327	static int fe_isnegative(const fe *f) {
				328	uint8_t s[32];
				329	fe_tobytes(s, f);
				330	return s[0] & 1;
				331	}
				332
				333	static void fe_sq2_tt(fe h, const fe f) {
				334	// h = f^2
				335	fe_sq_tt(h, f);
				336
				337	// h = h + h
				338	fe_loose tmp;
				339	fe_add(&tmp, h, h);
				340	fe_carry(h, &tmp);
				341	}
				342
				343	static void fe_pow22523(fe out, const fe z) {
				344	fe t0;
				345	fe t1;
				346	fe t2;
				347	int i;
				348
				349	fe_sq_tt(&t0, z);
				350	fe_sq_tt(&t1, &t0);
				351	for (i = 1; i < 2; ++i) {
				352	fe_sq_tt(&t1, &t1);
				353	}
				354	fe_mul_ttt(&t1, z, &t1);
				355	fe_mul_ttt(&t0, &t0, &t1);
				356	fe_sq_tt(&t0, &t0);
				357	fe_mul_ttt(&t0, &t1, &t0);
				358	fe_sq_tt(&t1, &t0);
				359	for (i = 1; i < 5; ++i) {
				360	fe_sq_tt(&t1, &t1);
				361	}
				362	fe_mul_ttt(&t0, &t1, &t0);
				363	fe_sq_tt(&t1, &t0);
				364	for (i = 1; i < 10; ++i) {
				365	fe_sq_tt(&t1, &t1);
				366	}
				367	fe_mul_ttt(&t1, &t1, &t0);
				368	fe_sq_tt(&t2, &t1);
				369	for (i = 1; i < 20; ++i) {
				370	fe_sq_tt(&t2, &t2);
				371	}
				372	fe_mul_ttt(&t1, &t2, &t1);
				373	fe_sq_tt(&t1, &t1);
				374	for (i = 1; i < 10; ++i) {
				375	fe_sq_tt(&t1, &t1);
				376	}
				377	fe_mul_ttt(&t0, &t1, &t0);
				378	fe_sq_tt(&t1, &t0);
				379	for (i = 1; i < 50; ++i) {
				380	fe_sq_tt(&t1, &t1);
				381	}
				382	fe_mul_ttt(&t1, &t1, &t0);
				383	fe_sq_tt(&t2, &t1);
				384	for (i = 1; i < 100; ++i) {
				385	fe_sq_tt(&t2, &t2);
				386	}
				387	fe_mul_ttt(&t1, &t2, &t1);
				388	fe_sq_tt(&t1, &t1);
				389	for (i = 1; i < 50; ++i) {
				390	fe_sq_tt(&t1, &t1);
				391	}
				392	fe_mul_ttt(&t0, &t1, &t0);
				393	fe_sq_tt(&t0, &t0);
				394	for (i = 1; i < 2; ++i) {
				395	fe_sq_tt(&t0, &t0);
				396	}
				397	fe_mul_ttt(out, &t0, z);
				398	}
				399
				400
				401	// Group operations.
				402
				403	void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
				404	fe recip;
				405	fe x;
				406	fe y;
				407
				408	fe_invert(&recip, &h->Z);
				409	fe_mul_ttt(&x, &h->X, &recip);
				410	fe_mul_ttt(&y, &h->Y, &recip);
				411	fe_tobytes(s, &y);
				412	s[31] ^= fe_isnegative(&x) << 7;
				413	}
				414
				415	int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) {
				416	fe u;
				417	fe_loose v;
				418	fe v3;
				419	fe vxx;
				420	fe_loose check;
				421
				422	fe_frombytes(&h->Y, s);
				423	fe_1(&h->Z);
				424	fe_sq_tt(&v3, &h->Y);
				425	fe_mul_ttt(&vxx, &v3, &d);
				426	fe_sub(&v, &v3, &h->Z); // u = y^2-1
				427	fe_carry(&u, &v);
				428	fe_add(&v, &vxx, &h->Z); // v = dy^2+1
				429
				430	fe_sq_tl(&v3, &v);
				431	fe_mul_ttl(&v3, &v3, &v); // v3 = v^3
				432	fe_sq_tt(&h->X, &v3);
				433	fe_mul_ttl(&h->X, &h->X, &v);
				434	fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7
				435
				436	fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8)
				437	fe_mul_ttt(&h->X, &h->X, &v3);
				438	fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8)
				439
				440	fe_sq_tt(&vxx, &h->X);
				441	fe_mul_ttl(&vxx, &vxx, &v);
				442	fe_sub(&check, &vxx, &u);
				443	if (fe_isnonzero(&check)) {
				444	fe_add(&check, &vxx, &u);
				445	if (fe_isnonzero(&check)) {
				446	return 0;
				447	}
				448	fe_mul_ttt(&h->X, &h->X, &sqrtm1);
				449	}
				450
				451	if (fe_isnegative(&h->X) != (s[31] >> 7)) {
				452	fe_loose t;
				453	fe_neg(&t, &h->X);
				454	fe_carry(&h->X, &t);
				455	}
				456
				457	fe_mul_ttt(&h->T, &h->X, &h->Y);
				458	return 1;
				459	}
				460
				461	static void ge_p2_0(ge_p2 *h) {
				462	fe_0(&h->X);
				463	fe_1(&h->Y);
				464	fe_1(&h->Z);
				465	}
				466
				467	// r = p
				468	static void ge_p3_to_p2(ge_p2 r, const ge_p3 p) {
				469	fe_copy(&r->X, &p->X);
				470	fe_copy(&r->Y, &p->Y);
				471	fe_copy(&r->Z, &p->Z);
				472	}
				473
				474	// r = p
				475	void x25519_ge_p3_to_cached(ge_cached r, const ge_p3 p) {
				476	fe_add(&r->YplusX, &p->Y, &p->X);
				477	fe_sub(&r->YminusX, &p->Y, &p->X);
				478	fe_copy_lt(&r->Z, &p->Z);
				479	fe_mul_ltt(&r->T2d, &p->T, &d2);
				480	}
				481
				482	// r = p
				483	void x25519_ge_p1p1_to_p2(ge_p2 r, const ge_p1p1 p) {
				484	fe_mul_tll(&r->X, &p->X, &p->T);
				485	fe_mul_tll(&r->Y, &p->Y, &p->Z);
				486	fe_mul_tll(&r->Z, &p->Z, &p->T);
				487	}
				488
				489	// r = p
				490	void x25519_ge_p1p1_to_p3(ge_p3 r, const ge_p1p1 p) {
				491	fe_mul_tll(&r->X, &p->X, &p->T);
				492	fe_mul_tll(&r->Y, &p->Y, &p->Z);
				493	fe_mul_tll(&r->Z, &p->Z, &p->T);
				494	fe_mul_tll(&r->T, &p->X, &p->Y);
				495	}
				496
				497	// r = 2 * p
				498	static void ge_p2_dbl(ge_p1p1 r, const ge_p2 p) {
				499	fe trX, trZ, trT;
				500	fe t0;
				501
				502	fe_sq_tt(&trX, &p->X);
				503	fe_sq_tt(&trZ, &p->Y);
				504	fe_sq2_tt(&trT, &p->Z);
				505	fe_add(&r->Y, &p->X, &p->Y);
				506	fe_sq_tl(&t0, &r->Y);
				507
				508	fe_add(&r->Y, &trZ, &trX);
				509	fe_sub(&r->Z, &trZ, &trX);
				510	fe_carry(&trZ, &r->Y);
				511	fe_sub(&r->X, &t0, &trZ);
				512	fe_carry(&trZ, &r->Z);
				513	fe_sub(&r->T, &trT, &trZ);
				514	}
				515
				516	// r = 2 * p
				517	static void ge_p3_dbl(ge_p1p1 r, const ge_p3 p) {
				518	ge_p2 q;
				519	ge_p3_to_p2(&q, p);
				520	ge_p2_dbl(r, &q);
				521	}
				522
				523	// r = p + q
				524	static void ge_madd(ge_p1p1 r, const ge_p3 p, const ge_precomp *q) {
				525	fe trY, trZ, trT;
				526
				527	fe_add(&r->X, &p->Y, &p->X);
				528	fe_sub(&r->Y, &p->Y, &p->X);
				529	fe_mul_tll(&trZ, &r->X, &q->yplusx);
				530	fe_mul_tll(&trY, &r->Y, &q->yminusx);
				531	fe_mul_tlt(&trT, &q->xy2d, &p->T);
				532	fe_add(&r->T, &p->Z, &p->Z);
				533	fe_sub(&r->X, &trZ, &trY);
				534	fe_add(&r->Y, &trZ, &trY);
				535	fe_carry(&trZ, &r->T);
				536	fe_add(&r->Z, &trZ, &trT);
				537	fe_sub(&r->T, &trZ, &trT);
				538	}
				539
				540	// r = p - q
				541	static void ge_msub(ge_p1p1 r, const ge_p3 p, const ge_precomp *q) {
				542	fe trY, trZ, trT;
				543
				544	fe_add(&r->X, &p->Y, &p->X);
				545	fe_sub(&r->Y, &p->Y, &p->X);
				546	fe_mul_tll(&trZ, &r->X, &q->yminusx);
				547	fe_mul_tll(&trY, &r->Y, &q->yplusx);
				548	fe_mul_tlt(&trT, &q->xy2d, &p->T);
				549	fe_add(&r->T, &p->Z, &p->Z);
				550	fe_sub(&r->X, &trZ, &trY);
				551	fe_add(&r->Y, &trZ, &trY);
				552	fe_carry(&trZ, &r->T);
				553	fe_sub(&r->Z, &trZ, &trT);
				554	fe_add(&r->T, &trZ, &trT);
				555	}
				556
				557	// r = p + q
				558	void x25519_ge_add(ge_p1p1 r, const ge_p3 p, const ge_cached *q) {
				559	fe trX, trY, trZ, trT;
				560
				561	fe_add(&r->X, &p->Y, &p->X);
				562	fe_sub(&r->Y, &p->Y, &p->X);
				563	fe_mul_tll(&trZ, &r->X, &q->YplusX);
				564	fe_mul_tll(&trY, &r->Y, &q->YminusX);
				565	fe_mul_tlt(&trT, &q->T2d, &p->T);
				566	fe_mul_ttl(&trX, &p->Z, &q->Z);
				567	fe_add(&r->T, &trX, &trX);
				568	fe_sub(&r->X, &trZ, &trY);
				569	fe_add(&r->Y, &trZ, &trY);
				570	fe_carry(&trZ, &r->T);
				571	fe_add(&r->Z, &trZ, &trT);
				572	fe_sub(&r->T, &trZ, &trT);
				573	}
				574
				575	// r = p - q
				576	void x25519_ge_sub(ge_p1p1 r, const ge_p3 p, const ge_cached *q) {
				577	fe trX, trY, trZ, trT;
				578
				579	fe_add(&r->X, &p->Y, &p->X);
				580	fe_sub(&r->Y, &p->Y, &p->X);
				581	fe_mul_tll(&trZ, &r->X, &q->YminusX);
				582	fe_mul_tll(&trY, &r->Y, &q->YplusX);
				583	fe_mul_tlt(&trT, &q->T2d, &p->T);
				584	fe_mul_ttl(&trX, &p->Z, &q->Z);
				585	fe_add(&r->T, &trX, &trX);
				586	fe_sub(&r->X, &trZ, &trY);
				587	fe_add(&r->Y, &trZ, &trY);
				588	fe_carry(&trZ, &r->T);
				589	fe_sub(&r->Z, &trZ, &trT);
				590	fe_add(&r->T, &trZ, &trT);
				591	}
				592
				593	static void slide(signed char r, const uint8_t a) {
				594	int i;
				595	int b;
				596	int k;
				597
				598	for (i = 0; i < 256; ++i) {
				599	r[i] = 1 & (a[i >> 3] >> (i & 7));
				600	}
				601
				602	for (i = 0; i < 256; ++i) {
				603	if (r[i]) {
				604	for (b = 1; b <= 6 && i + b < 256; ++b) {
				605	if (r[i + b]) {
				606	if (r[i] + (r[i + b] << b) <= 15) {
				607	r[i] += r[i + b] << b;
				608	r[i + b] = 0;
				609	} else if (r[i] - (r[i + b] << b) >= -15) {
				610	r[i] -= r[i + b] << b;
				611	for (k = i + b; k < 256; ++k) {
				612	if (!r[k]) {
				613	r[k] = 1;
				614	break;
				615	}
				616	r[k] = 0;
				617	}
				618	} else {
				619	break;
				620	}
				621	}
				622	}
				623	}
				624	}
				625	}
				626
				627	// r = a * A + b * B
				628	// where a = a[0]+256*a[1]+...+256^31 a[31].
				629	// and b = b[0]+256*b[1]+...+256^31 b[31].
				630	// B is the Ed25519 base point (x,4/5) with x positive.
				631	static void ge_double_scalarmult_vartime(ge_p2 r, const uint8_t a,
				632	const ge_p3 A, const uint8_t b) {
				633	signed char aslide[256];
				634	signed char bslide[256];
				635	ge_cached Ai[8]; // A,3A,5A,7A,9A,11A,13A,15A
				636	ge_p1p1 t;
				637	ge_p3 u;
				638	ge_p3 A2;
				639	int i;
				640
				641	slide(aslide, a);
				642	slide(bslide, b);
				643
				644	x25519_ge_p3_to_cached(&Ai[0], A);
				645	ge_p3_dbl(&t, A);
				646	x25519_ge_p1p1_to_p3(&A2, &t);
				647	x25519_ge_add(&t, &A2, &Ai[0]);
				648	x25519_ge_p1p1_to_p3(&u, &t);
				649	x25519_ge_p3_to_cached(&Ai[1], &u);
				650	x25519_ge_add(&t, &A2, &Ai[1]);
				651	x25519_ge_p1p1_to_p3(&u, &t);
				652	x25519_ge_p3_to_cached(&Ai[2], &u);
				653	x25519_ge_add(&t, &A2, &Ai[2]);
				654	x25519_ge_p1p1_to_p3(&u, &t);
				655	x25519_ge_p3_to_cached(&Ai[3], &u);
				656	x25519_ge_add(&t, &A2, &Ai[3]);
				657	x25519_ge_p1p1_to_p3(&u, &t);
				658	x25519_ge_p3_to_cached(&Ai[4], &u);
				659	x25519_ge_add(&t, &A2, &Ai[4]);
				660	x25519_ge_p1p1_to_p3(&u, &t);
				661	x25519_ge_p3_to_cached(&Ai[5], &u);
				662	x25519_ge_add(&t, &A2, &Ai[5]);
				663	x25519_ge_p1p1_to_p3(&u, &t);
				664	x25519_ge_p3_to_cached(&Ai[6], &u);
				665	x25519_ge_add(&t, &A2, &Ai[6]);
				666	x25519_ge_p1p1_to_p3(&u, &t);
				667	x25519_ge_p3_to_cached(&Ai[7], &u);
				668
				669	ge_p2_0(r);
				670
				671	for (i = 255; i >= 0; --i) {
				672	if (aslide[i] \|\| bslide[i]) {
				673	break;
				674	}
				675	}
				676
				677	for (; i >= 0; --i) {
				678	ge_p2_dbl(&t, r);
				679
				680	if (aslide[i] > 0) {
				681	x25519_ge_p1p1_to_p3(&u, &t);
				682	x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]);
				683	} else if (aslide[i] < 0) {
				684	x25519_ge_p1p1_to_p3(&u, &t);
				685	x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
				686	}
				687
				688	if (bslide[i] > 0) {
				689	x25519_ge_p1p1_to_p3(&u, &t);
				690	ge_madd(&t, &u, &Bi[bslide[i] / 2]);
				691	} else if (bslide[i] < 0) {
				692	x25519_ge_p1p1_to_p3(&u, &t);
				693	ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
				694	}
				695
				696	x25519_ge_p1p1_to_p2(r, &t);
				697	}
				698	}
				699
				700	// int64_lshift21 returns \|a << 21\| but is defined when shifting bits into the
				701	// sign bit. This works around a language flaw in C.
				702	static inline int64_t int64_lshift21(int64_t a) {
				703	return (int64_t)((uint64_t)a << 21);
				704	}
				705
				706	// The set of scalars is \Z/l
				707	// where l = 2^252 + 27742317777372353535851937790883648493.
				708
				709	// Input:
				710	// s[0]+256s[1]+...+256^63s[63] = s
				711	//
				712	// Output:
				713	// s[0]+256s[1]+...+256^31s[31] = s mod l
				714	// where l = 2^252 + 27742317777372353535851937790883648493.
				715	// Overwrites s in place.
				716	void x25519_sc_reduce(uint8_t s[64]) {
				717	int64_t s0 = 2097151 & load_3(s);
				718	int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
				719	int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
				720	int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
				721	int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
				722	int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
				723	int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
				724	int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
				725	int64_t s8 = 2097151 & load_3(s + 21);
				726	int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
				727	int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
				728	int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
				729	int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
				730	int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
				731	int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
				732	int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
				733	int64_t s16 = 2097151 & load_3(s + 42);
				734	int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
				735	int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
				736	int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
				737	int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
				738	int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
				739	int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
				740	int64_t s23 = (load_4(s + 60) >> 3);
				741	int64_t carry0;
				742	int64_t carry1;
				743	int64_t carry2;
				744	int64_t carry3;
				745	int64_t carry4;
				746	int64_t carry5;
				747	int64_t carry6;
				748	int64_t carry7;
				749	int64_t carry8;
				750	int64_t carry9;
				751	int64_t carry10;
				752	int64_t carry11;
				753	int64_t carry12;
				754	int64_t carry13;
				755	int64_t carry14;
				756	int64_t carry15;
				757	int64_t carry16;
				758
				759	s11 += s23 * 666643;
				760	s12 += s23 * 470296;
				761	s13 += s23 * 654183;
				762	s14 -= s23 * 997805;
				763	s15 += s23 * 136657;
				764	s16 -= s23 * 683901;
				765	s23 = 0;
				766
				767	s10 += s22 * 666643;
				768	s11 += s22 * 470296;
				769	s12 += s22 * 654183;
				770	s13 -= s22 * 997805;
				771	s14 += s22 * 136657;
				772	s15 -= s22 * 683901;
				773	s22 = 0;
				774
				775	s9 += s21 * 666643;
				776	s10 += s21 * 470296;
				777	s11 += s21 * 654183;
				778	s12 -= s21 * 997805;
				779	s13 += s21 * 136657;
				780	s14 -= s21 * 683901;
				781	s21 = 0;
				782
				783	s8 += s20 * 666643;
				784	s9 += s20 * 470296;
				785	s10 += s20 * 654183;
				786	s11 -= s20 * 997805;
				787	s12 += s20 * 136657;
				788	s13 -= s20 * 683901;
				789	s20 = 0;
				790
				791	s7 += s19 * 666643;
				792	s8 += s19 * 470296;
				793	s9 += s19 * 654183;
				794	s10 -= s19 * 997805;
				795	s11 += s19 * 136657;
				796	s12 -= s19 * 683901;
				797	s19 = 0;
				798
				799	s6 += s18 * 666643;
				800	s7 += s18 * 470296;
				801	s8 += s18 * 654183;
				802	s9 -= s18 * 997805;
				803	s10 += s18 * 136657;
				804	s11 -= s18 * 683901;
				805	s18 = 0;
				806
				807	carry6 = (s6 + (1 << 20)) >> 21;
				808	s7 += carry6;
				809	s6 -= int64_lshift21(carry6);
				810	carry8 = (s8 + (1 << 20)) >> 21;
				811	s9 += carry8;
				812	s8 -= int64_lshift21(carry8);
				813	carry10 = (s10 + (1 << 20)) >> 21;
				814	s11 += carry10;
				815	s10 -= int64_lshift21(carry10);
				816	carry12 = (s12 + (1 << 20)) >> 21;
				817	s13 += carry12;
				818	s12 -= int64_lshift21(carry12);
				819	carry14 = (s14 + (1 << 20)) >> 21;
				820	s15 += carry14;
				821	s14 -= int64_lshift21(carry14);
				822	carry16 = (s16 + (1 << 20)) >> 21;
				823	s17 += carry16;
				824	s16 -= int64_lshift21(carry16);
				825
				826	carry7 = (s7 + (1 << 20)) >> 21;
				827	s8 += carry7;
				828	s7 -= int64_lshift21(carry7);
				829	carry9 = (s9 + (1 << 20)) >> 21;
				830	s10 += carry9;
				831	s9 -= int64_lshift21(carry9);
				832	carry11 = (s11 + (1 << 20)) >> 21;
				833	s12 += carry11;
				834	s11 -= int64_lshift21(carry11);
				835	carry13 = (s13 + (1 << 20)) >> 21;
				836	s14 += carry13;
				837	s13 -= int64_lshift21(carry13);
				838	carry15 = (s15 + (1 << 20)) >> 21;
				839	s16 += carry15;
				840	s15 -= int64_lshift21(carry15);
				841
				842	s5 += s17 * 666643;
				843	s6 += s17 * 470296;
				844	s7 += s17 * 654183;
				845	s8 -= s17 * 997805;
				846	s9 += s17 * 136657;
				847	s10 -= s17 * 683901;
				848	s17 = 0;
				849
				850	s4 += s16 * 666643;
				851	s5 += s16 * 470296;
				852	s6 += s16 * 654183;
				853	s7 -= s16 * 997805;
				854	s8 += s16 * 136657;
				855	s9 -= s16 * 683901;
				856	s16 = 0;
				857
				858	s3 += s15 * 666643;
				859	s4 += s15 * 470296;
				860	s5 += s15 * 654183;
				861	s6 -= s15 * 997805;
				862	s7 += s15 * 136657;
				863	s8 -= s15 * 683901;
				864	s15 = 0;
				865
				866	s2 += s14 * 666643;
				867	s3 += s14 * 470296;
				868	s4 += s14 * 654183;
				869	s5 -= s14 * 997805;
				870	s6 += s14 * 136657;
				871	s7 -= s14 * 683901;
				872	s14 = 0;
				873
				874	s1 += s13 * 666643;
				875	s2 += s13 * 470296;
				876	s3 += s13 * 654183;
				877	s4 -= s13 * 997805;
				878	s5 += s13 * 136657;
				879	s6 -= s13 * 683901;
				880	s13 = 0;
				881
				882	s0 += s12 * 666643;
				883	s1 += s12 * 470296;
				884	s2 += s12 * 654183;
				885	s3 -= s12 * 997805;
				886	s4 += s12 * 136657;
				887	s5 -= s12 * 683901;
				888	s12 = 0;
				889
				890	carry0 = (s0 + (1 << 20)) >> 21;
				891	s1 += carry0;
				892	s0 -= int64_lshift21(carry0);
				893	carry2 = (s2 + (1 << 20)) >> 21;
				894	s3 += carry2;
				895	s2 -= int64_lshift21(carry2);
				896	carry4 = (s4 + (1 << 20)) >> 21;
				897	s5 += carry4;
				898	s4 -= int64_lshift21(carry4);
				899	carry6 = (s6 + (1 << 20)) >> 21;
				900	s7 += carry6;
				901	s6 -= int64_lshift21(carry6);
				902	carry8 = (s8 + (1 << 20)) >> 21;
				903	s9 += carry8;
				904	s8 -= int64_lshift21(carry8);
				905	carry10 = (s10 + (1 << 20)) >> 21;
				906	s11 += carry10;
				907	s10 -= int64_lshift21(carry10);
				908
				909	carry1 = (s1 + (1 << 20)) >> 21;
				910	s2 += carry1;
				911	s1 -= int64_lshift21(carry1);
				912	carry3 = (s3 + (1 << 20)) >> 21;
				913	s4 += carry3;
				914	s3 -= int64_lshift21(carry3);
				915	carry5 = (s5 + (1 << 20)) >> 21;
				916	s6 += carry5;
				917	s5 -= int64_lshift21(carry5);
				918	carry7 = (s7 + (1 << 20)) >> 21;
				919	s8 += carry7;
				920	s7 -= int64_lshift21(carry7);
				921	carry9 = (s9 + (1 << 20)) >> 21;
				922	s10 += carry9;
				923	s9 -= int64_lshift21(carry9);
				924	carry11 = (s11 + (1 << 20)) >> 21;
				925	s12 += carry11;
				926	s11 -= int64_lshift21(carry11);
				927
				928	s0 += s12 * 666643;
				929	s1 += s12 * 470296;
				930	s2 += s12 * 654183;
				931	s3 -= s12 * 997805;
				932	s4 += s12 * 136657;
				933	s5 -= s12 * 683901;
				934	s12 = 0;
				935
				936	carry0 = s0 >> 21;
				937	s1 += carry0;
				938	s0 -= int64_lshift21(carry0);
				939	carry1 = s1 >> 21;
				940	s2 += carry1;
				941	s1 -= int64_lshift21(carry1);
				942	carry2 = s2 >> 21;
				943	s3 += carry2;
				944	s2 -= int64_lshift21(carry2);
				945	carry3 = s3 >> 21;
				946	s4 += carry3;
				947	s3 -= int64_lshift21(carry3);
				948	carry4 = s4 >> 21;
				949	s5 += carry4;
				950	s4 -= int64_lshift21(carry4);
				951	carry5 = s5 >> 21;
				952	s6 += carry5;
				953	s5 -= int64_lshift21(carry5);
				954	carry6 = s6 >> 21;
				955	s7 += carry6;
				956	s6 -= int64_lshift21(carry6);
				957	carry7 = s7 >> 21;
				958	s8 += carry7;
				959	s7 -= int64_lshift21(carry7);
				960	carry8 = s8 >> 21;
				961	s9 += carry8;
				962	s8 -= int64_lshift21(carry8);
				963	carry9 = s9 >> 21;
				964	s10 += carry9;
				965	s9 -= int64_lshift21(carry9);
				966	carry10 = s10 >> 21;
				967	s11 += carry10;
				968	s10 -= int64_lshift21(carry10);
				969	carry11 = s11 >> 21;
				970	s12 += carry11;
				971	s11 -= int64_lshift21(carry11);
				972
				973	s0 += s12 * 666643;
				974	s1 += s12 * 470296;
				975	s2 += s12 * 654183;
				976	s3 -= s12 * 997805;
				977	s4 += s12 * 136657;
				978	s5 -= s12 * 683901;
				979	s12 = 0;
				980
				981	carry0 = s0 >> 21;
				982	s1 += carry0;
				983	s0 -= int64_lshift21(carry0);
				984	carry1 = s1 >> 21;
				985	s2 += carry1;
				986	s1 -= int64_lshift21(carry1);
				987	carry2 = s2 >> 21;
				988	s3 += carry2;
				989	s2 -= int64_lshift21(carry2);
				990	carry3 = s3 >> 21;
				991	s4 += carry3;
				992	s3 -= int64_lshift21(carry3);
				993	carry4 = s4 >> 21;
				994	s5 += carry4;
				995	s4 -= int64_lshift21(carry4);
				996	carry5 = s5 >> 21;
				997	s6 += carry5;
				998	s5 -= int64_lshift21(carry5);
				999	carry6 = s6 >> 21;
				1000	s7 += carry6;
				1001	s6 -= int64_lshift21(carry6);
				1002	carry7 = s7 >> 21;
				1003	s8 += carry7;
				1004	s7 -= int64_lshift21(carry7);
				1005	carry8 = s8 >> 21;
				1006	s9 += carry8;
				1007	s8 -= int64_lshift21(carry8);
				1008	carry9 = s9 >> 21;
				1009	s10 += carry9;
				1010	s9 -= int64_lshift21(carry9);
				1011	carry10 = s10 >> 21;
				1012	s11 += carry10;
				1013	s10 -= int64_lshift21(carry10);
				1014
				1015	s[0] = s0 >> 0;
				1016	s[1] = s0 >> 8;
				1017	s[2] = (s0 >> 16) \| (s1 << 5);
				1018	s[3] = s1 >> 3;
				1019	s[4] = s1 >> 11;
				1020	s[5] = (s1 >> 19) \| (s2 << 2);
				1021	s[6] = s2 >> 6;
				1022	s[7] = (s2 >> 14) \| (s3 << 7);
				1023	s[8] = s3 >> 1;
				1024	s[9] = s3 >> 9;
				1025	s[10] = (s3 >> 17) \| (s4 << 4);
				1026	s[11] = s4 >> 4;
				1027	s[12] = s4 >> 12;
				1028	s[13] = (s4 >> 20) \| (s5 << 1);
				1029	s[14] = s5 >> 7;
				1030	s[15] = (s5 >> 15) \| (s6 << 6);
				1031	s[16] = s6 >> 2;
				1032	s[17] = s6 >> 10;
				1033	s[18] = (s6 >> 18) \| (s7 << 3);
				1034	s[19] = s7 >> 5;
				1035	s[20] = s7 >> 13;
				1036	s[21] = s8 >> 0;
				1037	s[22] = s8 >> 8;
				1038	s[23] = (s8 >> 16) \| (s9 << 5);
				1039	s[24] = s9 >> 3;
				1040	s[25] = s9 >> 11;
				1041	s[26] = (s9 >> 19) \| (s10 << 2);
				1042	s[27] = s10 >> 6;
				1043	s[28] = (s10 >> 14) \| (s11 << 7);
				1044	s[29] = s11 >> 1;
				1045	s[30] = s11 >> 9;
				1046	s[31] = s11 >> 17;
				1047	}
				1048
				1049	int ED25519_verify(const uint8_t *message, size_t message_len,
				1050	const uint8_t signature[64], const uint8_t public_key[32]) {
				1051	ge_p3 A;
				1052	if ((signature[63] & 224) != 0 \|\|
				1053	!x25519_ge_frombytes_vartime(&A, public_key)) {
				1054	return 0;
				1055	}
				1056
				1057	fe_loose t;
				1058	fe_neg(&t, &A.X);
				1059	fe_carry(&A.X, &t);
				1060	fe_neg(&t, &A.T);
				1061	fe_carry(&A.T, &t);
				1062
				1063	uint8_t pkcopy[32];
				1064	memcpy(pkcopy, public_key, 32);
				1065	uint8_t rcopy[32];
				1066	memcpy(rcopy, signature, 32);
				1067	union {
				1068	uint64_t u64[4];
				1069	uint8_t u8[32];
				1070	} scopy;
				1071	memcpy(&scopy.u8[0], signature + 32, 32);
				1072
				1073	// https://tools.ietf.org/html/rfc8032#section-5.1.7 requires that s be in
				1074	// the range [0, order) in order to prevent signature malleability.
				1075
				1076	// kOrder is the order of Curve25519 in little-endian form.
				1077	static const uint64_t kOrder[4] = {
				1078	UINT64_C(0x5812631a5cf5d3ed),
				1079	UINT64_C(0x14def9dea2f79cd6),
				1080	0,
				1081	UINT64_C(0x1000000000000000),
				1082	};
				1083	for (size_t i = 3;; i--) {
				1084	if (scopy.u64[i] > kOrder[i]) {
				1085	return 0;
				1086	} else if (scopy.u64[i] < kOrder[i]) {
				1087	break;
				1088	} else if (i == 0) {
				1089	return 0;
				1090	}
				1091	}
				1092
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	1093	#if defined(MCUBOOT_USE_MBED_TLS)
				1094
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	1095	mbedtls_sha512_context ctx;
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	1096	int ret;
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	1097
				1098	mbedtls_sha512_init(&ctx);
				1099
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	1100	ret = mbedtls_sha512_starts_ret(&ctx, 0);
				1101	assert(ret == 0);
				1102
				1103	ret = mbedtls_sha512_update_ret(&ctx, signature, 32);
				1104	assert(ret == 0);
				1105	ret = mbedtls_sha512_update_ret(&ctx, public_key, 32);
				1106	assert(ret == 0);
				1107	ret = mbedtls_sha512_update_ret(&ctx, message, message_len);
				1108	assert(ret == 0);
				1109
				1110	uint8_t h[SHA512_DIGEST_LENGTH];
				1111	ret = mbedtls_sha512_finish_ret(&ctx, h);
				1112	assert(ret == 0);
				1113	mbedtls_sha512_free(&ctx);
				1114
Fabio Utzig	a1c142d	2020-01-03 08:28:11 -0300	[diff] [blame]	1115	#else
				1116
				1117	struct tc_sha512_state_struct s;
				1118	int rc;
				1119
				1120	rc = tc_sha512_init(&s);
				1121	assert(rc == TC_CRYPTO_SUCCESS);
				1122
				1123	rc = tc_sha512_update(&s, signature, 32);
				1124	assert(rc == TC_CRYPTO_SUCCESS);
				1125	rc = tc_sha512_update(&s, public_key, 32);
				1126	assert(rc == TC_CRYPTO_SUCCESS);
				1127	rc = tc_sha512_update(&s, message, message_len);
				1128	assert(rc == TC_CRYPTO_SUCCESS);
				1129
				1130	uint8_t h[TC_SHA512_DIGEST_SIZE];
				1131	rc = tc_sha512_final(h, &s);
				1132	assert(rc == TC_CRYPTO_SUCCESS);
				1133
				1134	#endif
				1135
Fabio Utzig	705dfb3	2019-05-11 20:06:37 -0300	[diff] [blame]	1136	x25519_sc_reduce(h);
				1137
				1138	ge_p2 R;
				1139	ge_double_scalarmult_vartime(&R, h, &A, scopy.u8);
				1140
				1141	uint8_t rcheck[32];
				1142	x25519_ge_tobytes(rcheck, &R);
				1143
				1144	return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0;
				1145	}
Fabio Utzig	8fcdb6d	2020-04-02 10:22:28 -0300	[diff] [blame]	1146
				1147	static void fe_cswap(fe f, fe g, fe_limb_t b) {
				1148	b = 0-b;
				1149	for (unsigned i = 0; i < FE_NUM_LIMBS; i++) {
				1150	fe_limb_t x = f->v[i] ^ g->v[i];
				1151	x &= b;
				1152	f->v[i] ^= x;
				1153	g->v[i] ^= x;
				1154	}
				1155	}
				1156
				1157	static void fiat_25519_carry_scmul_121666(uint32_t out1[10], const uint32_t arg1[10]) {
				1158	uint64_t x1 = ((uint64_t)UINT32_C(0x1db42) * (arg1[9]));
				1159	uint64_t x2 = ((uint64_t)UINT32_C(0x1db42) * (arg1[8]));
				1160	uint64_t x3 = ((uint64_t)UINT32_C(0x1db42) * (arg1[7]));
				1161	uint64_t x4 = ((uint64_t)UINT32_C(0x1db42) * (arg1[6]));
				1162	uint64_t x5 = ((uint64_t)UINT32_C(0x1db42) * (arg1[5]));
				1163	uint64_t x6 = ((uint64_t)UINT32_C(0x1db42) * (arg1[4]));
				1164	uint64_t x7 = ((uint64_t)UINT32_C(0x1db42) * (arg1[3]));
				1165	uint64_t x8 = ((uint64_t)UINT32_C(0x1db42) * (arg1[2]));
				1166	uint64_t x9 = ((uint64_t)UINT32_C(0x1db42) * (arg1[1]));
				1167	uint64_t x10 = ((uint64_t)UINT32_C(0x1db42) * (arg1[0]));
				1168	uint32_t x11 = (uint32_t)(x10 >> 26);
				1169	uint32_t x12 = (uint32_t)(x10 & UINT32_C(0x3ffffff));
				1170	uint64_t x13 = (x11 + x9);
				1171	uint32_t x14 = (uint32_t)(x13 >> 25);
				1172	uint32_t x15 = (uint32_t)(x13 & UINT32_C(0x1ffffff));
				1173	uint64_t x16 = (x14 + x8);
				1174	uint32_t x17 = (uint32_t)(x16 >> 26);
				1175	uint32_t x18 = (uint32_t)(x16 & UINT32_C(0x3ffffff));
				1176	uint64_t x19 = (x17 + x7);
				1177	uint32_t x20 = (uint32_t)(x19 >> 25);
				1178	uint32_t x21 = (uint32_t)(x19 & UINT32_C(0x1ffffff));
				1179	uint64_t x22 = (x20 + x6);
				1180	uint32_t x23 = (uint32_t)(x22 >> 26);
				1181	uint32_t x24 = (uint32_t)(x22 & UINT32_C(0x3ffffff));
				1182	uint64_t x25 = (x23 + x5);
				1183	uint32_t x26 = (uint32_t)(x25 >> 25);
				1184	uint32_t x27 = (uint32_t)(x25 & UINT32_C(0x1ffffff));
				1185	uint64_t x28 = (x26 + x4);
				1186	uint32_t x29 = (uint32_t)(x28 >> 26);
				1187	uint32_t x30 = (uint32_t)(x28 & UINT32_C(0x3ffffff));
				1188	uint64_t x31 = (x29 + x3);
				1189	uint32_t x32 = (uint32_t)(x31 >> 25);
				1190	uint32_t x33 = (uint32_t)(x31 & UINT32_C(0x1ffffff));
				1191	uint64_t x34 = (x32 + x2);
				1192	uint32_t x35 = (uint32_t)(x34 >> 26);
				1193	uint32_t x36 = (uint32_t)(x34 & UINT32_C(0x3ffffff));
				1194	uint64_t x37 = (x35 + x1);
				1195	uint32_t x38 = (uint32_t)(x37 >> 25);
				1196	uint32_t x39 = (uint32_t)(x37 & UINT32_C(0x1ffffff));
				1197	uint32_t x40 = (x38 * (uint32_t)UINT8_C(0x13));
				1198	uint32_t x41 = (x12 + x40);
				1199	uint32_t x42 = (x41 >> 26);
				1200	uint32_t x43 = (x41 & UINT32_C(0x3ffffff));
				1201	uint32_t x44 = (x42 + x15);
				1202	uint32_t x45 = (x44 >> 25);
				1203	uint32_t x46 = (x44 & UINT32_C(0x1ffffff));
				1204	uint32_t x47 = (x45 + x18);
				1205	out1[0] = x43;
				1206	out1[1] = x46;
				1207	out1[2] = x47;
				1208	out1[3] = x21;
				1209	out1[4] = x24;
				1210	out1[5] = x27;
				1211	out1[6] = x30;
				1212	out1[7] = x33;
				1213	out1[8] = x36;
				1214	out1[9] = x39;
				1215	}
				1216
				1217	static void fe_mul121666(fe h, const fe_loose f) {
				1218	assert_fe_loose(f->v);
				1219	fiat_25519_carry_scmul_121666(h->v, f->v);
				1220	assert_fe(h->v);
				1221	}
				1222
				1223	static void x25519_scalar_mult_generic(uint8_t out[32],
				1224	const uint8_t scalar[32],
				1225	const uint8_t point[32]) {
				1226	fe x1, x2, z2, x3, z3, tmp0, tmp1;
				1227	fe_loose x2l, z2l, x3l, tmp0l, tmp1l;
				1228
				1229	uint8_t e[32];
				1230	memcpy(e, scalar, 32);
				1231	e[0] &= 248;
				1232	e[31] &= 127;
				1233	e[31] \|= 64;
				1234
				1235	// The following implementation was transcribed to Coq and proven to
				1236	// correspond to unary scalar multiplication in affine coordinates given that
				1237	// x1 != 0 is the x coordinate of some point on the curve. It was also checked
				1238	// in Coq that doing a ladderstep with x1 = x3 = 0 gives z2' = z3' = 0, and z2
				1239	// = z3 = 0 gives z2' = z3' = 0. The statement was quantified over the
				1240	// underlying field, so it applies to Curve25519 itself and the quadratic
				1241	// twist of Curve25519. It was not proven in Coq that prime-field arithmetic
				1242	// correctly simulates extension-field arithmetic on prime-field values.
				1243	// The decoding of the byte array representation of e was not considered.
				1244	// Specification of Montgomery curves in affine coordinates:
				1245	// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
				1246	// Proof that these form a group that is isomorphic to a Weierstrass curve:
				1247	// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
				1248	// Coq transcription and correctness proof of the loop (where scalarbits=255):
				1249	// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
				1250	// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
				1251	// preconditions: 0 <= e < 2^255 (not necessarily e < order), fe_invert(0) = 0
				1252	fe_frombytes(&x1, point);
				1253	fe_1(&x2);
				1254	fe_0(&z2);
				1255	fe_copy(&x3, &x1);
				1256	fe_1(&z3);
				1257
				1258	unsigned swap = 0;
				1259	int pos;
				1260	for (pos = 254; pos >= 0; --pos) {
				1261	// loop invariant as of right before the test, for the case where x1 != 0:
				1262	// pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 is nonzero
				1263	// let r := e >> (pos+1) in the following equalities of projective points:
				1264	// to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
				1265	// to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
				1266	// x1 is the nonzero x coordinate of the nonzero point (rP-(r+1)P)
				1267	unsigned b = 1 & (e[pos / 8] >> (pos & 7));
				1268	swap ^= b;
				1269	fe_cswap(&x2, &x3, swap);
				1270	fe_cswap(&z2, &z3, swap);
				1271	swap = b;
				1272	// Coq transcription of ladderstep formula (called from transcribed loop):
				1273	// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
				1274	// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
				1275	// x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
				1276	// x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
				1277	fe_sub(&tmp0l, &x3, &z3);
				1278	fe_sub(&tmp1l, &x2, &z2);
				1279	fe_add(&x2l, &x2, &z2);
				1280	fe_add(&z2l, &x3, &z3);
				1281	fe_mul_tll(&z3, &tmp0l, &x2l);
				1282	fe_mul_tll(&z2, &z2l, &tmp1l);
				1283	fe_sq_tl(&tmp0, &tmp1l);
				1284	fe_sq_tl(&tmp1, &x2l);
				1285	fe_add(&x3l, &z3, &z2);
				1286	fe_sub(&z2l, &z3, &z2);
				1287	fe_mul_ttt(&x2, &tmp1, &tmp0);
				1288	fe_sub(&tmp1l, &tmp1, &tmp0);
				1289	fe_sq_tl(&z2, &z2l);
				1290	fe_mul121666(&z3, &tmp1l);
				1291	fe_sq_tl(&x3, &x3l);
				1292	fe_add(&tmp0l, &tmp0, &z3);
				1293	fe_mul_ttt(&z3, &x1, &z2);
				1294	fe_mul_tll(&z2, &tmp1l, &tmp0l);
				1295	}
				1296	// here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) else (x2, z2)
				1297	fe_cswap(&x2, &x3, swap);
				1298	fe_cswap(&z2, &z3, swap);
				1299
				1300	fe_invert(&z2, &z2);
				1301	fe_mul_ttt(&x2, &x2, &z2);
				1302	fe_tobytes(out, &x2);
				1303	}
				1304
				1305	int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32],
				1306	const uint8_t peer_public_value[32]) {
				1307	static const uint8_t kZeros[32] = {0};
				1308	x25519_scalar_mult_generic(out_shared_key, private_key, peer_public_value);
				1309	// The all-zero output results when the input is a point of small order.
				1310	return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0;
				1311	}