blob: 0215caf6582aa2d8da158e07864f77249f46987a [file] [log] [blame]
Fabio Utzig705dfb32019-05-11 20:06:37 -03001// The MIT License (MIT)
2//
3// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23// Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
24// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
25// public domain but parts have been replaced with code generated by Fiat
26// (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
27//
28// The field functions are shared by Ed25519 and X25519 where possible.
29
30#include <assert.h>
31#include <string.h>
32#include <stdint.h>
33
34#include <mbedtls/platform_util.h>
35#include <mbedtls/sha512.h>
36
37#include "curve25519.h"
38// Various pre-computed constants.
39#include "curve25519_tables.h"
40
41#define SHA512_DIGEST_LENGTH 64
42
43// Low-level intrinsic operations
44
45static uint64_t load_3(const uint8_t *in) {
46 uint64_t result;
47 result = (uint64_t)in[0];
48 result |= ((uint64_t)in[1]) << 8;
49 result |= ((uint64_t)in[2]) << 16;
50 return result;
51}
52
53static uint64_t load_4(const uint8_t *in) {
54 uint64_t result;
55 result = (uint64_t)in[0];
56 result |= ((uint64_t)in[1]) << 8;
57 result |= ((uint64_t)in[2]) << 16;
58 result |= ((uint64_t)in[3]) << 24;
59 return result;
60}
61
62
63// Field operations.
64
65typedef uint32_t fe_limb_t;
66#define FE_NUM_LIMBS 10
67
68// assert_fe asserts that |f| satisfies bounds:
69//
70// [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
71// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
72// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
73// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
74// [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
75//
76// See comments in curve25519_32.h for which functions use these bounds for
77// inputs or outputs.
78#define assert_fe(f) \
79 do { \
80 for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
81 assert(f[_assert_fe_i] <= \
82 ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \
83 } \
84 } while (0)
85
86// assert_fe_loose asserts that |f| satisfies bounds:
87//
88// [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
89// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
90// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
91// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
92// [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
93//
94// See comments in curve25519_32.h for which functions use these bounds for
95// inputs or outputs.
96#define assert_fe_loose(f) \
97 do { \
98 for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
99 assert(f[_assert_fe_i] <= \
100 ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \
101 } \
102 } while (0)
103
104//FIXME: use Zephyr macro
105_Static_assert(sizeof(fe) == sizeof(fe_limb_t) * FE_NUM_LIMBS,
106 "fe_limb_t[FE_NUM_LIMBS] is inconsistent with fe");
107
108static void fe_frombytes_strict(fe *h, const uint8_t s[32]) {
109 // |fiat_25519_from_bytes| requires the top-most bit be clear.
110 assert((s[31] & 0x80) == 0);
111 fiat_25519_from_bytes(h->v, s);
112 assert_fe(h->v);
113}
114
115static void fe_frombytes(fe *h, const uint8_t s[32]) {
116 uint8_t s_copy[32];
117 memcpy(s_copy, s, 32);
118 s_copy[31] &= 0x7f;
119 fe_frombytes_strict(h, s_copy);
120}
121
122static void fe_tobytes(uint8_t s[32], const fe *f) {
123 assert_fe(f->v);
124 fiat_25519_to_bytes(s, f->v);
125}
126
127// h = 0
128static void fe_0(fe *h) {
129 mbedtls_platform_zeroize(h, sizeof(fe));
130}
131
132// h = 1
133static void fe_1(fe *h) {
134 mbedtls_platform_zeroize(h, sizeof(fe));
135 h->v[0] = 1;
136}
137
138// h = f + g
139// Can overlap h with f or g.
140static void fe_add(fe_loose *h, const fe *f, const fe *g) {
141 assert_fe(f->v);
142 assert_fe(g->v);
143 fiat_25519_add(h->v, f->v, g->v);
144 assert_fe_loose(h->v);
145}
146
147// h = f - g
148// Can overlap h with f or g.
149static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
150 assert_fe(f->v);
151 assert_fe(g->v);
152 fiat_25519_sub(h->v, f->v, g->v);
153 assert_fe_loose(h->v);
154}
155
156static void fe_carry(fe *h, const fe_loose* f) {
157 assert_fe_loose(f->v);
158 fiat_25519_carry(h->v, f->v);
159 assert_fe(h->v);
160}
161
162static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],
163 const fe_limb_t in1[FE_NUM_LIMBS],
164 const fe_limb_t in2[FE_NUM_LIMBS]) {
165 assert_fe_loose(in1);
166 assert_fe_loose(in2);
167 fiat_25519_carry_mul(out, in1, in2);
168 assert_fe(out);
169}
170
171static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
172 fe_mul_impl(h->v, f->v, g->v);
173}
174
175static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
176 fe_mul_impl(h->v, f->v, g->v);
177}
178
179static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
180 fe_mul_impl(h->v, f->v, g->v);
181}
182
183static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
184 fe_mul_impl(h->v, f->v, g->v);
185}
186
187static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
188 fe_mul_impl(h->v, f->v, g->v);
189}
190
191static void fe_sq_tl(fe *h, const fe_loose *f) {
192 assert_fe_loose(f->v);
193 fiat_25519_carry_square(h->v, f->v);
194 assert_fe(h->v);
195}
196
197static void fe_sq_tt(fe *h, const fe *f) {
198 assert_fe_loose(f->v);
199 fiat_25519_carry_square(h->v, f->v);
200 assert_fe(h->v);
201}
202
203// h = -f
204static void fe_neg(fe_loose *h, const fe *f) {
205 assert_fe(f->v);
206 fiat_25519_opp(h->v, f->v);
207 assert_fe_loose(h->v);
208}
209
210// h = f
211static void fe_copy(fe *h, const fe *f) {
212 memmove(h, f, sizeof(fe));
213}
214
215static void fe_copy_lt(fe_loose *h, const fe *f) {
216 //FIXME: use Zephyr macro
217 _Static_assert(sizeof(fe_loose) == sizeof(fe), "fe and fe_loose mismatch");
218 memmove(h, f, sizeof(fe));
219}
220
221static void fe_loose_invert(fe *out, const fe_loose *z) {
222 fe t0;
223 fe t1;
224 fe t2;
225 fe t3;
226 int i;
227
228 fe_sq_tl(&t0, z);
229 fe_sq_tt(&t1, &t0);
230 for (i = 1; i < 2; ++i) {
231 fe_sq_tt(&t1, &t1);
232 }
233 fe_mul_tlt(&t1, z, &t1);
234 fe_mul_ttt(&t0, &t0, &t1);
235 fe_sq_tt(&t2, &t0);
236 fe_mul_ttt(&t1, &t1, &t2);
237 fe_sq_tt(&t2, &t1);
238 for (i = 1; i < 5; ++i) {
239 fe_sq_tt(&t2, &t2);
240 }
241 fe_mul_ttt(&t1, &t2, &t1);
242 fe_sq_tt(&t2, &t1);
243 for (i = 1; i < 10; ++i) {
244 fe_sq_tt(&t2, &t2);
245 }
246 fe_mul_ttt(&t2, &t2, &t1);
247 fe_sq_tt(&t3, &t2);
248 for (i = 1; i < 20; ++i) {
249 fe_sq_tt(&t3, &t3);
250 }
251 fe_mul_ttt(&t2, &t3, &t2);
252 fe_sq_tt(&t2, &t2);
253 for (i = 1; i < 10; ++i) {
254 fe_sq_tt(&t2, &t2);
255 }
256 fe_mul_ttt(&t1, &t2, &t1);
257 fe_sq_tt(&t2, &t1);
258 for (i = 1; i < 50; ++i) {
259 fe_sq_tt(&t2, &t2);
260 }
261 fe_mul_ttt(&t2, &t2, &t1);
262 fe_sq_tt(&t3, &t2);
263 for (i = 1; i < 100; ++i) {
264 fe_sq_tt(&t3, &t3);
265 }
266 fe_mul_ttt(&t2, &t3, &t2);
267 fe_sq_tt(&t2, &t2);
268 for (i = 1; i < 50; ++i) {
269 fe_sq_tt(&t2, &t2);
270 }
271 fe_mul_ttt(&t1, &t2, &t1);
272 fe_sq_tt(&t1, &t1);
273 for (i = 1; i < 5; ++i) {
274 fe_sq_tt(&t1, &t1);
275 }
276 fe_mul_ttt(out, &t1, &t0);
277}
278
279static void fe_invert(fe *out, const fe *z) {
280 fe_loose l;
281 fe_copy_lt(&l, z);
282 fe_loose_invert(out, &l);
283}
284
285static int CRYPTO_memcmp(const void *in_a, const void *in_b, size_t len) {
286 const uint8_t *a = in_a;
287 const uint8_t *b = in_b;
288 uint8_t x = 0;
289
290 for (size_t i = 0; i < len; i++) {
291 x |= a[i] ^ b[i];
292 }
293
294 return x;
295}
296
297// return 0 if f == 0
298// return 1 if f != 0
299static int fe_isnonzero(const fe_loose *f) {
300 fe tight;
301 fe_carry(&tight, f);
302 uint8_t s[32];
303 fe_tobytes(s, &tight);
304
305 static const uint8_t zero[32] = {0};
306 return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;
307}
308
309// return 1 if f is in {1,3,5,...,q-2}
310// return 0 if f is in {0,2,4,...,q-1}
311static int fe_isnegative(const fe *f) {
312 uint8_t s[32];
313 fe_tobytes(s, f);
314 return s[0] & 1;
315}
316
317static void fe_sq2_tt(fe *h, const fe *f) {
318 // h = f^2
319 fe_sq_tt(h, f);
320
321 // h = h + h
322 fe_loose tmp;
323 fe_add(&tmp, h, h);
324 fe_carry(h, &tmp);
325}
326
327static void fe_pow22523(fe *out, const fe *z) {
328 fe t0;
329 fe t1;
330 fe t2;
331 int i;
332
333 fe_sq_tt(&t0, z);
334 fe_sq_tt(&t1, &t0);
335 for (i = 1; i < 2; ++i) {
336 fe_sq_tt(&t1, &t1);
337 }
338 fe_mul_ttt(&t1, z, &t1);
339 fe_mul_ttt(&t0, &t0, &t1);
340 fe_sq_tt(&t0, &t0);
341 fe_mul_ttt(&t0, &t1, &t0);
342 fe_sq_tt(&t1, &t0);
343 for (i = 1; i < 5; ++i) {
344 fe_sq_tt(&t1, &t1);
345 }
346 fe_mul_ttt(&t0, &t1, &t0);
347 fe_sq_tt(&t1, &t0);
348 for (i = 1; i < 10; ++i) {
349 fe_sq_tt(&t1, &t1);
350 }
351 fe_mul_ttt(&t1, &t1, &t0);
352 fe_sq_tt(&t2, &t1);
353 for (i = 1; i < 20; ++i) {
354 fe_sq_tt(&t2, &t2);
355 }
356 fe_mul_ttt(&t1, &t2, &t1);
357 fe_sq_tt(&t1, &t1);
358 for (i = 1; i < 10; ++i) {
359 fe_sq_tt(&t1, &t1);
360 }
361 fe_mul_ttt(&t0, &t1, &t0);
362 fe_sq_tt(&t1, &t0);
363 for (i = 1; i < 50; ++i) {
364 fe_sq_tt(&t1, &t1);
365 }
366 fe_mul_ttt(&t1, &t1, &t0);
367 fe_sq_tt(&t2, &t1);
368 for (i = 1; i < 100; ++i) {
369 fe_sq_tt(&t2, &t2);
370 }
371 fe_mul_ttt(&t1, &t2, &t1);
372 fe_sq_tt(&t1, &t1);
373 for (i = 1; i < 50; ++i) {
374 fe_sq_tt(&t1, &t1);
375 }
376 fe_mul_ttt(&t0, &t1, &t0);
377 fe_sq_tt(&t0, &t0);
378 for (i = 1; i < 2; ++i) {
379 fe_sq_tt(&t0, &t0);
380 }
381 fe_mul_ttt(out, &t0, z);
382}
383
384
385// Group operations.
386
387void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
388 fe recip;
389 fe x;
390 fe y;
391
392 fe_invert(&recip, &h->Z);
393 fe_mul_ttt(&x, &h->X, &recip);
394 fe_mul_ttt(&y, &h->Y, &recip);
395 fe_tobytes(s, &y);
396 s[31] ^= fe_isnegative(&x) << 7;
397}
398
399int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) {
400 fe u;
401 fe_loose v;
402 fe v3;
403 fe vxx;
404 fe_loose check;
405
406 fe_frombytes(&h->Y, s);
407 fe_1(&h->Z);
408 fe_sq_tt(&v3, &h->Y);
409 fe_mul_ttt(&vxx, &v3, &d);
410 fe_sub(&v, &v3, &h->Z); // u = y^2-1
411 fe_carry(&u, &v);
412 fe_add(&v, &vxx, &h->Z); // v = dy^2+1
413
414 fe_sq_tl(&v3, &v);
415 fe_mul_ttl(&v3, &v3, &v); // v3 = v^3
416 fe_sq_tt(&h->X, &v3);
417 fe_mul_ttl(&h->X, &h->X, &v);
418 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7
419
420 fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8)
421 fe_mul_ttt(&h->X, &h->X, &v3);
422 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8)
423
424 fe_sq_tt(&vxx, &h->X);
425 fe_mul_ttl(&vxx, &vxx, &v);
426 fe_sub(&check, &vxx, &u);
427 if (fe_isnonzero(&check)) {
428 fe_add(&check, &vxx, &u);
429 if (fe_isnonzero(&check)) {
430 return 0;
431 }
432 fe_mul_ttt(&h->X, &h->X, &sqrtm1);
433 }
434
435 if (fe_isnegative(&h->X) != (s[31] >> 7)) {
436 fe_loose t;
437 fe_neg(&t, &h->X);
438 fe_carry(&h->X, &t);
439 }
440
441 fe_mul_ttt(&h->T, &h->X, &h->Y);
442 return 1;
443}
444
445static void ge_p2_0(ge_p2 *h) {
446 fe_0(&h->X);
447 fe_1(&h->Y);
448 fe_1(&h->Z);
449}
450
451// r = p
452static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
453 fe_copy(&r->X, &p->X);
454 fe_copy(&r->Y, &p->Y);
455 fe_copy(&r->Z, &p->Z);
456}
457
458// r = p
459void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
460 fe_add(&r->YplusX, &p->Y, &p->X);
461 fe_sub(&r->YminusX, &p->Y, &p->X);
462 fe_copy_lt(&r->Z, &p->Z);
463 fe_mul_ltt(&r->T2d, &p->T, &d2);
464}
465
466// r = p
467void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
468 fe_mul_tll(&r->X, &p->X, &p->T);
469 fe_mul_tll(&r->Y, &p->Y, &p->Z);
470 fe_mul_tll(&r->Z, &p->Z, &p->T);
471}
472
473// r = p
474void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
475 fe_mul_tll(&r->X, &p->X, &p->T);
476 fe_mul_tll(&r->Y, &p->Y, &p->Z);
477 fe_mul_tll(&r->Z, &p->Z, &p->T);
478 fe_mul_tll(&r->T, &p->X, &p->Y);
479}
480
481// r = 2 * p
482static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
483 fe trX, trZ, trT;
484 fe t0;
485
486 fe_sq_tt(&trX, &p->X);
487 fe_sq_tt(&trZ, &p->Y);
488 fe_sq2_tt(&trT, &p->Z);
489 fe_add(&r->Y, &p->X, &p->Y);
490 fe_sq_tl(&t0, &r->Y);
491
492 fe_add(&r->Y, &trZ, &trX);
493 fe_sub(&r->Z, &trZ, &trX);
494 fe_carry(&trZ, &r->Y);
495 fe_sub(&r->X, &t0, &trZ);
496 fe_carry(&trZ, &r->Z);
497 fe_sub(&r->T, &trT, &trZ);
498}
499
500// r = 2 * p
501static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
502 ge_p2 q;
503 ge_p3_to_p2(&q, p);
504 ge_p2_dbl(r, &q);
505}
506
507// r = p + q
508static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
509 fe trY, trZ, trT;
510
511 fe_add(&r->X, &p->Y, &p->X);
512 fe_sub(&r->Y, &p->Y, &p->X);
513 fe_mul_tll(&trZ, &r->X, &q->yplusx);
514 fe_mul_tll(&trY, &r->Y, &q->yminusx);
515 fe_mul_tlt(&trT, &q->xy2d, &p->T);
516 fe_add(&r->T, &p->Z, &p->Z);
517 fe_sub(&r->X, &trZ, &trY);
518 fe_add(&r->Y, &trZ, &trY);
519 fe_carry(&trZ, &r->T);
520 fe_add(&r->Z, &trZ, &trT);
521 fe_sub(&r->T, &trZ, &trT);
522}
523
524// r = p - q
525static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
526 fe trY, trZ, trT;
527
528 fe_add(&r->X, &p->Y, &p->X);
529 fe_sub(&r->Y, &p->Y, &p->X);
530 fe_mul_tll(&trZ, &r->X, &q->yminusx);
531 fe_mul_tll(&trY, &r->Y, &q->yplusx);
532 fe_mul_tlt(&trT, &q->xy2d, &p->T);
533 fe_add(&r->T, &p->Z, &p->Z);
534 fe_sub(&r->X, &trZ, &trY);
535 fe_add(&r->Y, &trZ, &trY);
536 fe_carry(&trZ, &r->T);
537 fe_sub(&r->Z, &trZ, &trT);
538 fe_add(&r->T, &trZ, &trT);
539}
540
541// r = p + q
542void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
543 fe trX, trY, trZ, trT;
544
545 fe_add(&r->X, &p->Y, &p->X);
546 fe_sub(&r->Y, &p->Y, &p->X);
547 fe_mul_tll(&trZ, &r->X, &q->YplusX);
548 fe_mul_tll(&trY, &r->Y, &q->YminusX);
549 fe_mul_tlt(&trT, &q->T2d, &p->T);
550 fe_mul_ttl(&trX, &p->Z, &q->Z);
551 fe_add(&r->T, &trX, &trX);
552 fe_sub(&r->X, &trZ, &trY);
553 fe_add(&r->Y, &trZ, &trY);
554 fe_carry(&trZ, &r->T);
555 fe_add(&r->Z, &trZ, &trT);
556 fe_sub(&r->T, &trZ, &trT);
557}
558
559// r = p - q
560void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
561 fe trX, trY, trZ, trT;
562
563 fe_add(&r->X, &p->Y, &p->X);
564 fe_sub(&r->Y, &p->Y, &p->X);
565 fe_mul_tll(&trZ, &r->X, &q->YminusX);
566 fe_mul_tll(&trY, &r->Y, &q->YplusX);
567 fe_mul_tlt(&trT, &q->T2d, &p->T);
568 fe_mul_ttl(&trX, &p->Z, &q->Z);
569 fe_add(&r->T, &trX, &trX);
570 fe_sub(&r->X, &trZ, &trY);
571 fe_add(&r->Y, &trZ, &trY);
572 fe_carry(&trZ, &r->T);
573 fe_sub(&r->Z, &trZ, &trT);
574 fe_add(&r->T, &trZ, &trT);
575}
576
577static void slide(signed char *r, const uint8_t *a) {
578 int i;
579 int b;
580 int k;
581
582 for (i = 0; i < 256; ++i) {
583 r[i] = 1 & (a[i >> 3] >> (i & 7));
584 }
585
586 for (i = 0; i < 256; ++i) {
587 if (r[i]) {
588 for (b = 1; b <= 6 && i + b < 256; ++b) {
589 if (r[i + b]) {
590 if (r[i] + (r[i + b] << b) <= 15) {
591 r[i] += r[i + b] << b;
592 r[i + b] = 0;
593 } else if (r[i] - (r[i + b] << b) >= -15) {
594 r[i] -= r[i + b] << b;
595 for (k = i + b; k < 256; ++k) {
596 if (!r[k]) {
597 r[k] = 1;
598 break;
599 }
600 r[k] = 0;
601 }
602 } else {
603 break;
604 }
605 }
606 }
607 }
608 }
609}
610
611// r = a * A + b * B
612// where a = a[0]+256*a[1]+...+256^31 a[31].
613// and b = b[0]+256*b[1]+...+256^31 b[31].
614// B is the Ed25519 base point (x,4/5) with x positive.
615static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
616 const ge_p3 *A, const uint8_t *b) {
617 signed char aslide[256];
618 signed char bslide[256];
619 ge_cached Ai[8]; // A,3A,5A,7A,9A,11A,13A,15A
620 ge_p1p1 t;
621 ge_p3 u;
622 ge_p3 A2;
623 int i;
624
625 slide(aslide, a);
626 slide(bslide, b);
627
628 x25519_ge_p3_to_cached(&Ai[0], A);
629 ge_p3_dbl(&t, A);
630 x25519_ge_p1p1_to_p3(&A2, &t);
631 x25519_ge_add(&t, &A2, &Ai[0]);
632 x25519_ge_p1p1_to_p3(&u, &t);
633 x25519_ge_p3_to_cached(&Ai[1], &u);
634 x25519_ge_add(&t, &A2, &Ai[1]);
635 x25519_ge_p1p1_to_p3(&u, &t);
636 x25519_ge_p3_to_cached(&Ai[2], &u);
637 x25519_ge_add(&t, &A2, &Ai[2]);
638 x25519_ge_p1p1_to_p3(&u, &t);
639 x25519_ge_p3_to_cached(&Ai[3], &u);
640 x25519_ge_add(&t, &A2, &Ai[3]);
641 x25519_ge_p1p1_to_p3(&u, &t);
642 x25519_ge_p3_to_cached(&Ai[4], &u);
643 x25519_ge_add(&t, &A2, &Ai[4]);
644 x25519_ge_p1p1_to_p3(&u, &t);
645 x25519_ge_p3_to_cached(&Ai[5], &u);
646 x25519_ge_add(&t, &A2, &Ai[5]);
647 x25519_ge_p1p1_to_p3(&u, &t);
648 x25519_ge_p3_to_cached(&Ai[6], &u);
649 x25519_ge_add(&t, &A2, &Ai[6]);
650 x25519_ge_p1p1_to_p3(&u, &t);
651 x25519_ge_p3_to_cached(&Ai[7], &u);
652
653 ge_p2_0(r);
654
655 for (i = 255; i >= 0; --i) {
656 if (aslide[i] || bslide[i]) {
657 break;
658 }
659 }
660
661 for (; i >= 0; --i) {
662 ge_p2_dbl(&t, r);
663
664 if (aslide[i] > 0) {
665 x25519_ge_p1p1_to_p3(&u, &t);
666 x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]);
667 } else if (aslide[i] < 0) {
668 x25519_ge_p1p1_to_p3(&u, &t);
669 x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
670 }
671
672 if (bslide[i] > 0) {
673 x25519_ge_p1p1_to_p3(&u, &t);
674 ge_madd(&t, &u, &Bi[bslide[i] / 2]);
675 } else if (bslide[i] < 0) {
676 x25519_ge_p1p1_to_p3(&u, &t);
677 ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
678 }
679
680 x25519_ge_p1p1_to_p2(r, &t);
681 }
682}
683
684// int64_lshift21 returns |a << 21| but is defined when shifting bits into the
685// sign bit. This works around a language flaw in C.
686static inline int64_t int64_lshift21(int64_t a) {
687 return (int64_t)((uint64_t)a << 21);
688}
689
690// The set of scalars is \Z/l
691// where l = 2^252 + 27742317777372353535851937790883648493.
692
693// Input:
694// s[0]+256*s[1]+...+256^63*s[63] = s
695//
696// Output:
697// s[0]+256*s[1]+...+256^31*s[31] = s mod l
698// where l = 2^252 + 27742317777372353535851937790883648493.
699// Overwrites s in place.
700void x25519_sc_reduce(uint8_t s[64]) {
701 int64_t s0 = 2097151 & load_3(s);
702 int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
703 int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
704 int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
705 int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
706 int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
707 int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
708 int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
709 int64_t s8 = 2097151 & load_3(s + 21);
710 int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
711 int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
712 int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
713 int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
714 int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
715 int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
716 int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
717 int64_t s16 = 2097151 & load_3(s + 42);
718 int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
719 int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
720 int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
721 int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
722 int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
723 int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
724 int64_t s23 = (load_4(s + 60) >> 3);
725 int64_t carry0;
726 int64_t carry1;
727 int64_t carry2;
728 int64_t carry3;
729 int64_t carry4;
730 int64_t carry5;
731 int64_t carry6;
732 int64_t carry7;
733 int64_t carry8;
734 int64_t carry9;
735 int64_t carry10;
736 int64_t carry11;
737 int64_t carry12;
738 int64_t carry13;
739 int64_t carry14;
740 int64_t carry15;
741 int64_t carry16;
742
743 s11 += s23 * 666643;
744 s12 += s23 * 470296;
745 s13 += s23 * 654183;
746 s14 -= s23 * 997805;
747 s15 += s23 * 136657;
748 s16 -= s23 * 683901;
749 s23 = 0;
750
751 s10 += s22 * 666643;
752 s11 += s22 * 470296;
753 s12 += s22 * 654183;
754 s13 -= s22 * 997805;
755 s14 += s22 * 136657;
756 s15 -= s22 * 683901;
757 s22 = 0;
758
759 s9 += s21 * 666643;
760 s10 += s21 * 470296;
761 s11 += s21 * 654183;
762 s12 -= s21 * 997805;
763 s13 += s21 * 136657;
764 s14 -= s21 * 683901;
765 s21 = 0;
766
767 s8 += s20 * 666643;
768 s9 += s20 * 470296;
769 s10 += s20 * 654183;
770 s11 -= s20 * 997805;
771 s12 += s20 * 136657;
772 s13 -= s20 * 683901;
773 s20 = 0;
774
775 s7 += s19 * 666643;
776 s8 += s19 * 470296;
777 s9 += s19 * 654183;
778 s10 -= s19 * 997805;
779 s11 += s19 * 136657;
780 s12 -= s19 * 683901;
781 s19 = 0;
782
783 s6 += s18 * 666643;
784 s7 += s18 * 470296;
785 s8 += s18 * 654183;
786 s9 -= s18 * 997805;
787 s10 += s18 * 136657;
788 s11 -= s18 * 683901;
789 s18 = 0;
790
791 carry6 = (s6 + (1 << 20)) >> 21;
792 s7 += carry6;
793 s6 -= int64_lshift21(carry6);
794 carry8 = (s8 + (1 << 20)) >> 21;
795 s9 += carry8;
796 s8 -= int64_lshift21(carry8);
797 carry10 = (s10 + (1 << 20)) >> 21;
798 s11 += carry10;
799 s10 -= int64_lshift21(carry10);
800 carry12 = (s12 + (1 << 20)) >> 21;
801 s13 += carry12;
802 s12 -= int64_lshift21(carry12);
803 carry14 = (s14 + (1 << 20)) >> 21;
804 s15 += carry14;
805 s14 -= int64_lshift21(carry14);
806 carry16 = (s16 + (1 << 20)) >> 21;
807 s17 += carry16;
808 s16 -= int64_lshift21(carry16);
809
810 carry7 = (s7 + (1 << 20)) >> 21;
811 s8 += carry7;
812 s7 -= int64_lshift21(carry7);
813 carry9 = (s9 + (1 << 20)) >> 21;
814 s10 += carry9;
815 s9 -= int64_lshift21(carry9);
816 carry11 = (s11 + (1 << 20)) >> 21;
817 s12 += carry11;
818 s11 -= int64_lshift21(carry11);
819 carry13 = (s13 + (1 << 20)) >> 21;
820 s14 += carry13;
821 s13 -= int64_lshift21(carry13);
822 carry15 = (s15 + (1 << 20)) >> 21;
823 s16 += carry15;
824 s15 -= int64_lshift21(carry15);
825
826 s5 += s17 * 666643;
827 s6 += s17 * 470296;
828 s7 += s17 * 654183;
829 s8 -= s17 * 997805;
830 s9 += s17 * 136657;
831 s10 -= s17 * 683901;
832 s17 = 0;
833
834 s4 += s16 * 666643;
835 s5 += s16 * 470296;
836 s6 += s16 * 654183;
837 s7 -= s16 * 997805;
838 s8 += s16 * 136657;
839 s9 -= s16 * 683901;
840 s16 = 0;
841
842 s3 += s15 * 666643;
843 s4 += s15 * 470296;
844 s5 += s15 * 654183;
845 s6 -= s15 * 997805;
846 s7 += s15 * 136657;
847 s8 -= s15 * 683901;
848 s15 = 0;
849
850 s2 += s14 * 666643;
851 s3 += s14 * 470296;
852 s4 += s14 * 654183;
853 s5 -= s14 * 997805;
854 s6 += s14 * 136657;
855 s7 -= s14 * 683901;
856 s14 = 0;
857
858 s1 += s13 * 666643;
859 s2 += s13 * 470296;
860 s3 += s13 * 654183;
861 s4 -= s13 * 997805;
862 s5 += s13 * 136657;
863 s6 -= s13 * 683901;
864 s13 = 0;
865
866 s0 += s12 * 666643;
867 s1 += s12 * 470296;
868 s2 += s12 * 654183;
869 s3 -= s12 * 997805;
870 s4 += s12 * 136657;
871 s5 -= s12 * 683901;
872 s12 = 0;
873
874 carry0 = (s0 + (1 << 20)) >> 21;
875 s1 += carry0;
876 s0 -= int64_lshift21(carry0);
877 carry2 = (s2 + (1 << 20)) >> 21;
878 s3 += carry2;
879 s2 -= int64_lshift21(carry2);
880 carry4 = (s4 + (1 << 20)) >> 21;
881 s5 += carry4;
882 s4 -= int64_lshift21(carry4);
883 carry6 = (s6 + (1 << 20)) >> 21;
884 s7 += carry6;
885 s6 -= int64_lshift21(carry6);
886 carry8 = (s8 + (1 << 20)) >> 21;
887 s9 += carry8;
888 s8 -= int64_lshift21(carry8);
889 carry10 = (s10 + (1 << 20)) >> 21;
890 s11 += carry10;
891 s10 -= int64_lshift21(carry10);
892
893 carry1 = (s1 + (1 << 20)) >> 21;
894 s2 += carry1;
895 s1 -= int64_lshift21(carry1);
896 carry3 = (s3 + (1 << 20)) >> 21;
897 s4 += carry3;
898 s3 -= int64_lshift21(carry3);
899 carry5 = (s5 + (1 << 20)) >> 21;
900 s6 += carry5;
901 s5 -= int64_lshift21(carry5);
902 carry7 = (s7 + (1 << 20)) >> 21;
903 s8 += carry7;
904 s7 -= int64_lshift21(carry7);
905 carry9 = (s9 + (1 << 20)) >> 21;
906 s10 += carry9;
907 s9 -= int64_lshift21(carry9);
908 carry11 = (s11 + (1 << 20)) >> 21;
909 s12 += carry11;
910 s11 -= int64_lshift21(carry11);
911
912 s0 += s12 * 666643;
913 s1 += s12 * 470296;
914 s2 += s12 * 654183;
915 s3 -= s12 * 997805;
916 s4 += s12 * 136657;
917 s5 -= s12 * 683901;
918 s12 = 0;
919
920 carry0 = s0 >> 21;
921 s1 += carry0;
922 s0 -= int64_lshift21(carry0);
923 carry1 = s1 >> 21;
924 s2 += carry1;
925 s1 -= int64_lshift21(carry1);
926 carry2 = s2 >> 21;
927 s3 += carry2;
928 s2 -= int64_lshift21(carry2);
929 carry3 = s3 >> 21;
930 s4 += carry3;
931 s3 -= int64_lshift21(carry3);
932 carry4 = s4 >> 21;
933 s5 += carry4;
934 s4 -= int64_lshift21(carry4);
935 carry5 = s5 >> 21;
936 s6 += carry5;
937 s5 -= int64_lshift21(carry5);
938 carry6 = s6 >> 21;
939 s7 += carry6;
940 s6 -= int64_lshift21(carry6);
941 carry7 = s7 >> 21;
942 s8 += carry7;
943 s7 -= int64_lshift21(carry7);
944 carry8 = s8 >> 21;
945 s9 += carry8;
946 s8 -= int64_lshift21(carry8);
947 carry9 = s9 >> 21;
948 s10 += carry9;
949 s9 -= int64_lshift21(carry9);
950 carry10 = s10 >> 21;
951 s11 += carry10;
952 s10 -= int64_lshift21(carry10);
953 carry11 = s11 >> 21;
954 s12 += carry11;
955 s11 -= int64_lshift21(carry11);
956
957 s0 += s12 * 666643;
958 s1 += s12 * 470296;
959 s2 += s12 * 654183;
960 s3 -= s12 * 997805;
961 s4 += s12 * 136657;
962 s5 -= s12 * 683901;
963 s12 = 0;
964
965 carry0 = s0 >> 21;
966 s1 += carry0;
967 s0 -= int64_lshift21(carry0);
968 carry1 = s1 >> 21;
969 s2 += carry1;
970 s1 -= int64_lshift21(carry1);
971 carry2 = s2 >> 21;
972 s3 += carry2;
973 s2 -= int64_lshift21(carry2);
974 carry3 = s3 >> 21;
975 s4 += carry3;
976 s3 -= int64_lshift21(carry3);
977 carry4 = s4 >> 21;
978 s5 += carry4;
979 s4 -= int64_lshift21(carry4);
980 carry5 = s5 >> 21;
981 s6 += carry5;
982 s5 -= int64_lshift21(carry5);
983 carry6 = s6 >> 21;
984 s7 += carry6;
985 s6 -= int64_lshift21(carry6);
986 carry7 = s7 >> 21;
987 s8 += carry7;
988 s7 -= int64_lshift21(carry7);
989 carry8 = s8 >> 21;
990 s9 += carry8;
991 s8 -= int64_lshift21(carry8);
992 carry9 = s9 >> 21;
993 s10 += carry9;
994 s9 -= int64_lshift21(carry9);
995 carry10 = s10 >> 21;
996 s11 += carry10;
997 s10 -= int64_lshift21(carry10);
998
999 s[0] = s0 >> 0;
1000 s[1] = s0 >> 8;
1001 s[2] = (s0 >> 16) | (s1 << 5);
1002 s[3] = s1 >> 3;
1003 s[4] = s1 >> 11;
1004 s[5] = (s1 >> 19) | (s2 << 2);
1005 s[6] = s2 >> 6;
1006 s[7] = (s2 >> 14) | (s3 << 7);
1007 s[8] = s3 >> 1;
1008 s[9] = s3 >> 9;
1009 s[10] = (s3 >> 17) | (s4 << 4);
1010 s[11] = s4 >> 4;
1011 s[12] = s4 >> 12;
1012 s[13] = (s4 >> 20) | (s5 << 1);
1013 s[14] = s5 >> 7;
1014 s[15] = (s5 >> 15) | (s6 << 6);
1015 s[16] = s6 >> 2;
1016 s[17] = s6 >> 10;
1017 s[18] = (s6 >> 18) | (s7 << 3);
1018 s[19] = s7 >> 5;
1019 s[20] = s7 >> 13;
1020 s[21] = s8 >> 0;
1021 s[22] = s8 >> 8;
1022 s[23] = (s8 >> 16) | (s9 << 5);
1023 s[24] = s9 >> 3;
1024 s[25] = s9 >> 11;
1025 s[26] = (s9 >> 19) | (s10 << 2);
1026 s[27] = s10 >> 6;
1027 s[28] = (s10 >> 14) | (s11 << 7);
1028 s[29] = s11 >> 1;
1029 s[30] = s11 >> 9;
1030 s[31] = s11 >> 17;
1031}
1032
1033int ED25519_verify(const uint8_t *message, size_t message_len,
1034 const uint8_t signature[64], const uint8_t public_key[32]) {
1035 ge_p3 A;
1036 if ((signature[63] & 224) != 0 ||
1037 !x25519_ge_frombytes_vartime(&A, public_key)) {
1038 return 0;
1039 }
1040
1041 fe_loose t;
1042 fe_neg(&t, &A.X);
1043 fe_carry(&A.X, &t);
1044 fe_neg(&t, &A.T);
1045 fe_carry(&A.T, &t);
1046
1047 uint8_t pkcopy[32];
1048 memcpy(pkcopy, public_key, 32);
1049 uint8_t rcopy[32];
1050 memcpy(rcopy, signature, 32);
1051 union {
1052 uint64_t u64[4];
1053 uint8_t u8[32];
1054 } scopy;
1055 memcpy(&scopy.u8[0], signature + 32, 32);
1056
1057 // https://tools.ietf.org/html/rfc8032#section-5.1.7 requires that s be in
1058 // the range [0, order) in order to prevent signature malleability.
1059
1060 // kOrder is the order of Curve25519 in little-endian form.
1061 static const uint64_t kOrder[4] = {
1062 UINT64_C(0x5812631a5cf5d3ed),
1063 UINT64_C(0x14def9dea2f79cd6),
1064 0,
1065 UINT64_C(0x1000000000000000),
1066 };
1067 for (size_t i = 3;; i--) {
1068 if (scopy.u64[i] > kOrder[i]) {
1069 return 0;
1070 } else if (scopy.u64[i] < kOrder[i]) {
1071 break;
1072 } else if (i == 0) {
1073 return 0;
1074 }
1075 }
1076
1077 mbedtls_sha512_context ctx;
1078 mbedtls_sha512_init(&ctx);
1079 int ret;
1080 ret = mbedtls_sha512_starts_ret(&ctx, 0);
1081 assert(ret == 0);
1082
1083 ret = mbedtls_sha512_update_ret(&ctx, signature, 32);
1084 assert(ret == 0);
1085 ret = mbedtls_sha512_update_ret(&ctx, public_key, 32);
1086 assert(ret == 0);
1087 ret = mbedtls_sha512_update_ret(&ctx, message, message_len);
1088 assert(ret == 0);
1089
1090 uint8_t h[SHA512_DIGEST_LENGTH];
1091 ret = mbedtls_sha512_finish_ret(&ctx, h);
1092 assert(ret == 0);
1093 mbedtls_sha512_free(&ctx);
1094
1095 x25519_sc_reduce(h);
1096
1097 ge_p2 R;
1098 ge_double_scalarmult_vartime(&R, h, &A, scopy.u8);
1099
1100 uint8_t rcheck[32];
1101 x25519_ge_tobytes(rcheck, &R);
1102
1103 return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0;
1104}