Improve comb method (less precomputed points)
diff --git a/library/ecp.c b/library/ecp.c
index 7aa98c0..b606a02 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1515,27 +1515,33 @@
* Compute the representation of m that will be used with the comb method.
*
* The basic comb method is described in GECC 3.44 for example. We use a
- * modified version [3] that provides resistance to SPA by avoiding zero
- * digits in the representation. We represent (K_i, s_i) from the paper as a
- * single signed char.
+ * modified version that provides resistance to SPA by avoiding zero
+ * digits in the representation as in [3]. We modify the method further by
+ * requiring that all K_i be odd, which has the small cost that our
+ * representation uses on more K, due to carries.
+ *
+ * Also, for the sake of compactness, only the seven low-order bits of x[i]
+ * are used to represent K_i, and the msb of x[i] encodes the the sign (s_i in
+ * the paper): it is set if and only if if s_i == -1;
*
* Calling conventions:
- * - x is an array of size d
+ * - x is an array of size d + 1
* - w is the size, ie number of teeth, of the comb
- * - m is the MPI, expected to be odd and such that, if l = bitlength(m):
- * ceil( l / w ) <= d (these two assumptions are not checked, an incorrect
- * result my be returned if they are not satisfied)
+ * - m is the MPI, expected to be odd and such that bitlength(m) <= w * d
+ * (the result will be incorrect if these assumptions are not satisfied)
*/
-static void ecp_comb_fixed( signed char x[], size_t d,
+static void ecp_comb_fixed( unsigned char x[], size_t d,
unsigned char w, const mpi *m )
{
size_t i, j;
+ unsigned char c, cc, adjust;
- memset( x, 0, d );
+ memset( x, 0, d+1 );
/* For x[0] use the classical comb value without adjustement */
for( j = 0; j < w; j++ )
x[0] |= mpi_get_bit( m, d * j ) << j;
+ c = 0;
for( i = 1; i < d; i++ )
{
@@ -1543,95 +1549,100 @@
for( j = 0; j < w; j++ )
x[i] |= mpi_get_bit( m, i + d * j ) << j;
- /* Adjust if it's zero */
- if( x[i] == 0 )
- {
- x[i] = x[i-1];
- x[i-1] *= -1;
- }
+ /* Add carry and update it */
+ cc = x[i] & c;
+ x[i] = x[i] ^ c;
+ c = cc;
+
+ /* Make sure x[i] is odd, avoiding if-branches */
+ adjust = 1 - ( x[i] & 0x01 );
+ c |= x[i] & ( x[i-1] * adjust );
+ x[i] = x[i] ^ ( x[i-1] * adjust );
+ x[i-1] |= adjust << 7;
}
+
+ /* Finish with the carry */
+ x[i] = c;
+ adjust = 1 - ( x[i] & 0x01 );
+ c |= x[i] & ( x[i-1] * adjust );
+ x[i] = x[i] ^ ( x[i-1] * adjust );
+ x[i-1] |= adjust << 7;
}
/*
* Precompute points for the comb method
*
- * If i = i_{w-1} ... i_0 is the binary representation of i, then
- * T[i-1] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + i_0 P
+ * If i = i_{w-1} ... i_1 is the binary representation of i, then
+ * T[i] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + P
*
- * T must be able to hold at least 2^w - 1 elements
+ * T must be able to hold at least 2^{w - 1} elements
*/
static int ecp_precompute_comb( const ecp_group *grp,
ecp_point T[], const ecp_point *P,
unsigned char w, size_t d )
{
int ret;
- unsigned char i, mask;
- size_t j, t_len = ( 1U << w ) - 1;
- ecp_point *cur, *TT[t_len - 1];
+ unsigned char i, j, k;
+ ecp_point *cur, *TT[200]; // TODO
/*
- * Compute the 2^{di}
+ * Set T[0] = P and
+ * T[2^{l-1}] = 2^{dl} P for l = 1 .. w-1 (this is not the final value)
*/
MPI_CHK( ecp_copy( &T[0], P ) );
- for( i = 1; i < w; i++ )
+ k = 0;
+ for( i = 1; i < ( 1U << (w-1) ); i <<= 1 )
{
- cur = T + ( 1 << i ) - 1;
- ecp_copy( cur, T + ( 1 << (i-1) ) - 1 );
+ cur = T + i;
+ MPI_CHK( ecp_copy( cur, T + ( i >> 1 ) ) );
for( j = 0; j < d; j++ )
MPI_CHK( ecp_double_jac( grp, cur, cur ) );
- TT[i-1] = cur;
+
+ TT[k++] = cur;
}
- /* P already normalized, so w - 1 points to do */
- ecp_normalize_many( grp, TT, w - 1);
+ ecp_normalize_many( grp, TT, k );
/*
* Compute the remaining ones using the minimal number of additions
+ * Be careful to update T[2^l] only after using it!
*/
- j = 0;
- for( i = 3; i < (1U << w); i++ )
+ k = 0;
+ for( i = 1; i < ( 1U << (w-1) ); i <<= 1 )
{
- if( T[i - 1].X.p != NULL )
- continue;
-
- /* Find the least significant non-zero bit of the index */
- for( mask = 1; mask != 0; mask <<=1 )
- if( ( i & mask ) != 0 )
- break;
-
- /* Use the previously computed values */
- ecp_add_mixed( grp, &T[i - 1], &T[i - mask - 1], &T[mask - 1], +1 );
-
- /* Register for normalisation */
- TT[j++] = &T[i - 1];
+ j = i;
+ while( j-- )
+ {
+ ecp_add_mixed( grp, &T[i + j], &T[j], &T[i], +1 );
+ TT[k++] = &T[i + j];
+ }
}
- ecp_normalize_many( grp, TT, j );
+ ecp_normalize_many( grp, TT, k );
cleanup:
return( ret );
}
/*
- * Select precomputed point: R = sign(i) * T[ abs(i) ]
+ * Select precomputed point: R = sign(i) * T[ abs(i) / 2 ]
*/
static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
- const ecp_point T[], signed char i )
+ const ecp_point T[], unsigned char i )
{
int ret;
- if( i > 0 )
- return( ecp_copy( R, &T[i - 1] ) );
-
- MPI_CHK( ecp_copy( R, &T[-i - 1] ) );
+ /* Ignore the "sign" bit */
+ MPI_CHK( ecp_copy( R, &T[ ( i & 0x7Fu ) >> 1 ] ) );
/*
* -R = (R.X, -R.Y, R.Z), and
* -R.Y mod P = P - R.Y unless R.Y == 0
*/
- if( mpi_cmp_int( &R->Y, 0 ) != 0 )
- MPI_CHK( mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
+ if( ( i & 0x80 ) != 0 )
+ if( mpi_cmp_int( &R->Y, 0 ) != 0 )
+ MPI_CHK( mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
cleanup:
return( ret );
@@ -1642,7 +1653,7 @@
* This part is actually common with the basic comb method (GECC 3.44)
*/
static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
- const ecp_point T[], const signed char x[],
+ const ecp_point T[], const unsigned char x[],
size_t d )
{
int ret;
@@ -1652,7 +1663,7 @@
ecp_point_init( &Txi );
/* Avoid useless doubling/addition of 0 by better initialisation */
- i = d - 1;
+ i = d;
MPI_CHK( ecp_select_comb( grp, R, T, x[i] ) );
while( i-- != 0 )
@@ -1678,7 +1689,7 @@
int ret;
unsigned char w, m_is_odd, p_eq_g;
size_t pre_len, d, i;
- signed char k[100]; // TODO
+ unsigned char k[200]; // TODO
ecp_point Q, *T = NULL, S[2];
mpi M;