Optimize w in the comb method
diff --git a/library/ecp.c b/library/ecp.c
index bba69fe..0cefd0a 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -870,6 +870,7 @@
/*
* Normalize jacobian coordinates so that Z == 0 || Z == 1 (GECC 3.2.1)
+ * Cost: 1N := 1I + 3M + 1S
*/
static int ecp_normalize( const ecp_group *grp, ecp_point *pt )
{
@@ -914,6 +915,8 @@
*
* Warning: fails (returning an error) if one of the points is zero!
* This should never happen, see choice of w in ecp_mul().
+ *
+ * Cost: 1N(t) := 1I + (6t - 3)M + 1S
*/
static int ecp_normalize_many( const ecp_group *grp,
ecp_point *T[], size_t t_len )
@@ -992,6 +995,8 @@
* with heavy variable renaming, some reordering and one minor modification
* (a = 2 * b, c = d - 2a replaced with c = d, c = c - b, c = c - b)
* in order to use a lot less intermediate variables (6 vs 25).
+ *
+ * Cost: 1D := 2M + 8S
*/
static int ecp_double_jac( const ecp_group *grp, ecp_point *R,
const ecp_point *P )
@@ -1052,6 +1057,8 @@
* If sign >= 0, perform addition, otherwise perform subtraction,
* taking advantage of the fact that, for Q != 0, we have
* -Q = (Q.X, -Q.Y, Q.Z)
+ *
+ * Cost: 1A := 8M + 3S
*/
static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
const ecp_point *P, const ecp_point *Q,
@@ -1153,6 +1160,7 @@
/*
* Addition: R = P + Q, result's coordinates normalized
+ * Cost: 1A + 1N = 1I + 11M + 4S
*/
int ecp_add( const ecp_group *grp, ecp_point *R,
const ecp_point *P, const ecp_point *Q )
@@ -1168,6 +1176,7 @@
/*
* Subtraction: R = P - Q, result's coordinates normalized
+ * Cost: 1A + 1N = 1I + 11M + 4S
*/
int ecp_sub( const ecp_group *grp, ecp_point *R,
const ecp_point *P, const ecp_point *Q )
@@ -1589,7 +1598,9 @@
* If i = i_{w-1} ... i_1 is the binary representation of i, then
* T[i] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + P
*
- * T must be able to hold at least 2^{w - 1} elements
+ * T must be able to hold 2^{w - 1} elements
+ *
+ * Cost: d(w-1) D + (2^{w-1} - 1) A + 1 N(w-1) + 1 N(2^{w-1} - 1)
*/
static int ecp_precompute_comb( const ecp_group *grp,
ecp_point T[], const ecp_point *P,
@@ -1666,6 +1677,8 @@
/*
* Core multiplication algorithm for the (modified) comb method.
* This part is actually common with the basic comb method (GECC 3.44)
+ *
+ * Cost: d A + d D + 1 R
*/
static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
const ecp_point T[],
@@ -1699,7 +1712,7 @@
}
/*
- * Multiplication using the comb method, WIP
+ * Multiplication using the comb method
*/
int ecp_mul_comb( ecp_group *grp, ecp_point *R,
const mpi *m, const ecp_point *P,
@@ -1727,9 +1740,30 @@
mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
- /* TODO: adjust exact value */
- w = grp->nbits >= 192 ? 5 : 2;
+ /*
+ * Minimize the number of multiplications, that is minimize
+ * 10 * d * w + 18 * 2^(w-1) + 11 * d + 7 * w
+ * (see costs of the various parts, with 1S = 1M)
+ */
+ w = grp->nbits >= 384 ? 5 : 4;
+ /*
+ * If P == G, pre-compute a bit more, since this may be re-used later.
+ * Just adding one ups the cost of the first mul by at most 3%.
+ */
+ if( p_eq_g )
+ w++;
+
+ /*
+ * Make sure w is within limits.
+ * (The last test is useful only for very small curves in the test suite.)
+ */
+ if( w > POLARSSL_ECP_WINDOW_SIZE )
+ w = POLARSSL_ECP_WINDOW_SIZE;
+ if( w < 2 || w >= grp->nbits )
+ w = 2;
+
+ /* Other sizes that depend on w */
pre_len = 1U << ( w - 1 );
d = ( grp->nbits + w - 1 ) / w;