Now reducing mod p after every single operation
diff --git a/library/ecp.c b/library/ecp.c
index d64fd05..01076a1 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -207,6 +207,25 @@
}
/*
+ * Reduce a mpi mod p in-place, general case, to use after mpi_mul_mpi
+ */
+#define MOD_MUL( N ) MPI_CHK( mpi_mod_mpi( &N, &N, &grp->P ) )
+
+/*
+ * Reduce a mpi mod p in-place, to use after mpi_sub_mpi
+ */
+#define MOD_SUB( N ) \
+ while( mpi_cmp_int( &N, 0 ) < 0 ) \
+ MPI_CHK( mpi_add_mpi( &N, &N, &grp->P ) )
+
+/*
+ * Reduce a mpi mod p in-place, to use after mpi_add_mpi and mpi_mul_int
+ */
+#define MOD_ADD( N ) \
+ while( mpi_cmp_mpi( &N, &grp->P ) >= 0 ) \
+ MPI_CHK( mpi_sub_mpi( &N, &N, &grp->P ) )
+
+/*
* Internal point format used for fast addition/doubling/multiplication:
* Jacobian coordinates (GECC example 3.20)
*/
@@ -274,7 +293,7 @@
MPI_CHK( mpi_copy( &jac->X, &aff->X ) );
MPI_CHK( mpi_copy( &jac->Y, &aff->Y ) );
- MPI_CHK( mpi_lset( &jac->Z, 1 ) );
+ MPI_CHK( mpi_lset( &jac->Z, 1 ) );
cleanup:
return( ret );
@@ -301,17 +320,15 @@
/*
* aff.X = jac.X / (jac.Z)^2 mod p
*/
- MPI_CHK( mpi_inv_mod( &Zi, &jac->Z, &grp->P ) );
- MPI_CHK( mpi_mul_mpi( &ZZi, &Zi, &Zi ) );
- MPI_CHK( mpi_mul_mpi( &T, &jac->X, &ZZi ) );
- MPI_CHK( mpi_mod_mpi( &aff->X, &T, &grp->P ) );
+ MPI_CHK( mpi_inv_mod( &Zi, &jac->Z, &grp->P ) );
+ MPI_CHK( mpi_mul_mpi( &ZZi, &Zi, &Zi ) ); MOD_MUL( ZZi );
+ MPI_CHK( mpi_mul_mpi( &aff->X, &jac->X, &ZZi ) ); MOD_MUL( aff->X );
/*
* aff.Y = jac.Y / (jac.Z)^3 mod p
*/
- MPI_CHK( mpi_mul_mpi( &T, &jac->Y, &ZZi ) );
- MPI_CHK( mpi_mul_mpi( &T, &T, &Zi ) );
- MPI_CHK( mpi_mod_mpi( &aff->Y, &T, &grp->P ) );
+ MPI_CHK( mpi_mul_mpi( &aff->Y, &jac->Y, &ZZi ) ); MOD_MUL( aff->Y );
+ MPI_CHK( mpi_mul_mpi( &aff->Y, &aff->Y, &Zi ) ); MOD_MUL( aff->Y );
cleanup:
@@ -335,29 +352,37 @@
mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 );
mpi_init( &X ); mpi_init( &Y ); mpi_init( &Z );
- MPI_CHK( mpi_mul_mpi( &T1, &P->Z, &P->Z ) );
- MPI_CHK( mpi_sub_mpi( &T2, &P->X, &T1 ) );
- MPI_CHK( mpi_add_mpi( &T1, &P->X, &T1 ) );
- MPI_CHK( mpi_mul_mpi( &T2, &T2, &T1 ) );
- MPI_CHK( mpi_mul_int( &T2, &T2, 3 ) );
+ MPI_CHK( mpi_mul_mpi( &T1, &P->Z, &P->Z ) ); MOD_MUL( T1 );
+ MPI_CHK( mpi_sub_mpi( &T2, &P->X, &T1 ) ); MOD_SUB( T2 );
+ MPI_CHK( mpi_add_mpi( &T1, &P->X, &T1 ) ); MOD_ADD( T1 );
+ MPI_CHK( mpi_mul_mpi( &T2, &T2, &T1 ) ); MOD_MUL( T2 );
+ MPI_CHK( mpi_mul_int( &T2, &T2, 3 ) ); MOD_ADD( T2 );
MPI_CHK( mpi_copy ( &Y, &P->Y ) );
- MPI_CHK( mpi_shift_l( &Y, 1 ) );
- MPI_CHK( mpi_mul_mpi( &Z, &Y, &P->Z ) );
- MPI_CHK( mpi_mul_mpi( &Y, &Y, &Y ) );
- MPI_CHK( mpi_mul_mpi( &T3, &Y, &P->X ) );
- MPI_CHK( mpi_mul_mpi( &Y, &Y, &Y ) );
- MPI_CHK( mpi_shift_r( &Y, 1 ) );
- MPI_CHK( mpi_mul_mpi( &X, &T2, &T2 ) );
- MPI_CHK( mpi_copy ( &T1, &T3 ) );
- MPI_CHK( mpi_shift_l( &T1, 1 ) );
- MPI_CHK( mpi_sub_mpi( &X, &X, &T1 ) );
- MPI_CHK( mpi_sub_mpi( &T1, &T3, &X ) );
- MPI_CHK( mpi_mul_mpi( &T1, &T1, &T2 ) );
- MPI_CHK( mpi_sub_mpi( &Y, &T1, &Y ) );
+ MPI_CHK( mpi_shift_l( &Y, 1 ) ); MOD_ADD( Y );
+ MPI_CHK( mpi_mul_mpi( &Z, &Y, &P->Z ) ); MOD_MUL( Z );
+ MPI_CHK( mpi_mul_mpi( &Y, &Y, &Y ) ); MOD_MUL( Y );
+ MPI_CHK( mpi_mul_mpi( &T3, &Y, &P->X ) ); MOD_MUL( T3 );
+ MPI_CHK( mpi_mul_mpi( &Y, &Y, &Y ) ); MOD_MUL( Y );
- MPI_CHK( mpi_mod_mpi( &R->X, &X, &grp->P ) );
- MPI_CHK( mpi_mod_mpi( &R->Y, &Y, &grp->P ) );
- MPI_CHK( mpi_mod_mpi( &R->Z, &Z, &grp->P ) );
+ /*
+ * For Y = Y / 2 mod p, we must make sure that Y is even before
+ * using right-shift. No need to reduce mod p afterwards.
+ */
+ if( mpi_get_bit( &Y, 0 ) == 1 )
+ MPI_CHK( mpi_add_mpi( &Y, &Y, &grp->P ) );
+ MPI_CHK( mpi_shift_r( &Y, 1 ) );
+
+ MPI_CHK( mpi_mul_mpi( &X, &T2, &T2 ) ); MOD_MUL( X );
+ MPI_CHK( mpi_copy ( &T1, &T3 ) );
+ MPI_CHK( mpi_shift_l( &T1, 1 ) ); MOD_ADD( T1 );
+ MPI_CHK( mpi_sub_mpi( &X, &X, &T1 ) ); MOD_SUB( X );
+ MPI_CHK( mpi_sub_mpi( &T1, &T3, &X ) ); MOD_SUB( T1 );
+ MPI_CHK( mpi_mul_mpi( &T1, &T1, &T2 ) ); MOD_MUL( T1 );
+ MPI_CHK( mpi_sub_mpi( &Y, &T1, &Y ) ); MOD_SUB( Y );
+
+ MPI_CHK( mpi_copy( &R->X, &X ) );
+ MPI_CHK( mpi_copy( &R->Y, &Y ) );
+ MPI_CHK( mpi_copy( &R->Z, &Z ) );
cleanup:
@@ -388,12 +413,12 @@
mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 ); mpi_init( &T4 );
mpi_init( &X ); mpi_init( &Y ); mpi_init( &Z );
- MPI_CHK( mpi_mul_mpi( &T1, &P->Z, &P->Z ) );
- MPI_CHK( mpi_mul_mpi( &T2, &T1, &P->Z ) );
- MPI_CHK( mpi_mul_mpi( &T1, &T1, &Q->X ) );
- MPI_CHK( mpi_mul_mpi( &T2, &T2, &Q->Y ) );
- MPI_CHK( mpi_sub_mpi( &T1, &T1, &P->X ) );
- MPI_CHK( mpi_sub_mpi( &T2, &T2, &P->Y ) );
+ MPI_CHK( mpi_mul_mpi( &T1, &P->Z, &P->Z ) ); MOD_MUL( T1 );
+ MPI_CHK( mpi_mul_mpi( &T2, &T1, &P->Z ) ); MOD_MUL( T2 );
+ MPI_CHK( mpi_mul_mpi( &T1, &T1, &Q->X ) ); MOD_MUL( T1 );
+ MPI_CHK( mpi_mul_mpi( &T2, &T2, &Q->Y ) ); MOD_MUL( T2 );
+ MPI_CHK( mpi_sub_mpi( &T1, &T1, &P->X ) ); MOD_SUB( T1 );
+ MPI_CHK( mpi_sub_mpi( &T2, &T2, &P->Y ) ); MOD_SUB( T2 );
if( mpi_cmp_int( &T1, 0 ) == 0 )
{
@@ -409,22 +434,22 @@
}
}
- MPI_CHK( mpi_mul_mpi( &Z, &P->Z, &T1 ) );
- MPI_CHK( mpi_mul_mpi( &T3, &T1, &T1 ) );
- MPI_CHK( mpi_mul_mpi( &T4, &T3, &T1 ) );
- MPI_CHK( mpi_mul_mpi( &T3, &T3, &P->X ) );
- MPI_CHK( mpi_mul_int( &T1, &T3, 2 ) );
- MPI_CHK( mpi_mul_mpi( &X, &T2, &T2 ) );
- MPI_CHK( mpi_sub_mpi( &X, &X, &T1 ) );
- MPI_CHK( mpi_sub_mpi( &X, &X, &T4 ) );
- MPI_CHK( mpi_sub_mpi( &T3, &T3, &X ) );
- MPI_CHK( mpi_mul_mpi( &T3, &T3, &T2 ) );
- MPI_CHK( mpi_mul_mpi( &T4, &T4, &P->Y ) );
- MPI_CHK( mpi_sub_mpi( &Y, &T3, &T4 ) );
+ MPI_CHK( mpi_mul_mpi( &Z, &P->Z, &T1 ) ); MOD_MUL( Z );
+ MPI_CHK( mpi_mul_mpi( &T3, &T1, &T1 ) ); MOD_MUL( T3 );
+ MPI_CHK( mpi_mul_mpi( &T4, &T3, &T1 ) ); MOD_MUL( T4 );
+ MPI_CHK( mpi_mul_mpi( &T3, &T3, &P->X ) ); MOD_MUL( T3 );
+ MPI_CHK( mpi_mul_int( &T1, &T3, 2 ) ); MOD_ADD( T1 );
+ MPI_CHK( mpi_mul_mpi( &X, &T2, &T2 ) ); MOD_MUL( X );
+ MPI_CHK( mpi_sub_mpi( &X, &X, &T1 ) ); MOD_SUB( X );
+ MPI_CHK( mpi_sub_mpi( &X, &X, &T4 ) ); MOD_SUB( X );
+ MPI_CHK( mpi_sub_mpi( &T3, &T3, &X ) ); MOD_SUB( T3 );
+ MPI_CHK( mpi_mul_mpi( &T3, &T3, &T2 ) ); MOD_MUL( T3 );
+ MPI_CHK( mpi_mul_mpi( &T4, &T4, &P->Y ) ); MOD_MUL( T4 );
+ MPI_CHK( mpi_sub_mpi( &Y, &T3, &T4 ) ); MOD_SUB( Y );
- MPI_CHK( mpi_mod_mpi( &R->X, &X, &grp->P ) );
- MPI_CHK( mpi_mod_mpi( &R->Y, &Y, &grp->P ) );
- MPI_CHK( mpi_mod_mpi( &R->Z, &Z, &grp->P ) );
+ MPI_CHK( mpi_copy( &R->X, &X ) );
+ MPI_CHK( mpi_copy( &R->Y, &Y ) );
+ MPI_CHK( mpi_copy( &R->Z, &Z ) );
cleanup: