Optimize ecp_modp()
Makes it 22% faster, for a 5% gain on ecp_mul()
diff --git a/library/ecp.c b/library/ecp.c
index d53d306..78b05c4 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -451,6 +451,8 @@
/*
* Wrapper around fast quasi-modp functions, with fall-back to mpi_mod_mpi.
* See the documentation of struct ecp_group.
+ *
+ * This function is in the critical loop for ecp_mul, so pay attention to perf.
*/
static int ecp_modp( mpi *N, const ecp_group *grp )
{
@@ -459,16 +461,22 @@
if( grp->modp == NULL )
return( mpi_mod_mpi( N, N, &grp->P ) );
- if( mpi_cmp_int( N, 0 ) < 0 || mpi_msb( N ) > 2 * grp->pbits )
+ /* N->s < 0 is a much faster sign test; it is inaccurate only when N is 0 */
+ if( ( N->s < 0 && mpi_cmp_int( N, 0 ) != 0 ) ||
+ mpi_msb( N ) > 2 * grp->pbits )
+ {
return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
+ }
MPI_CHK( grp->modp( N ) );
- while( mpi_cmp_int( N, 0 ) < 0 )
+ /* N->s < 0 is a much faster sign test; it is inaccurate only when N is 0 */
+ while( N->s < 0 && mpi_cmp_int( N, 0 ) != 0 )
MPI_CHK( mpi_add_mpi( N, N, &grp->P ) );
while( mpi_cmp_mpi( N, &grp->P ) >= 0 )
- MPI_CHK( mpi_sub_mpi( N, N, &grp->P ) );
+ /* we know P, N and the result are positive */
+ MPI_CHK( mpi_sub_abs( N, N, &grp->P ) );
cleanup:
return( ret );
@@ -915,17 +923,20 @@
/*
* Reduce a mpi mod p in-place, to use after mpi_sub_mpi
+ * N->s < 0 is a very fast sign test; it is inaccurate only when N is 0
*/
#define MOD_SUB( N ) \
- while( mpi_cmp_int( &N, 0 ) < 0 ) \
+ while( N.s < 0 && mpi_cmp_int( &N, 0 ) != 0 ) \
MPI_CHK( mpi_add_mpi( &N, &N, &grp->P ) )
/*
- * Reduce a mpi mod p in-place, to use after mpi_add_mpi and mpi_mul_int
+ * Reduce a mpi mod p in-place, to use after mpi_add_mpi and mpi_mul_int.
+ * We know P, N and the result are positive, so sub_abs is correct, and
+ * a bit faster.
*/
#define MOD_ADD( N ) \
while( mpi_cmp_mpi( &N, &grp->P ) >= 0 ) \
- MPI_CHK( mpi_sub_mpi( &N, &N, &grp->P ) )
+ MPI_CHK( mpi_sub_abs( &N, &N, &grp->P ) )
/*
* Normalize jacobian coordinates so that Z == 0 || Z == 1 (GECC 3.2.1)