Perf improvement in memcpy_if
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/constant_time.c b/library/constant_time.c
index 86cc066..6c7ef56 100644
--- a/library/constant_time.c
+++ b/library/constant_time.c
@@ -152,8 +152,13 @@
const unsigned char *src2,
size_t len)
{
+#if defined(MBEDTLS_CT_SIZE_64)
+ const uint64_t mask = (uint64_t) condition;
+ const uint64_t not_mask = (uint64_t) ~mbedtls_ct_compiler_opaque(condition);
+#else
const uint32_t mask = (uint32_t) condition;
const uint32_t not_mask = (uint32_t) ~mbedtls_ct_compiler_opaque(condition);
+#endif
/* If src2 is NULL, setup src2 so that we read from the destination address.
*
@@ -167,11 +172,19 @@
/* dest[i] = c1 == c2 ? src[i] : dest[i] */
size_t i = 0;
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
+#if defined(MBEDTLS_CT_SIZE_64)
+ for (; (i + 8) <= len; i += 8) {
+ uint64_t a = mbedtls_get_unaligned_uint64(src1 + i) & mask;
+ uint64_t b = mbedtls_get_unaligned_uint64(src2 + i) & not_mask;
+ mbedtls_put_unaligned_uint64(dest + i, a | b);
+ }
+#else
for (; (i + 4) <= len; i += 4) {
uint32_t a = mbedtls_get_unaligned_uint32(src1 + i) & mask;
uint32_t b = mbedtls_get_unaligned_uint32(src2 + i) & not_mask;
mbedtls_put_unaligned_uint32(dest + i, a | b);
}
+#endif /* defined(MBEDTLS_CT_SIZE_64) */
#endif /* MBEDTLS_EFFICIENT_UNALIGNED_ACCESS */
for (; i < len; i++) {
dest[i] = (src1[i] & mask) | (src2[i] & not_mask);