Use mbedtls_xor_no_simd from cmac and cbc
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/aes.c b/library/aes.c
index 0a61d1b..d2687bc 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -1040,23 +1040,6 @@
#if defined(MBEDTLS_CIPHER_MODE_CBC)
-#if defined(__ARM_NEON) && defined(__aarch64__)
-/* Avoid using the NEON implementation of mbedtls_xor. Because of the dependency on
- * the result for the next block in CBC, and the cost of transferring that data from
- * NEON registers, it is faster to use the following on aarch64.
- * For 32-bit arm, NEON should be faster. */
-#define CBC_XOR_16(r, a, b) do { \
- mbedtls_put_unaligned_uint64(r, \
- mbedtls_get_unaligned_uint64(a) ^ \
- mbedtls_get_unaligned_uint64(b)); \
- mbedtls_put_unaligned_uint64(r + 8, \
- mbedtls_get_unaligned_uint64(a + 8) ^ \
- mbedtls_get_unaligned_uint64(b + 8)); \
-} while (0)
-#else
-#define CBC_XOR_16(r, a, b) mbedtls_xor(r, a, b, 16)
-#endif
-
/*
* AES-CBC buffer encryption/decryption
*/
@@ -1099,7 +1082,10 @@
if (ret != 0) {
goto exit;
}
- CBC_XOR_16(output, output, iv);
+ /* Avoid using the NEON implementation of mbedtls_xor. In CBC, the next
+ * block depends on this result, and the cost of transferring that data
+ * out of NEON registers makes NEON slower on aarch64. */
+ mbedtls_xor_no_simd(output, output, iv, 16);
memcpy(iv, temp, 16);
@@ -1109,7 +1095,7 @@
}
} else {
while (length > 0) {
- CBC_XOR_16(output, input, ivp);
+ mbedtls_xor_no_simd(output, input, ivp, 16);
ret = mbedtls_aes_crypt_ecb(ctx, mode, output, output);
if (ret != 0) {