Merge pull request #6932 from yuhaoth/pr/fix-arm64-host-build-and-illegal_instrucion-fail
Replace CPU modifier check with file scope target cpu modifiers
diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
index ac374d2..654a845 100644
--- a/include/mbedtls/check_config.h
+++ b/include/mbedtls/check_config.h
@@ -712,41 +712,6 @@
#if defined(MBEDTLS_SHA512_ALT) || defined(MBEDTLS_SHA512_PROCESS_ALT)
#error "MBEDTLS_SHA512_*ALT can't be used with MBEDTLS_SHA512_USE_A64_CRYPTO_*"
#endif
-/*
- * Best performance comes from most recent compilers, with intrinsics and -O3.
- * Must compile with -march=armv8.2-a+sha3, but we can't detect armv8.2-a, and
- * can't always detect __ARM_FEATURE_SHA512 (notably clang 7-12).
- *
- * GCC < 8 won't work at all (lacks the sha512 instructions)
- * GCC >= 8 uses intrinsics, sets __ARM_FEATURE_SHA512
- *
- * Clang < 7 won't work at all (lacks the sha512 instructions)
- * Clang 7-12 don't have intrinsics (but we work around that with inline
- * assembler) or __ARM_FEATURE_SHA512
- * Clang == 13.0.0 same as clang 12 (only seen on macOS)
- * Clang >= 13.0.1 has __ARM_FEATURE_SHA512 and intrinsics
- */
-#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512)
- /* Test Clang first, as it defines __GNUC__ */
-# if defined(__clang__)
-# if __clang_major__ < 7
-# error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-# elif __clang_major__ < 13 || \
- (__clang_major__ == 13 && __clang_minor__ == 0 && __clang_patchlevel__ == 0)
- /* We implement the intrinsics with inline assembler, so don't error */
-# else
-# error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-# endif
-# elif defined(__GNUC__)
-# if __GNUC__ < 8
-# error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-# else
-# error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-# endif
-# else
-# error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-# endif
-#endif
#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */
@@ -767,9 +732,7 @@
#if defined(MBEDTLS_SHA256_ALT) || defined(MBEDTLS_SHA256_PROCESS_ALT)
#error "MBEDTLS_SHA256_*ALT can't be used with MBEDTLS_SHA256_USE_A64_CRYPTO_*"
#endif
-#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO)
-#error "Must use minimum -march=armv8-a+crypto for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
-#endif
+
#endif
#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY) && \
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 5aff9c5..1995e54 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -3115,9 +3115,6 @@
* \note If MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT is defined when building
* for a non-Aarch64 build it will be silently ignored.
*
- * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum
- * of \c -march=armv8-a+crypto.
- *
* \warning MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT cannot be defined at the
* same time as MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY.
*
@@ -3140,9 +3137,6 @@
* \note This allows builds with a smaller code size than with
* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
*
- * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum
- * of \c -march=armv8-a+crypto.
- *
* \warning MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY cannot be defined at the same
* time as MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT.
*
@@ -3197,9 +3191,7 @@
* for a non-Aarch64 build it will be silently ignored.
*
* \note The code uses the SHA-512 Neon intrinsics, so requires GCC >= 8 or
- * Clang >= 7, and \c CFLAGS must be set to a minimum of
- * \c -march=armv8.2-a+sha3. An optimisation level of \c -O3 generates the
- * fastest code.
+ * Clang >= 7.
*
* \warning MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT cannot be defined at the
* same time as MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY.
@@ -3224,9 +3216,7 @@
* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
*
* \note The code uses the SHA-512 Neon intrinsics, so requires GCC >= 8 or
- * Clang >= 7, and \c CFLAGS must be set to a minimum of
- * \c -march=armv8.2-a+sha3. An optimisation level of \c -O3 generates the
- * fastest code.
+ * Clang >= 7.
*
* \warning MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY cannot be defined at the same
* time as MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT.
diff --git a/library/sha256.c b/library/sha256.c
index cb09a71..23cd406 100644
--- a/library/sha256.c
+++ b/library/sha256.c
@@ -22,6 +22,23 @@
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
*/
+#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO) && \
+ defined(__clang__) && __clang_major__ < 18 && __clang_major__ > 3
+/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
+ *
+ * The intrinsic declaration are guarded by predefined ACLE macros in clang:
+ * these are normally only enabled by the -march option on the command line.
+ * By defining the macros ourselves we gain access to those declarations without
+ * requiring -march on the command line.
+ *
+ * `arm_neon.h` could be included by any header file, so we put these defines
+ * at the top of this file, before any includes.
+ */
+#define __ARM_FEATURE_CRYPTO 1
+#define NEED_TARGET_OPTIONS
+#endif /* __aarch64__ && __clang__ &&
+ !__ARM_FEATURE_CRYPTO && __clang_major__ < 18 && __clang_major__ > 3 */
+
#include "common.h"
#if defined(MBEDTLS_SHA256_C) || defined(MBEDTLS_SHA224_C)
@@ -37,6 +54,30 @@
#if defined(__aarch64__)
# if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \
defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+/* *INDENT-OFF* */
+# if !defined(__ARM_FEATURE_CRYPTO) || defined(NEED_TARGET_OPTIONS)
+# if defined(__clang__)
+# if __clang_major__ < 4
+# error "A more recent Clang is required for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+# endif
+# pragma clang attribute push (__attribute__((target("crypto"))), apply_to=function)
+# define MBEDTLS_POP_TARGET_PRAGMA
+# elif defined(__GNUC__)
+ /* FIXME: GCC-5 annouce crypto extension, but some intrinsic are missed.
+ * Known miss intrinsic can be workaround.
+ */
+# if __GNUC__ < 6
+# error "A more recent GCC is required for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+# else
+# pragma GCC push_options
+# pragma GCC target ("arch=armv8-a+crypto")
+# define MBEDTLS_POP_TARGET_PRAGMA
+# endif
+# else
+# error "Only GCC and Clang supported for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+# endif
+# endif
+/* *INDENT-ON* */
# include <arm_neon.h>
# endif
# if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
@@ -353,8 +394,16 @@
SHA256_BLOCK_SIZE) ? 0 : -1;
}
-#endif /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
+#if defined(MBEDTLS_POP_TARGET_PRAGMA)
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#undef MBEDTLS_POP_TARGET_PRAGMA
+#endif
+#endif /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
#if !defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
#define mbedtls_internal_sha256_process_many_c mbedtls_internal_sha256_process_many
diff --git a/library/sha512.c b/library/sha512.c
index efcbed4..bc92a8d 100644
--- a/library/sha512.c
+++ b/library/sha512.c
@@ -22,6 +22,26 @@
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
*/
+#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512) && \
+ defined(__clang__) && __clang_major__ < 18 && \
+ __clang_major__ >= 13 && __clang_minor__ > 0 && __clang_patchlevel__ > 0
+/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
+ *
+ * The intrinsic declaration are guarded by predefined ACLE macros in clang:
+ * these are normally only enabled by the -march option on the command line.
+ * By defining the macros ourselves we gain access to those declarations without
+ * requiring -march on the command line.
+ *
+ * `arm_neon.h` could be included by any header file, so we put these defines
+ * at the top of this file, before any includes.
+ */
+#define __ARM_FEATURE_SHA512 1
+#define NEED_TARGET_OPTIONS
+#endif /* __aarch64__ && __clang__ &&
+ !__ARM_FEATURE_SHA512 && __clang_major__ < 18 &&
+ __clang_major__ >= 13 && __clang_minor__ > 0 &&
+ __clang_patchlevel__ > 0 */
+
#include "common.h"
#if defined(MBEDTLS_SHA512_C) || defined(MBEDTLS_SHA384_C)
@@ -43,6 +63,47 @@
#if defined(__aarch64__)
# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+/* *INDENT-OFF* */
+/*
+ * Best performance comes from most recent compilers, with intrinsics and -O3.
+ * Must compile with -march=armv8.2-a+sha3, but we can't detect armv8.2-a, and
+ * can't always detect __ARM_FEATURE_SHA512 (notably clang 7-12).
+ *
+ * GCC < 8 won't work at all (lacks the sha512 instructions)
+ * GCC >= 8 uses intrinsics, sets __ARM_FEATURE_SHA512
+ *
+ * Clang < 7 won't work at all (lacks the sha512 instructions)
+ * Clang 7-12 don't have intrinsics (but we work around that with inline
+ * assembler) or __ARM_FEATURE_SHA512
+ * Clang == 13.0.0 same as clang 12 (only seen on macOS)
+ * Clang >= 13.0.1 has __ARM_FEATURE_SHA512 and intrinsics
+ */
+# if !defined(__ARM_FEATURE_SHA512) || defined(NEED_TARGET_OPTIONS)
+ /* Test Clang first, as it defines __GNUC__ */
+# if defined(__clang__)
+# if __clang_major__ < 7
+# error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+# elif __clang_major__ < 13 || \
+ (__clang_major__ == 13 && __clang_minor__ == 0 && \
+ __clang_patchlevel__ == 0)
+ /* We implement the intrinsics with inline assembler, so don't error */
+# else
+# pragma clang attribute push (__attribute__((target("sha3"))), apply_to=function)
+# define MBEDTLS_POP_TARGET_PRAGMA
+# endif
+# elif defined(__GNUC__)
+# if __GNUC__ < 8
+# error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+# else
+# pragma GCC push_options
+# pragma GCC target ("arch=armv8.2-a+sha3")
+# define MBEDTLS_POP_TARGET_PRAGMA
+# endif
+# else
+# error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+# endif
+# endif
+/* *INDENT-ON* */
# include <arm_neon.h>
# endif
# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
@@ -516,6 +577,15 @@
SHA512_BLOCK_SIZE) ? 0 : -1;
}
+#if defined(MBEDTLS_POP_TARGET_PRAGMA)
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#undef MBEDTLS_POP_TARGET_PRAGMA
+#endif
+
#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */