Fix aarch64 assembly for bignum multiplication

Add memory constraints to the aarch64 inline assembly in MULADDC_STOP.
This fixes an issue where Clang 12 and 13 were generating
non-functional code on aarch64 platforms. See #4962, #4943
for further details.

Signed-off-by: David Horstmann <david.horstmann@arm.com>
diff --git a/include/mbedtls/bn_mul.h b/include/mbedtls/bn_mul.h
index 716bd3b..31137cd 100644
--- a/include/mbedtls/bn_mul.h
+++ b/include/mbedtls/bn_mul.h
@@ -244,18 +244,18 @@
 #define MULADDC_CORE                \
         "ldr x4, [%2], #8   \n\t"   \
         "ldr x5, [%1]       \n\t"   \
-        "mul x6, x4, %3     \n\t"   \
-        "umulh x7, x4, %3   \n\t"   \
+        "mul x6, x4, %4     \n\t"   \
+        "umulh x7, x4, %4   \n\t"   \
         "adds x5, x5, x6    \n\t"   \
         "adc x7, x7, xzr    \n\t"   \
         "adds x5, x5, %0    \n\t"   \
         "adc %0, x7, xzr    \n\t"   \
         "str x5, [%1], #8   \n\t"
 
-#define MULADDC_STOP                        \
-         : "+r" (c),  "+r" (d), "+r" (s)    \
-         : "r" (b)                          \
-         : "x4", "x5", "x6", "x7", "cc"     \
+#define MULADDC_STOP                                                    \
+         : "+r" (c),  "+r" (d), "+r" (s), "+m" (*(uint64_t (*)[16]) d)  \
+         : "r" (b), "m" (*(const uint64_t (*)[16]) s)                   \
+         : "x4", "x5", "x6", "x7", "cc"                                 \
     );
 
 #endif /* Aarch64 */