Merge pull request #4968 from davidhorstmann-arm/fix-aarch64-asm-constraints

Fix aarch64 assembly for bignum multiplication
diff --git a/ChangeLog.d/muladdc-amd64-memory.txt b/ChangeLog.d/muladdc-amd64-memory.txt
deleted file mode 100644
index b834331..0000000
--- a/ChangeLog.d/muladdc-amd64-memory.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-Bugfix
-   * Fix missing constraints on x86_64 assembly code for bignum multiplication
-     that broke some bignum operations with (at least) Clang 12.
-     Fixes #4116, #4786, #4917.
diff --git a/ChangeLog.d/muladdc-memory.txt b/ChangeLog.d/muladdc-memory.txt
new file mode 100644
index 0000000..218be5a
--- /dev/null
+++ b/ChangeLog.d/muladdc-memory.txt
@@ -0,0 +1,5 @@
+Bugfix
+   * Fix missing constraints on x86_64 and aarch64 assembly code
+     for bignum multiplication that broke some bignum operations with
+     (at least) Clang 12.
+     Fixes #4116, #4786, #4917, #4962.
diff --git a/library/bn_mul.h b/library/bn_mul.h
index 328e765..b71ddd8 100644
--- a/library/bn_mul.h
+++ b/library/bn_mul.h
@@ -224,7 +224,7 @@
         "adcq   %%rdx, %%rcx\n"             \
         "addq   $8, %%rdi\n"
 
-#define MULADDC_STOP                        \
+#define MULADDC_STOP                                                 \
         : "+c" (c), "+D" (d), "+S" (s), "+m" (*(uint64_t (*)[16]) d) \
         : "b" (b), "m" (*(const uint64_t (*)[16]) s)                 \
         : "rax", "rdx", "r8"                                         \
@@ -240,18 +240,18 @@
 #define MULADDC_CORE                \
         "ldr x4, [%2], #8   \n\t"   \
         "ldr x5, [%1]       \n\t"   \
-        "mul x6, x4, %3     \n\t"   \
-        "umulh x7, x4, %3   \n\t"   \
+        "mul x6, x4, %4     \n\t"   \
+        "umulh x7, x4, %4   \n\t"   \
         "adds x5, x5, x6    \n\t"   \
         "adc x7, x7, xzr    \n\t"   \
         "adds x5, x5, %0    \n\t"   \
         "adc %0, x7, xzr    \n\t"   \
         "str x5, [%1], #8   \n\t"
 
-#define MULADDC_STOP                        \
-         : "+r" (c),  "+r" (d), "+r" (s)    \
-         : "r" (b)                          \
-         : "x4", "x5", "x6", "x7", "cc"     \
+#define MULADDC_STOP                                                    \
+         : "+r" (c),  "+r" (d), "+r" (s), "+m" (*(uint64_t (*)[16]) d)  \
+         : "r" (b), "m" (*(const uint64_t (*)[16]) s)                   \
+         : "x4", "x5", "x6", "x7", "cc"                                 \
     );
 
 #endif /* Aarch64 */