Merge pull request #7651 from daverodgman/fix-armclang-compile-fail

Fix armclang compile fail
diff --git a/ChangeLog.d/armclang-compile-fix.txt b/ChangeLog.d/armclang-compile-fix.txt
new file mode 100644
index 0000000..59ae1cd
--- /dev/null
+++ b/ChangeLog.d/armclang-compile-fix.txt
@@ -0,0 +1,7 @@
+Bugfix
+   * Fix clang and armclang compilation error when targeting certain Arm
+     M-class CPUs (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23,
+     SecurCore SC000). Fixes #1077.
+Changes
+   * Enable Arm / Thumb bignum assembly for most Arm platforms when
+     compiling with gcc, clang or armclang and -O0.
diff --git a/library/bn_mul.h b/library/bn_mul.h
index ab59fbd..c5994f7 100644
--- a/library/bn_mul.h
+++ b/library/bn_mul.h
@@ -658,6 +658,20 @@
 
 #endif /* TriCore */
 
+#if defined(__arm__)
+
+#if defined(__thumb__) && !defined(__thumb2__)
+#if !defined(__ARMCC_VERSION) && !defined(__clang__) \
+    && !defined(__llvm__) && !defined(__INTEL_COMPILER)
+/*
+ * Thumb 1 ISA. This code path has only been tested successfully on gcc;
+ * it does not compile on clang or armclang.
+ *
+ * Other compilers which define __GNUC__ may not work. The above macro
+ * attempts to exclude these untested compilers.
+ */
+
+#if !defined(__OPTIMIZE__) && defined(__GNUC__)
 /*
  * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
  * our use of r7 below, unless -fomit-frame-pointer is passed.
@@ -666,32 +680,39 @@
  * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
  * clang and armcc5 under the same conditions).
  *
- * So, only use the optimized assembly below for optimized build, which avoids
- * the build error and is pretty reasonable anyway.
+ * If gcc needs to use r7, we use r1 as a scratch register and have a few extra
+ * instructions to preserve/restore it; otherwise, we can use r7 and avoid
+ * the preserve/restore overhead.
  */
-#if defined(__GNUC__) && !defined(__OPTIMIZE__)
-#define MULADDC_CANNOT_USE_R7
-#endif
-
-#if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)
-
-#if defined(__thumb__) && !defined(__thumb2__)
+#define MULADDC_SCRATCH              "RS .req r1         \n\t"
+#define MULADDC_PRESERVE_SCRATCH     "mov    r10, r1     \n\t"
+#define MULADDC_RESTORE_SCRATCH      "mov    r1, r10     \n\t"
+#define MULADDC_SCRATCH_CLOBBER      "r10"
+#else /* !defined(__OPTIMIZE__) && defined(__GNUC__) */
+#define MULADDC_SCRATCH              "RS .req r7         \n\t"
+#define MULADDC_PRESERVE_SCRATCH     ""
+#define MULADDC_RESTORE_SCRATCH      ""
+#define MULADDC_SCRATCH_CLOBBER      "r7"
+#endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */
 
 #define MULADDC_X1_INIT                                 \
     asm(                                                \
+    MULADDC_SCRATCH                                     \
             "ldr    r0, %3                      \n\t"   \
             "ldr    r1, %4                      \n\t"   \
             "ldr    r2, %5                      \n\t"   \
             "ldr    r3, %6                      \n\t"   \
-            "lsr    r7, r3, #16                 \n\t"   \
-            "mov    r9, r7                      \n\t"   \
-            "lsl    r7, r3, #16                 \n\t"   \
-            "lsr    r7, r7, #16                 \n\t"   \
-            "mov    r8, r7                      \n\t"
+            "lsr    r4, r3, #16                 \n\t"   \
+            "mov    r9, r4                      \n\t"   \
+            "lsl    r4, r3, #16                 \n\t"   \
+            "lsr    r4, r4, #16                 \n\t"   \
+            "mov    r8, r4                      \n\t"   \
+
 
 #define MULADDC_X1_CORE                                 \
+            MULADDC_PRESERVE_SCRATCH                    \
             "ldmia  r0!, {r6}                   \n\t"   \
-            "lsr    r7, r6, #16                 \n\t"   \
+            "lsr    RS, r6, #16                 \n\t"   \
             "lsl    r6, r6, #16                 \n\t"   \
             "lsr    r6, r6, #16                 \n\t"   \
             "mov    r4, r8                      \n\t"   \
@@ -699,12 +720,12 @@
             "mov    r3, r9                      \n\t"   \
             "mul    r6, r3                      \n\t"   \
             "mov    r5, r9                      \n\t"   \
-            "mul    r5, r7                      \n\t"   \
+            "mul    r5, RS                      \n\t"   \
             "mov    r3, r8                      \n\t"   \
-            "mul    r7, r3                      \n\t"   \
+            "mul    RS, r3                      \n\t"   \
             "lsr    r3, r6, #16                 \n\t"   \
             "add    r5, r5, r3                  \n\t"   \
-            "lsr    r3, r7, #16                 \n\t"   \
+            "lsr    r3, RS, #16                 \n\t"   \
             "add    r5, r5, r3                  \n\t"   \
             "add    r4, r4, r2                  \n\t"   \
             "mov    r2, #0                      \n\t"   \
@@ -712,9 +733,10 @@
             "lsl    r3, r6, #16                 \n\t"   \
             "add    r4, r4, r3                  \n\t"   \
             "adc    r5, r2                      \n\t"   \
-            "lsl    r3, r7, #16                 \n\t"   \
+            "lsl    r3, RS, #16                 \n\t"   \
             "add    r4, r4, r3                  \n\t"   \
             "adc    r5, r2                      \n\t"   \
+            MULADDC_RESTORE_SCRATCH                     \
             "ldr    r3, [r1]                    \n\t"   \
             "add    r4, r4, r3                  \n\t"   \
             "adc    r2, r5                      \n\t"   \
@@ -727,11 +749,15 @@
          : "=m" (c),  "=m" (d), "=m" (s)        \
          : "m" (s), "m" (d), "m" (c), "m" (b)   \
          : "r0", "r1", "r2", "r3", "r4", "r5",  \
-           "r6", "r7", "r8", "r9", "cc"         \
+           "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \
          );
+#endif /* !defined(__ARMCC_VERSION) && !defined(__clang__) */
 
 #elif (__ARM_ARCH >= 6) && \
     defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)
+/* Armv6-M (or later) with DSP Instruction Set Extensions.
+ * Requires support for either Thumb 2 or Arm ISA.
+ */
 
 #define MULADDC_X1_INIT                            \
     {                                              \
@@ -796,7 +822,7 @@
         );                                                   \
     }
 
-#else
+#else /* Thumb 2 or Arm ISA, without DSP extensions */
 
 #define MULADDC_X1_INIT                                 \
     asm(                                                \
@@ -810,9 +836,9 @@
             "mov    r5, #0                      \n\t"   \
             "ldr    r6, [r1]                    \n\t"   \
             "umlal  r2, r5, r3, r4              \n\t"   \
-            "adds   r7, r6, r2                  \n\t"   \
+            "adds   r4, r6, r2                  \n\t"   \
             "adc    r2, r5, #0                  \n\t"   \
-            "str    r7, [r1], #4                \n\t"
+            "str    r4, [r1], #4                \n\t"
 
 #define MULADDC_X1_STOP                                 \
             "str    r2, %0                      \n\t"   \
@@ -821,12 +847,12 @@
          : "=m" (c),  "=m" (d), "=m" (s)        \
          : "m" (s), "m" (d), "m" (c), "m" (b)   \
          : "r0", "r1", "r2", "r3", "r4", "r5",  \
-           "r6", "r7", "cc"                     \
+           "r6", "cc"                     \
          );
 
-#endif /* Thumb */
+#endif /* ISA codepath selection */
 
-#endif /* ARMv3 */
+#endif /* defined(__arm__) */
 
 #if defined(__alpha__)
 
diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh
index 7b0893b..8242f88 100755
--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@@ -3895,6 +3895,25 @@
     not grep __aeabi_lmul library/*.o
 }
 
+component_build_arm_clang_thumb () {
+    # ~ 30s
+
+    scripts/config.py baremetal
+
+    msg "build: clang thumb 2, make"
+    make clean
+    make CC="clang" CFLAGS='-std=c99 -Werror -Os --target=arm-linux-gnueabihf -march=armv7-m -mthumb' lib
+
+    # Some Thumb 1 asm is sensitive to optimisation level, so test both -O0 and -Os
+    msg "build: clang thumb 1 -O0, make"
+    make clean
+    make CC="clang" CFLAGS='-std=c99 -Werror -O0 --target=arm-linux-gnueabihf -mcpu=arm1136j-s -mthumb' lib
+
+    msg "build: clang thumb 1 -Os, make"
+    make clean
+    make CC="clang" CFLAGS='-std=c99 -Werror -Os --target=arm-linux-gnueabihf -mcpu=arm1136j-s -mthumb' lib
+}
+
 component_build_armcc () {
     msg "build: ARM Compiler 5"
     scripts/config.py baremetal
@@ -3918,7 +3937,7 @@
 
     make clean
 
-    # Compile with -O1 since some Arm inline assembly is disabled for -O0.
+    # Compile mostly with -O1 since some Arm inline assembly is disabled for -O0.
 
     # ARM Compiler 6 - Target ARMv7-A
     armc6_build_test "-O1 --target=arm-arm-none-eabi -march=armv7-a"
@@ -3937,6 +3956,12 @@
 
     # ARM Compiler 6 - Target ARMv8.2-A - AArch64
     armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
+
+    # ARM Compiler 6 - Target Cortex-M0 - no optimisation
+    armc6_build_test "-O0 --target=arm-arm-none-eabi -mcpu=cortex-m0"
+
+    # ARM Compiler 6 - Target Cortex-M0
+    armc6_build_test "-Os --target=arm-arm-none-eabi -mcpu=cortex-m0"
 }
 support_build_armcc () {
     armc5_cc="$ARMC5_BIN_DIR/armcc"