Restrict use of r7 in Thumb 1 code
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/bn_mul.h b/library/bn_mul.h
index c91743a..af0adb6 100644
--- a/library/bn_mul.h
+++ b/library/bn_mul.h
@@ -662,20 +662,11 @@
* There is a fairly complex matrix of supported options for Thumb / Thumb2 / Arm
* assembly. Choosing the correct codepath depends on the target, the compiler,
* and the optimisation level.
- *
- * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
- * our use of r7 below, unless -fomit-frame-pointer is passed.
- *
- * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
- * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
- * clang and armcc5 under the same conditions).
*/
#if defined(__thumb__) && !defined(__thumb2__) // Thumb 1 (not Thumb 2) ISA
// Only supported by gcc, when optimisation is enabled; only Thumb 1 codepath works
- #if defined(__OPTIMIZE__) && defined(__GNUC__)
#define ARM_THUMB_1
- #endif
#elif defined(__thumb2__) // Thumb 2 ISA
@@ -702,21 +693,48 @@
#if defined(ARM_THUMB_1)
+#if !defined(__OPTIMIZE__) && defined(__GNUC__)
+/*
+ * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
+ * our use of r7 below, unless -fomit-frame-pointer is passed.
+ *
+ * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
+ * x != 0, which we can detect using __OPTIMIZE__ (which is also defined by
+ * clang and armcc5 under the same conditions).
+ *
+ * So without __OPTIMIZE__ we must assume r7 is taken: RS aliases r1, and r1 is
+ * saved in / restored from r10 around its use as scratch. With optimisation
+ * enabled r7 is free, so RS aliases r7 and no save/restore is needed.
+ */
+#define MULADDC_SCRATCH "RS .req r1 \n\t"
+#define MULADDC_PRESERVE_R1 "mov r10, r1 \n\t"
+#define MULADDC_RESTORE_R1 "mov r1, r10 \n\t"
+#define MULADDC_SCRATCH_CLOBBER "r10"
+#else /* optimised build: r7 is not reserved as frame pointer */
+#define MULADDC_SCRATCH "RS .req r7 \n\t"
+#define MULADDC_PRESERVE_R1 ""
+#define MULADDC_RESTORE_R1 ""
+#define MULADDC_SCRATCH_CLOBBER "r7"
+#endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */
+
#define MULADDC_X1_INIT \
asm( \
+ MULADDC_SCRATCH /* emits the .req that binds the RS scratch-register alias */ \
"ldr r0, %3 \n\t" \
"ldr r1, %4 \n\t" \
"ldr r2, %5 \n\t" \
"ldr r3, %6 \n\t" \
- "lsr r7, r3, #16 \n\t" \
- "mov r9, r7 \n\t" \
- "lsl r7, r3, #16 \n\t" \
- "lsr r7, r7, #16 \n\t" \
- "mov r8, r7 \n\t"
+ "lsr r4, r3, #16 \n\t" /* r4 = r3 >> 16; r4 replaces r7 as the temporary here */ \
+ "mov r9, r4 \n\t" /* r9 = upper halfword of the value loaded from %6 */ \
+ "lsl r4, r3, #16 \n\t" /* shift left, then right, by 16 ... */ \
+ "lsr r4, r4, #16 \n\t" /* ... which clears the upper halfword of r4 */ \
+ "mov r8, r4 \n\t" /* r8 = lower halfword of the value loaded from %6 */ \
+
#define MULADDC_X1_CORE \
+ MULADDC_PRESERVE_R1 \
"ldmia r0!, {r6} \n\t" \
- "lsr r7, r6, #16 \n\t" \
+ "lsr RS, r6, #16 \n\t" \
"lsl r6, r6, #16 \n\t" \
"lsr r6, r6, #16 \n\t" \
"mov r4, r8 \n\t" \
@@ -724,12 +742,12 @@
"mov r3, r9 \n\t" \
"mul r6, r3 \n\t" \
"mov r5, r9 \n\t" \
- "mul r5, r7 \n\t" \
+ "mul r5, RS \n\t" \
"mov r3, r8 \n\t" \
- "mul r7, r3 \n\t" \
+ "mul RS, r3 \n\t" \
"lsr r3, r6, #16 \n\t" \
"add r5, r5, r3 \n\t" \
- "lsr r3, r7, #16 \n\t" \
+ "lsr r3, RS, #16 \n\t" \
"add r5, r5, r3 \n\t" \
"add r4, r4, r2 \n\t" \
"mov r2, #0 \n\t" \
@@ -737,9 +755,10 @@
"lsl r3, r6, #16 \n\t" \
"add r4, r4, r3 \n\t" \
"adc r5, r2 \n\t" \
- "lsl r3, r7, #16 \n\t" \
+ "lsl r3, RS, #16 \n\t" \
"add r4, r4, r3 \n\t" \
"adc r5, r2 \n\t" \
+ MULADDC_RESTORE_R1 \
"ldr r3, [r1] \n\t" \
"add r4, r4, r3 \n\t" \
"adc r2, r5 \n\t" \
@@ -752,7 +771,7 @@
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r0", "r1", "r2", "r3", "r4", "r5", \
- "r6", "r7", "r8", "r9", "cc" \
+ "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \
);
#elif defined(ARM_V6_DSP)