refactor(sve): move sve operations to a lib routine
This patch moves the SVE subtract operation to a common SVE library
routine. The routine takes a callback function that performs the world
switch while the SVE operations are done in a loop.
The callback is invoked after the z0 and z1 vectors are loaded and
before the result is calculated and stored to the destination array.
This refactoring allows the function to be reused later to exercise a
context switch from NS to Secure world, or from NS to Realm world,
depending on the callback supplied.
This patch also moves the routines that fill and read the SVE vector
registers to the common SVE library.
Signed-off-by: Arunachalam Ganapathy <arunachalam.ganapathy@arm.com>
Change-Id: Iceb34b96fa85597be63a50c429ae0eb29f8fcaf8
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index 994fbfe..2fdaa55 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -33,6 +33,16 @@
void sve_config_vq(uint8_t sve_vq);
uint32_t sve_probe_vl(uint8_t sve_max_vq);
+/* Fill the SVE vector registers with the contents of the buffer 'v'. */
+void sve_fill_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS]);
+
+/* Read the current SVE vector register state back into the buffer 'v'. */
+void sve_read_vector_regs(sve_vector_t v[SVE_NUM_VECTORS]);
+
+/* Assembly routines */
+
+/*
+ * Computes dst_array[i] = src_array1[i] - src_array2[i] for 'array_size'
+ * elements using SVE instructions. 'world_switch_cb' is invoked on every loop
+ * iteration, after the source vectors are loaded and before the difference is
+ * computed and stored. The loop stops as soon as the callback returns true.
+ *
+ * The source arrays are only read, so they are const-qualified (as in the
+ * prototypes this declaration replaces); dst_array may alias a source array.
+ *
+ * Returns the value returned by the last callback invocation.
+ */
+bool sve_subtract_arrays_interleaved(int *dst_array, const int *src_array1,
+				     const int *src_array2, int array_size,
+				     bool (*world_switch_cb)(void));
+
+/*
+ * Computes dst_array[i] = src_array1[i] - src_array2[i] for 'array_size'
+ * elements using SVE instructions.
+ */
+void sve_subtract_arrays(int *dst_array, const int *src_array1,
+			 const int *src_array2, int array_size);
#ifdef __aarch64__
diff --git a/lib/extensions/sve/aarch64/sve.c b/lib/extensions/sve/aarch64/sve.c
index 698e78b..83f61fe 100644
--- a/lib/extensions/sve/aarch64/sve.c
+++ b/lib/extensions/sve/aarch64/sve.c
@@ -83,3 +83,87 @@
return vl_bitmap;
}
+
+/*
+ * Fill the SVE vector registers from the buffer 'v'.
+ *
+ * NOTE(review): fill_sve_helper() is defined outside this patch; it is
+ * presumably one load of Z<n> from v[n] through operand %0 - confirm.
+ */
+void sve_fill_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS])
+{
+	assert(is_armv8_2_sve_present());
+
+	/*
+	 * The buffer is reached only through the pointer held in a register
+	 * operand, so the compiler cannot see that the asm reads it. The
+	 * "memory" clobber stops it from reordering or dropping earlier
+	 * stores to 'v' across the asm statement.
+	 */
+	__asm__ volatile(
+		".arch_extension sve\n"
+		fill_sve_helper(0)
+		fill_sve_helper(1)
+		fill_sve_helper(2)
+		fill_sve_helper(3)
+		fill_sve_helper(4)
+		fill_sve_helper(5)
+		fill_sve_helper(6)
+		fill_sve_helper(7)
+		fill_sve_helper(8)
+		fill_sve_helper(9)
+		fill_sve_helper(10)
+		fill_sve_helper(11)
+		fill_sve_helper(12)
+		fill_sve_helper(13)
+		fill_sve_helper(14)
+		fill_sve_helper(15)
+		fill_sve_helper(16)
+		fill_sve_helper(17)
+		fill_sve_helper(18)
+		fill_sve_helper(19)
+		fill_sve_helper(20)
+		fill_sve_helper(21)
+		fill_sve_helper(22)
+		fill_sve_helper(23)
+		fill_sve_helper(24)
+		fill_sve_helper(25)
+		fill_sve_helper(26)
+		fill_sve_helper(27)
+		fill_sve_helper(28)
+		fill_sve_helper(29)
+		fill_sve_helper(30)
+		fill_sve_helper(31)
+		".arch_extension nosve\n"
+		:
+		: "r" (v)
+		: "memory");
+}
+
+/*
+ * Read the SVE vector registers into the buffer 'v'.
+ *
+ * NOTE(review): read_sve_helper() is defined outside this patch; it is
+ * presumably one store of Z<n> to v[n] through operand %0 - confirm.
+ */
+void sve_read_vector_regs(sve_vector_t v[SVE_NUM_VECTORS])
+{
+	assert(is_armv8_2_sve_present());
+
+	/*
+	 * The buffer is written only through the pointer held in a register
+	 * operand, so the compiler cannot see that the asm modifies it. The
+	 * "memory" clobber forces it to reload 'v' after the asm statement
+	 * instead of reusing values it read earlier.
+	 */
+	__asm__ volatile(
+		".arch_extension sve\n"
+		read_sve_helper(0)
+		read_sve_helper(1)
+		read_sve_helper(2)
+		read_sve_helper(3)
+		read_sve_helper(4)
+		read_sve_helper(5)
+		read_sve_helper(6)
+		read_sve_helper(7)
+		read_sve_helper(8)
+		read_sve_helper(9)
+		read_sve_helper(10)
+		read_sve_helper(11)
+		read_sve_helper(12)
+		read_sve_helper(13)
+		read_sve_helper(14)
+		read_sve_helper(15)
+		read_sve_helper(16)
+		read_sve_helper(17)
+		read_sve_helper(18)
+		read_sve_helper(19)
+		read_sve_helper(20)
+		read_sve_helper(21)
+		read_sve_helper(22)
+		read_sve_helper(23)
+		read_sve_helper(24)
+		read_sve_helper(25)
+		read_sve_helper(26)
+		read_sve_helper(27)
+		read_sve_helper(28)
+		read_sve_helper(29)
+		read_sve_helper(30)
+		read_sve_helper(31)
+		".arch_extension nosve\n"
+		:
+		: "r" (v)
+		: "memory");
+}
diff --git a/lib/extensions/sve/aarch64/sve_helpers.S b/lib/extensions/sve/aarch64/sve_helpers.S
new file mode 100644
index 0000000..128b350
--- /dev/null
+++ b/lib/extensions/sve/aarch64/sve_helpers.S
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+
+.global sve_subtract_arrays_interleaved
+.global sve_subtract_arrays
+
+#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
+
+/*
+ * Based on example code from:
+ * Arm Compiler Scalable Vector Extension User Guide Version 6.12 [1].
+ *
+ * [1] https://developer.arm.com/documentation/100891/0612/getting-started-with-the-sve-compiler/compiling-c-and-c---code-for-sve-enabled-targets
+ */
+
+/*
+ * Subtracts arrays using SVE operations with interleaved callback.
+ * dst_array = src_array_1 - src_array_2
+ *
+ * Each loop iteration loads one vector from each source array, invokes the
+ * callback, then subtracts and stores the result. The loop is bottom-tested,
+ * so the callback is invoked at least once even if the array size is 0.
+ *
+ * Inputs:
+ * x0 - dst_array
+ * x1 - src_array_1
+ * x2 - src_array_2
+ * w3 - array size (number of 32-bit elements)
+ * x4 - callback function pointer
+ * Returns:
+ * Callback function's return value (non-zero means the loop was abandoned)
+ *
+ * NOTE(review): z0, z1 and p0 are live across the callback. The procedure
+ * call standard does not require a callee to preserve them, so this relies
+ * on the callback's normal-world code not touching SVE/FP state - confirm
+ * for every callback that is passed in.
+ */
+func sve_subtract_arrays_interleaved
+.arch_extension sve
+	/* Save the frame record and the callee-saved registers used below */
+	stp	x29, x30, [sp, #-64]!
+	mov	x29, sp
+	stp	x19, x20, [sp, #16]
+	stp	x21, x22, [sp, #32]
+	stp	x23, x24, [sp, #48]
+
+	/* Keep the arguments in registers that survive the callback */
+	mov	x19, x0		/* dst_array */
+	mov	x20, x1		/* src_array_1 */
+	mov	x21, x2		/* src_array_2 */
+	/*
+	 * array size is an 'int': only w3 is defined on entry, so zero-extend
+	 * it rather than consuming the unspecified upper half of x3.
+	 */
+	mov	w22, w3
+	mov	x23, x4		/* callback */
+	mov	x24, xzr	/* element index */
+
+	/*
+	 * Initial predicate: lanes [0, array size). The bound must be the
+	 * array size, not the callback pointer in x4, otherwise every lane is
+	 * active on the first iteration regardless of the array size.
+	 */
+	whilelo	p0.s, xzr, x22
+.loop:
+	ld1w	z0.s, p0/z, [x20, x24, lsl 2]
+	ld1w	z1.s, p0/z, [x21, x24, lsl 2]
+
+	/* Invoke the world switch callback */
+	blr	x23
+
+	/*
+	 * Exit loop if callback returns non-zero. The callback returns a
+	 * 'bool', so only the low byte of w0 is tested.
+	 */
+	tst	w0, #0xff
+	bne	.exit_loop
+
+	sub	z0.s, z0.s, z1.s
+	st1w	z0.s, p0, [x19, x24, lsl 2]
+	incw	x24
+
+	/* Continue while at least one lane is still inside the array */
+	whilelo	p0.s, x24, x22
+	bne	.loop
+.exit_loop:
+	/* w0 still holds the value returned by the last callback invocation */
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldp	x29, x30, [sp], #64
+	ret
+.arch_extension nosve
+endfunc sve_subtract_arrays_interleaved
+
+/*
+ * Subtracts arrays using SVE operations.
+ * dst_array = src_array_1 - src_array_2
+ *
+ * Leaf routine: only caller-saved registers (x4, x5, z0, z1, p0) are used,
+ * so no stack frame is set up.
+ *
+ * Inputs:
+ * x0 - dst_array
+ * x1 - src_array_1
+ * x2 - src_array_2
+ * w3 - array size (number of 32-bit elements)
+ * Returns:
+ * none
+ */
+func sve_subtract_arrays
+.arch_extension sve
+	/*
+	 * array size is an 'int': only w3 is defined on entry, so zero-extend
+	 * it rather than consuming the unspecified upper half of x3.
+	 */
+	mov	w4, w3
+	mov	x5, xzr		/* element index */
+
+	/* Initial predicate: lanes [0, array size) */
+	whilelo	p0.s, xzr, x4
+.sub_loop:
+	ld1w	z0.s, p0/z, [x1, x5, lsl 2]
+	ld1w	z1.s, p0/z, [x2, x5, lsl 2]
+	sub	z0.s, z0.s, z1.s
+	st1w	z0.s, p0, [x0, x5, lsl 2]
+	incw	x5
+
+	/* Continue while at least one lane is still inside the array */
+	whilelo	p0.s, x5, x4
+	bne	.sub_loop
+	ret
+.arch_extension nosve
+endfunc sve_subtract_arrays
+
+#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
diff --git a/tftf/framework/framework.mk b/tftf/framework/framework.mk
index ab9033a..ddae823 100644
--- a/tftf/framework/framework.mk
+++ b/tftf/framework/framework.mk
@@ -88,7 +88,8 @@
lib/extensions/sme/aarch64/sme2.c \
lib/extensions/sme/aarch64/sme_helpers.S \
lib/extensions/sme/aarch64/sme2_helpers.S \
- lib/extensions/sve/aarch64/sve.c
+ lib/extensions/sve/aarch64/sve.c \
+ lib/extensions/sve/aarch64/sve_helpers.S
endif
TFTF_LINKERFILE := tftf/framework/tftf.ld.S
diff --git a/tftf/tests/extensions/sve/sve_operations.S b/tftf/tests/extensions/sve/sve_operations.S
deleted file mode 100644
index e528b2b..0000000
--- a/tftf/tests/extensions/sve/sve_operations.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-
-#include "./test_sve.h"
-
-#ifdef __aarch64__
-#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-
-/*
- * Based on example code from the Arm Compiler Scalable Vector Extension User
- * Guide[1].
- * [1] https://developer.arm.com/docs/100891/latest/getting-started-with-the-sve-compiler/compiling-c-and-c-code-for-sve-enabled-targets
- */
-
- .arch armv8.2-a+crc+fp16+sve
- .global sve_subtract_arrays
-func sve_subtract_arrays
- mov x4, SVE_ARRAYSIZE
- mov x5, x4
- mov x3, 0
- whilelo p0.s, xzr, x4
-.loop:
- ld1w z0.s, p0/z, [x1, x3, lsl 2]
- ld1w z1.s, p0/z, [x2, x3, lsl 2]
- sub z0.s, z0.s, z1.s
- st1w z0.s, p0, [x0, x3, lsl 2]
- incw x3
- whilelo p0.s, x3, x5
- bne .loop
- ret
-endfunc sve_subtract_arrays
-
-#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
-#endif /* __aarch64__ */
diff --git a/tftf/tests/extensions/sve/test_sve.c b/tftf/tests/extensions/sve/test_sve.c
index eabc0de..68ab775 100644
--- a/tftf/tests/extensions/sve/test_sve.c
+++ b/tftf/tests/extensions/sve/test_sve.c
@@ -15,9 +15,6 @@
#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-extern void sve_subtract_arrays(int *difference, const int *sve_op_1,
- const int *sve_op_2);
-
static int sve_difference[SVE_ARRAYSIZE];
static int sve_op_1[SVE_ARRAYSIZE];
static int sve_op_2[SVE_ARRAYSIZE];
@@ -43,7 +40,7 @@
}
/* Perform SVE operations */
- sve_subtract_arrays(sve_difference, sve_op_1, sve_op_2);
+ sve_subtract_arrays(sve_difference, sve_op_1, sve_op_2, SVE_ARRAYSIZE);
return TEST_RESULT_SUCCESS;
}
diff --git a/tftf/tests/runtime_services/secure_service/spm_common.c b/tftf/tests/runtime_services/secure_service/spm_common.c
index 60b77b8..0e1c694 100644
--- a/tftf/tests/runtime_services/secure_service/spm_common.c
+++ b/tftf/tests/runtime_services/secure_service/spm_common.c
@@ -103,90 +103,6 @@
ret.arg7);
}
-void fill_sve_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS])
-{
-#ifdef __aarch64__
- __asm__ volatile(
- ".arch_extension sve\n"
- fill_sve_helper(0)
- fill_sve_helper(1)
- fill_sve_helper(2)
- fill_sve_helper(3)
- fill_sve_helper(4)
- fill_sve_helper(5)
- fill_sve_helper(6)
- fill_sve_helper(7)
- fill_sve_helper(8)
- fill_sve_helper(9)
- fill_sve_helper(10)
- fill_sve_helper(11)
- fill_sve_helper(12)
- fill_sve_helper(13)
- fill_sve_helper(14)
- fill_sve_helper(15)
- fill_sve_helper(16)
- fill_sve_helper(17)
- fill_sve_helper(18)
- fill_sve_helper(19)
- fill_sve_helper(20)
- fill_sve_helper(21)
- fill_sve_helper(22)
- fill_sve_helper(23)
- fill_sve_helper(24)
- fill_sve_helper(25)
- fill_sve_helper(26)
- fill_sve_helper(27)
- fill_sve_helper(28)
- fill_sve_helper(29)
- fill_sve_helper(30)
- fill_sve_helper(31)
- ".arch_extension nosve\n"
- : : "r" (v));
-#endif
-}
-
-void read_sve_vector_regs(sve_vector_t v[SVE_NUM_VECTORS])
-{
-#ifdef __aarch64__
- __asm__ volatile(
- ".arch_extension sve\n"
- read_sve_helper(0)
- read_sve_helper(1)
- read_sve_helper(2)
- read_sve_helper(3)
- read_sve_helper(4)
- read_sve_helper(5)
- read_sve_helper(6)
- read_sve_helper(7)
- read_sve_helper(8)
- read_sve_helper(9)
- read_sve_helper(10)
- read_sve_helper(11)
- read_sve_helper(12)
- read_sve_helper(13)
- read_sve_helper(14)
- read_sve_helper(15)
- read_sve_helper(16)
- read_sve_helper(17)
- read_sve_helper(18)
- read_sve_helper(19)
- read_sve_helper(20)
- read_sve_helper(21)
- read_sve_helper(22)
- read_sve_helper(23)
- read_sve_helper(24)
- read_sve_helper(25)
- read_sve_helper(26)
- read_sve_helper(27)
- read_sve_helper(28)
- read_sve_helper(29)
- read_sve_helper(30)
- read_sve_helper(31)
- ".arch_extension nosve\n"
- : : "r" (v));
-#endif
-}
-
/*
* check_spmc_execution_level
*
diff --git a/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S b/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S
deleted file mode 100644
index f538b2c..0000000
--- a/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2022, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-
-#ifdef __aarch64__
-#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-
-#define SVE_ARRAYSIZE 1024
-
-/*
- * Based on example code from the Arm Compiler Scalable Vector Extension User
- * Guide[1].
- * [1] https://developer.arm.com/docs/100891/latest/getting-started-with-the-sve-compiler/compiling-c-and-c-code-for-sve-enabled-targets
- */
-
- .arch armv8.2-a+crc+fp16+sve
- .global sve_subtract_interleaved_smc
-func sve_subtract_interleaved_smc
- mov x4, SVE_ARRAYSIZE
- mov x5, x4
- mov x3, 0
- whilelo p0.s, xzr, x4
-.loop:
- ld1w z0.s, p0/z, [x1, x3, lsl 2]
- ld1w z1.s, p0/z, [x2, x3, lsl 2]
- sub z0.s, z0.s, z1.s
- st1w z0.s, p0, [x0, x3, lsl 2]
- incw x3
-
- stp x0, x1, [sp, #-48]!
- stp x2, x3, [sp, #16]
- stp x4, x5, [sp, #32]
-
- /*
- * Forge a FF-A direct request with a command for cactus to fill SIMD
- * vectors in the secure world.
- */
- mov w0, #0x6f /* FFA_MSG_SEND_DIRECT_REQ_SMC32 */
- movk w0, #0x8400, lsl #16
- mov x1, #0x8001 /* src: nwd, dest: SP1 */
- mov x2, xzr
- mov x3, #0x4d44
- movk w3, #0x5349, lsl #16 /* CACTUS_REQ_SIMD_FILL_CMD */
- smc #0
- and w1, w0, #0xffff
- cmp w1, #0x70 /* FFA_MSG_SEND_DIRECT_RESP_SMC32 (low 16bits) */
- bne . /* Test hangs if direct response not received */
- cmp w3, #0x0 /* Check CACTUS_SUCCESS (0x0) returned */
- bne .
- ldp x4, x5, [sp, #32]
- ldp x2, x3, [sp, #16]
- ldp x0, x1, [sp], #48
-
- whilelo p0.s, x3, x5
- bne .loop
- ret
-endfunc sve_subtract_interleaved_smc
-
-#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
-#endif /* __aarch64__ */
diff --git a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
index 8cb54f7..8f090a2 100644
--- a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
+++ b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
@@ -9,17 +9,15 @@
#include <ffa_helpers.h>
#include <fpu.h>
#include <test_helpers.h>
+#include <lib/extensions/sve.h>
#define SENDER HYP_ID
#define RECEIVER SP_ID(1)
#define SVE_TEST_ITERATIONS 100
-#define SVE_ARRAYSIZE 1024
+#define NS_SVE_OP_ARRAYSIZE 1024
static const struct ffa_uuid expected_sp_uuids[] = { {PRIMARY_UUID} };
-extern void sve_subtract_interleaved_smc(int *difference, const int *sve_op_1,
- const int *sve_op_2);
-
static test_result_t fp_vector_compare(uint8_t *a, uint8_t *b,
size_t vector_size, uint8_t vectors_num)
{
@@ -31,8 +29,8 @@
static sve_vector_t sve_vectors_input[SVE_NUM_VECTORS] __aligned(16);
static sve_vector_t sve_vectors_output[SVE_NUM_VECTORS] __aligned(16);
-static int sve_op_1[SVE_ARRAYSIZE];
-static int sve_op_2[SVE_ARRAYSIZE];
+static int sve_op_1[NS_SVE_OP_ARRAYSIZE];
+static int sve_op_2[NS_SVE_OP_ARRAYSIZE];
static fpu_reg_state_t g_fpu_template;
/*
@@ -114,7 +112,7 @@
}
/* Fill SVE vector registers with the buffer contents prepared above. */
- fill_sve_vector_regs(sve_vectors_input);
+ sve_fill_vector_regs(sve_vectors_input);
/*
* Call cactus secure partition which uses SIMD (and expect it doesn't
@@ -131,7 +129,7 @@
}
/* Get the SVE vectors state after returning to normal world. */
- read_sve_vector_regs(sve_vectors_output);
+ sve_read_vector_regs(sve_vectors_output);
/* Compare to state before calling into secure world. */
return fp_vector_compare((uint8_t *)sve_vectors_input,
@@ -140,12 +138,36 @@
}
/*
+ * Sends SIMD fill command to Cactus SP
+ * Returns:
+ * false - On success
+ * true - On failure
+ */
+#ifdef __aarch64__
+static bool callback_enter_cactus_sp(void)
+{
+	struct ffa_value resp;
+
+	/* Ask the Cactus SP to fill its SIMD vectors (forces a world switch) */
+	resp = cactus_req_simd_fill_send_cmd(SENDER, RECEIVER);
+
+	/*
+	 * Failure (true) if the reply is not a direct response, or if it is
+	 * one but carries CACTUS_ERROR. The short-circuit keeps the response
+	 * payload from being inspected unless it is a direct response.
+	 */
+	return !is_ffa_direct_response(resp) ||
+	       (cactus_get_response(resp) == CACTUS_ERROR);
+}
+#endif /* __aarch64__ */
+
+/*
* Tests that SVE vector operations in normal world are not affected by context
* switches between normal world and the secure world.
*/
test_result_t test_sve_vectors_operations(void)
{
unsigned int val;
+ bool cb_err;
SKIP_TEST_IF_SVE_NOT_SUPPORTED();
@@ -156,7 +178,7 @@
val = 2 * SVE_TEST_ITERATIONS;
- for (unsigned int i = 0; i < SVE_ARRAYSIZE; i++) {
+ for (unsigned int i = 0; i < NS_SVE_OP_ARRAYSIZE; i++) {
sve_op_1[i] = val;
sve_op_2[i] = 1;
}
@@ -167,11 +189,19 @@
for (unsigned int i = 0; i < SVE_TEST_ITERATIONS; i++) {
/* Perform SVE operations with intermittent calls to Swd. */
- sve_subtract_interleaved_smc(sve_op_1, sve_op_1, sve_op_2);
+ cb_err = sve_subtract_arrays_interleaved(sve_op_1, sve_op_1,
+ sve_op_2,
+ NS_SVE_OP_ARRAYSIZE,
+ &callback_enter_cactus_sp);
+ if (cb_err == true) {
+ ERROR("Callback to Cactus SP failed\n");
+ return TEST_RESULT_FAIL;
+ }
+
}
/* Check result of SVE operations. */
- for (unsigned int i = 0; i < SVE_ARRAYSIZE; i++) {
+ for (unsigned int i = 0; i < NS_SVE_OP_ARRAYSIZE; i++) {
if (sve_op_1[i] != (val - SVE_TEST_ITERATIONS)) {
return TEST_RESULT_FAIL;
}
diff --git a/tftf/tests/tests-cpu-extensions.mk b/tftf/tests/tests-cpu-extensions.mk
index f838b4b..0b1839a 100644
--- a/tftf/tests/tests-cpu-extensions.mk
+++ b/tftf/tests/tests-cpu-extensions.mk
@@ -13,7 +13,6 @@
extensions/pmuv3/test_pmuv3.c \
extensions/mte/test_mte.c \
extensions/pauth/test_pauth.c \
- extensions/sve/sve_operations.S \
extensions/sme/test_sme.c \
extensions/sme/test_sme2.c \
extensions/spe/test_spe.c \
diff --git a/tftf/tests/tests-spm.mk b/tftf/tests/tests-spm.mk
index 737c4cb..c0a7eb0 100644
--- a/tftf/tests/tests-spm.mk
+++ b/tftf/tests/tests-spm.mk
@@ -27,7 +27,6 @@
TESTS_SOURCES += \
$(addprefix tftf/tests/runtime_services/secure_service/, \
test_spm_cpu_features.c \
- sve_operations_cactus.S \
)
TESTS_SOURCES += \