refactor(sve): move sve operations to a lib routine

This patch moves the SVE subtract operation into a common SVE library
routine. The routine takes a callback function that performs the world
switch while the SVE operations run in a loop.

The callback is invoked after the z0 and z1 vectors are loaded and
before the difference is computed and stored to the destination array.

This refactoring allows the function to be reused later to context
switch from the NS world to either the Secure world or the Realm world,
depending on the callback supplied.

This patch also moves the routines that fill and read the SVE vector
registers into the common SVE library.

Signed-off-by: Arunachalam Ganapathy <arunachalam.ganapathy@arm.com>
Change-Id: Iceb34b96fa85597be63a50c429ae0eb29f8fcaf8
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index 994fbfe..2fdaa55 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -33,6 +33,16 @@
 
 void sve_config_vq(uint8_t sve_vq);
 uint32_t sve_probe_vl(uint8_t sve_max_vq);
+void sve_fill_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS]);
+void sve_read_vector_regs(sve_vector_t v[SVE_NUM_VECTORS]);
+
+/* Assembly routines: dst[i] = src1[i] - src2[i] for i in [0, array_size) */
+bool sve_subtract_arrays_interleaved(int *dst_array, const int *src_array1,
+				     const int *src_array2, int array_size,
+				     bool (*world_switch_cb)(void));
+
+void sve_subtract_arrays(int *dst_array, const int *src_array1,
+			 const int *src_array2, int array_size);
 
 #ifdef __aarch64__
 
diff --git a/lib/extensions/sve/aarch64/sve.c b/lib/extensions/sve/aarch64/sve.c
index 698e78b..83f61fe 100644
--- a/lib/extensions/sve/aarch64/sve.c
+++ b/lib/extensions/sve/aarch64/sve.c
@@ -83,3 +83,87 @@
 
 	return vl_bitmap;
 }
+
+void sve_fill_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS])
+{
+	assert(is_armv8_2_sve_present());
+	/* Load Z regs from v[]; "memory" orders earlier stores to v[]. */
+	__asm__ volatile(
+		".arch_extension sve\n"
+		fill_sve_helper(0)
+		fill_sve_helper(1)
+		fill_sve_helper(2)
+		fill_sve_helper(3)
+		fill_sve_helper(4)
+		fill_sve_helper(5)
+		fill_sve_helper(6)
+		fill_sve_helper(7)
+		fill_sve_helper(8)
+		fill_sve_helper(9)
+		fill_sve_helper(10)
+		fill_sve_helper(11)
+		fill_sve_helper(12)
+		fill_sve_helper(13)
+		fill_sve_helper(14)
+		fill_sve_helper(15)
+		fill_sve_helper(16)
+		fill_sve_helper(17)
+		fill_sve_helper(18)
+		fill_sve_helper(19)
+		fill_sve_helper(20)
+		fill_sve_helper(21)
+		fill_sve_helper(22)
+		fill_sve_helper(23)
+		fill_sve_helper(24)
+		fill_sve_helper(25)
+		fill_sve_helper(26)
+		fill_sve_helper(27)
+		fill_sve_helper(28)
+		fill_sve_helper(29)
+		fill_sve_helper(30)
+		fill_sve_helper(31)
+		".arch_extension nosve\n"
+		: : "r" (v) : "memory");
+}
+
+void sve_read_vector_regs(sve_vector_t v[SVE_NUM_VECTORS])
+{
+	assert(is_armv8_2_sve_present());
+	/* Store Z regs to v[]; "memory" tells the compiler v[] is written. */
+	__asm__ volatile(
+		".arch_extension sve\n"
+		read_sve_helper(0)
+		read_sve_helper(1)
+		read_sve_helper(2)
+		read_sve_helper(3)
+		read_sve_helper(4)
+		read_sve_helper(5)
+		read_sve_helper(6)
+		read_sve_helper(7)
+		read_sve_helper(8)
+		read_sve_helper(9)
+		read_sve_helper(10)
+		read_sve_helper(11)
+		read_sve_helper(12)
+		read_sve_helper(13)
+		read_sve_helper(14)
+		read_sve_helper(15)
+		read_sve_helper(16)
+		read_sve_helper(17)
+		read_sve_helper(18)
+		read_sve_helper(19)
+		read_sve_helper(20)
+		read_sve_helper(21)
+		read_sve_helper(22)
+		read_sve_helper(23)
+		read_sve_helper(24)
+		read_sve_helper(25)
+		read_sve_helper(26)
+		read_sve_helper(27)
+		read_sve_helper(28)
+		read_sve_helper(29)
+		read_sve_helper(30)
+		read_sve_helper(31)
+		".arch_extension nosve\n"
+		: : "r" (v) : "memory");
+}
diff --git a/lib/extensions/sve/aarch64/sve_helpers.S b/lib/extensions/sve/aarch64/sve_helpers.S
new file mode 100644
index 0000000..128b350
--- /dev/null
+++ b/lib/extensions/sve/aarch64/sve_helpers.S
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+
+.global	sve_subtract_arrays_interleaved
+.global	sve_subtract_arrays
+
+#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
+
+/*
+ * Based on example code from:
+ * Arm Compiler Scalable Vector Extension User Guide Version 6.12 [1].
+ *
+ * [1] https://developer.arm.com/documentation/100891/0612/getting-started-with-the-sve-compiler/compiling-c-and-c---code-for-sve-enabled-targets
+ */
+
+/*
+ * Subtracts arrays using SVE operations with interleaved callback.
+ * dst_array = src_array_1 - src_array_2
+ * Inputs:
+ *   x0 - dst_array
+ *   x1 - src_array_1
+ *   x2 - src_array_2
+ *   w3 - array size in 32-bit elements (bits [63:32] of x3 are ignored)
+ *   x4 - callback function pointer
+ * Returns:
+ *   Callback function's return value (true aborts the loop early)
+ */
+func sve_subtract_arrays_interleaved
+.arch_extension sve
+	stp	x29, x30, [sp, #-80]!
+	mov	x29, sp
+	stp	x19, x20, [sp, #16]
+	mov	x19, x0
+	mov	x20, x1
+	stp	x21, x22, [sp, #32]
+	mov	x21, x2
+	mov	x22, x3
+	stp	x23, x24, [sp, #48]
+	mov	x23, x4
+	mov	w24, w3			/* zero-extend the int size into x24 */
+	str	x25, [sp, #64]
+	mov	x25, 0
+	/* First predicate is bounded by the array size held in x24 */
+	whilelo	p0.s, xzr, x24
+.loop:
+	ld1w	z0.s, p0/z, [x20, x25, lsl 2]
+	ld1w	z1.s, p0/z, [x21, x25, lsl 2]
+
+	/* Invoke the world switch callback; z0, z1, p0 stay live across it */
+	blr	x23
+
+	/* Exit loop if callback returns true (a bool occupies w0[7:0] only) */
+	tst	w0, #0xff
+	bne	.exit_loop
+
+	sub	z0.s, z0.s, z1.s
+	st1w	z0.s, p0, [x19, x25, lsl 2]
+	incw	x25
+
+	whilelo	p0.s, x25, x24
+	bne	.loop
+.exit_loop:
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldr	x25, [sp, #64]
+	ldp	x29, x30, [sp], #80
+	ret
+.arch_extension nosve
+endfunc sve_subtract_arrays_interleaved
+
+/*
+ * Subtracts arrays using SVE operations.
+ * dst_array = src_array_1 - src_array_2
+ * Inputs:
+ *   x0 - dst_array
+ *   x1 - src_array_1
+ *   x2 - src_array_2
+ *   w3 - array size in 32-bit elements (bits [63:32] of x3 are ignored)
+ * Returns:
+ *   none
+ */
+func sve_subtract_arrays
+.arch_extension sve
+	mov	w4, w3			/* zero-extend the int size into x4 */
+	mov	x5, 0			/* x5 = index of the next element */
+	whilelo	p0.s, xzr, x4		/* p0 = lanes with index < size */
+.sub_loop:
+	ld1w	z0.s, p0/z, [x1, x5, lsl 2]
+	ld1w	z1.s, p0/z, [x2, x5, lsl 2]
+	sub	z0.s, z0.s, z1.s
+	st1w	z0.s, p0, [x0, x5, lsl 2]
+	incw	x5			/* advance by one vector of words */
+	whilelo	p0.s, x5, x4
+	bne	.sub_loop		/* repeat while any lane is active */
+	ret
+.arch_extension nosve
+endfunc sve_subtract_arrays
+
+#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
diff --git a/tftf/framework/framework.mk b/tftf/framework/framework.mk
index ab9033a..ddae823 100644
--- a/tftf/framework/framework.mk
+++ b/tftf/framework/framework.mk
@@ -88,7 +88,8 @@
 	lib/extensions/sme/aarch64/sme2.c				\
 	lib/extensions/sme/aarch64/sme_helpers.S			\
 	lib/extensions/sme/aarch64/sme2_helpers.S			\
-	lib/extensions/sve/aarch64/sve.c
+	lib/extensions/sve/aarch64/sve.c				\
+	lib/extensions/sve/aarch64/sve_helpers.S
 endif
 
 TFTF_LINKERFILE		:=	tftf/framework/tftf.ld.S
diff --git a/tftf/tests/extensions/sve/sve_operations.S b/tftf/tests/extensions/sve/sve_operations.S
deleted file mode 100644
index e528b2b..0000000
--- a/tftf/tests/extensions/sve/sve_operations.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-
-#include "./test_sve.h"
-
-#ifdef __aarch64__
-#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-
-/*
- * Based on example code from the Arm Compiler Scalable Vector Extension User
- * Guide[1].
- * [1] https://developer.arm.com/docs/100891/latest/getting-started-with-the-sve-compiler/compiling-c-and-c-code-for-sve-enabled-targets
- */
-
-	.arch armv8.2-a+crc+fp16+sve
-	.global	sve_subtract_arrays
-func sve_subtract_arrays
-	mov	x4, SVE_ARRAYSIZE
-	mov	x5, x4
-	mov	x3, 0
-	whilelo	p0.s, xzr, x4
-.loop:
-	ld1w	z0.s, p0/z, [x1, x3, lsl 2]
-	ld1w	z1.s, p0/z, [x2, x3, lsl 2]
-	sub	z0.s, z0.s, z1.s
-	st1w	z0.s, p0, [x0, x3, lsl 2]
-	incw	x3
-	whilelo	p0.s, x3, x5
-	bne	.loop
-	ret
-endfunc sve_subtract_arrays
-
-#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
-#endif /* __aarch64__ */
diff --git a/tftf/tests/extensions/sve/test_sve.c b/tftf/tests/extensions/sve/test_sve.c
index eabc0de..68ab775 100644
--- a/tftf/tests/extensions/sve/test_sve.c
+++ b/tftf/tests/extensions/sve/test_sve.c
@@ -15,9 +15,6 @@
 
 #if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
 
-extern void sve_subtract_arrays(int *difference, const int *sve_op_1,
-				const int *sve_op_2);
-
 static int sve_difference[SVE_ARRAYSIZE];
 static int sve_op_1[SVE_ARRAYSIZE];
 static int sve_op_2[SVE_ARRAYSIZE];
@@ -43,7 +40,7 @@
 	}
 
 	/* Perform SVE operations */
-	sve_subtract_arrays(sve_difference, sve_op_1, sve_op_2);
+	sve_subtract_arrays(sve_difference, sve_op_1, sve_op_2, SVE_ARRAYSIZE);
 
 	return TEST_RESULT_SUCCESS;
 }
diff --git a/tftf/tests/runtime_services/secure_service/spm_common.c b/tftf/tests/runtime_services/secure_service/spm_common.c
index 60b77b8..0e1c694 100644
--- a/tftf/tests/runtime_services/secure_service/spm_common.c
+++ b/tftf/tests/runtime_services/secure_service/spm_common.c
@@ -103,90 +103,6 @@
 		ret.arg7);
 }
 
-void fill_sve_vector_regs(const sve_vector_t v[SVE_NUM_VECTORS])
-{
-#ifdef __aarch64__
-	__asm__ volatile(
-		".arch_extension sve\n"
-		fill_sve_helper(0)
-		fill_sve_helper(1)
-		fill_sve_helper(2)
-		fill_sve_helper(3)
-		fill_sve_helper(4)
-		fill_sve_helper(5)
-		fill_sve_helper(6)
-		fill_sve_helper(7)
-		fill_sve_helper(8)
-		fill_sve_helper(9)
-		fill_sve_helper(10)
-		fill_sve_helper(11)
-		fill_sve_helper(12)
-		fill_sve_helper(13)
-		fill_sve_helper(14)
-		fill_sve_helper(15)
-		fill_sve_helper(16)
-		fill_sve_helper(17)
-		fill_sve_helper(18)
-		fill_sve_helper(19)
-		fill_sve_helper(20)
-		fill_sve_helper(21)
-		fill_sve_helper(22)
-		fill_sve_helper(23)
-		fill_sve_helper(24)
-		fill_sve_helper(25)
-		fill_sve_helper(26)
-		fill_sve_helper(27)
-		fill_sve_helper(28)
-		fill_sve_helper(29)
-		fill_sve_helper(30)
-		fill_sve_helper(31)
-		".arch_extension nosve\n"
-		: : "r" (v));
-#endif
-}
-
-void read_sve_vector_regs(sve_vector_t v[SVE_NUM_VECTORS])
-{
-#ifdef __aarch64__
-	__asm__ volatile(
-		".arch_extension sve\n"
-		read_sve_helper(0)
-		read_sve_helper(1)
-		read_sve_helper(2)
-		read_sve_helper(3)
-		read_sve_helper(4)
-		read_sve_helper(5)
-		read_sve_helper(6)
-		read_sve_helper(7)
-		read_sve_helper(8)
-		read_sve_helper(9)
-		read_sve_helper(10)
-		read_sve_helper(11)
-		read_sve_helper(12)
-		read_sve_helper(13)
-		read_sve_helper(14)
-		read_sve_helper(15)
-		read_sve_helper(16)
-		read_sve_helper(17)
-		read_sve_helper(18)
-		read_sve_helper(19)
-		read_sve_helper(20)
-		read_sve_helper(21)
-		read_sve_helper(22)
-		read_sve_helper(23)
-		read_sve_helper(24)
-		read_sve_helper(25)
-		read_sve_helper(26)
-		read_sve_helper(27)
-		read_sve_helper(28)
-		read_sve_helper(29)
-		read_sve_helper(30)
-		read_sve_helper(31)
-		".arch_extension nosve\n"
-		: : "r" (v));
-#endif
-}
-
 /*
  * check_spmc_execution_level
  *
diff --git a/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S b/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S
deleted file mode 100644
index f538b2c..0000000
--- a/tftf/tests/runtime_services/secure_service/sve_operations_cactus.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2022, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-
-#ifdef __aarch64__
-#if __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0)
-
-#define SVE_ARRAYSIZE 1024
-
-/*
- * Based on example code from the Arm Compiler Scalable Vector Extension User
- * Guide[1].
- * [1] https://developer.arm.com/docs/100891/latest/getting-started-with-the-sve-compiler/compiling-c-and-c-code-for-sve-enabled-targets
- */
-
-	.arch armv8.2-a+crc+fp16+sve
-	.global	sve_subtract_interleaved_smc
-func sve_subtract_interleaved_smc
-	mov	x4, SVE_ARRAYSIZE
-	mov	x5, x4
-	mov	x3, 0
-	whilelo	p0.s, xzr, x4
-.loop:
-	ld1w	z0.s, p0/z, [x1, x3, lsl 2]
-	ld1w	z1.s, p0/z, [x2, x3, lsl 2]
-	sub	z0.s, z0.s, z1.s
-	st1w	z0.s, p0, [x0, x3, lsl 2]
-	incw	x3
-
-	stp x0, x1, [sp, #-48]!
-	stp x2, x3, [sp, #16]
-	stp x4, x5, [sp, #32]
-
-	/*
-	 * Forge a FF-A direct request with a command for cactus to fill SIMD
-	 * vectors in the secure world.
-	 */
-	mov w0, #0x6f                   /* FFA_MSG_SEND_DIRECT_REQ_SMC32 */
-	movk w0, #0x8400, lsl #16
-	mov     x1, #0x8001             /* src: nwd, dest: SP1 */
-	mov     x2, xzr
-	mov     x3, #0x4d44
-	movk    w3, #0x5349, lsl #16    /* CACTUS_REQ_SIMD_FILL_CMD */
-	smc     #0
-	and     w1, w0, #0xffff
-	cmp     w1, #0x70               /* FFA_MSG_SEND_DIRECT_RESP_SMC32 (low 16bits) */
-	bne     .			/* Test hangs if direct response not received */
-	cmp	w3, #0x0		/* Check CACTUS_SUCCESS (0x0) returned */
-	bne	.
-	ldp     x4, x5, [sp, #32]
-	ldp     x2, x3, [sp, #16]
-	ldp     x0, x1, [sp], #48
-
-	whilelo	p0.s, x3, x5
-	bne	.loop
-	ret
-endfunc sve_subtract_interleaved_smc
-
-#endif /* __GNUC__ > 8 || (__GNUC__ == 8 && __GNUC_MINOR__ > 0) */
-#endif /* __aarch64__ */
diff --git a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
index 8cb54f7..8f090a2 100644
--- a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
+++ b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
@@ -9,17 +9,15 @@
 #include <ffa_helpers.h>
 #include <fpu.h>
 #include <test_helpers.h>
+#include <lib/extensions/sve.h>
 
 #define SENDER HYP_ID
 #define RECEIVER SP_ID(1)
 #define SVE_TEST_ITERATIONS	100
-#define SVE_ARRAYSIZE		1024
+#define NS_SVE_OP_ARRAYSIZE		1024
 
 static const struct ffa_uuid expected_sp_uuids[] = { {PRIMARY_UUID} };
 
-extern void sve_subtract_interleaved_smc(int *difference, const int *sve_op_1,
-				       const int *sve_op_2);
-
 static test_result_t fp_vector_compare(uint8_t *a, uint8_t *b,
 	size_t vector_size, uint8_t vectors_num)
 {
@@ -31,8 +29,8 @@
 
 static sve_vector_t sve_vectors_input[SVE_NUM_VECTORS] __aligned(16);
 static sve_vector_t sve_vectors_output[SVE_NUM_VECTORS] __aligned(16);
-static int sve_op_1[SVE_ARRAYSIZE];
-static int sve_op_2[SVE_ARRAYSIZE];
+static int sve_op_1[NS_SVE_OP_ARRAYSIZE];
+static int sve_op_2[NS_SVE_OP_ARRAYSIZE];
 static fpu_reg_state_t g_fpu_template;
 
 /*
@@ -114,7 +112,7 @@
 	}
 
 	/* Fill SVE vector registers with the buffer contents prepared above. */
-	fill_sve_vector_regs(sve_vectors_input);
+	sve_fill_vector_regs(sve_vectors_input);
 
 	/*
 	 * Call cactus secure partition which uses SIMD (and expect it doesn't
@@ -131,7 +129,7 @@
 	}
 
 	/* Get the SVE vectors state after returning to normal world. */
-	read_sve_vector_regs(sve_vectors_output);
+	sve_read_vector_regs(sve_vectors_output);
 
 	/* Compare to state before calling into secure world. */
 	return fp_vector_compare((uint8_t *)sve_vectors_input,
@@ -140,12 +138,36 @@
 }
 
 /*
+ * Sends the SIMD fill command to the Cactus SP (SENDER -> RECEIVER).
+ * Returns:
+ *	false - direct response received and it is not CACTUS_ERROR
+ *	true  - no FF-A direct response, or Cactus replied CACTUS_ERROR
+ */
+#ifdef __aarch64__
+static bool callback_enter_cactus_sp(void)
+{
+	struct ffa_value ret = cactus_req_simd_fill_send_cmd(SENDER, RECEIVER);
+
+	if (!is_ffa_direct_response(ret)) {
+		return true;
+	}
+
+	if (cactus_get_response(ret) == CACTUS_ERROR) {
+		return true;
+	}
+
+	return false;
+}
+#endif /* __aarch64__ */
+
+/*
  * Tests that SVE vector operations in normal world are not affected by context
  * switches between normal world and the secure world.
  */
 test_result_t test_sve_vectors_operations(void)
 {
 	unsigned int val;
+	bool cb_err;
 
 	SKIP_TEST_IF_SVE_NOT_SUPPORTED();
 
@@ -156,7 +178,7 @@
 
 	val = 2 * SVE_TEST_ITERATIONS;
 
-	for (unsigned int i = 0; i < SVE_ARRAYSIZE; i++) {
+	for (unsigned int i = 0; i < NS_SVE_OP_ARRAYSIZE; i++) {
 		sve_op_1[i] = val;
 		sve_op_2[i] = 1;
 	}
@@ -167,11 +189,19 @@
 
 	for (unsigned int i = 0; i < SVE_TEST_ITERATIONS; i++) {
 		/* Perform SVE operations with intermittent calls to Swd. */
-		sve_subtract_interleaved_smc(sve_op_1, sve_op_1, sve_op_2);
+		cb_err = sve_subtract_arrays_interleaved(sve_op_1, sve_op_1,
+							 sve_op_2,
+							 NS_SVE_OP_ARRAYSIZE,
+							 &callback_enter_cactus_sp);
+		if (cb_err == true) {
+			ERROR("Callback to Cactus SP failed\n");
+			return TEST_RESULT_FAIL;
+		}
+
 	}
 
 	/* Check result of SVE operations. */
-	for (unsigned int i = 0; i < SVE_ARRAYSIZE; i++) {
+	for (unsigned int i = 0; i < NS_SVE_OP_ARRAYSIZE; i++) {
 		if (sve_op_1[i] != (val - SVE_TEST_ITERATIONS)) {
 			return TEST_RESULT_FAIL;
 		}
diff --git a/tftf/tests/tests-cpu-extensions.mk b/tftf/tests/tests-cpu-extensions.mk
index f838b4b..0b1839a 100644
--- a/tftf/tests/tests-cpu-extensions.mk
+++ b/tftf/tests/tests-cpu-extensions.mk
@@ -13,7 +13,6 @@
 	extensions/pmuv3/test_pmuv3.c					\
 	extensions/mte/test_mte.c					\
 	extensions/pauth/test_pauth.c					\
-	extensions/sve/sve_operations.S					\
 	extensions/sme/test_sme.c					\
 	extensions/sme/test_sme2.c					\
 	extensions/spe/test_spe.c					\
diff --git a/tftf/tests/tests-spm.mk b/tftf/tests/tests-spm.mk
index 737c4cb..c0a7eb0 100644
--- a/tftf/tests/tests-spm.mk
+++ b/tftf/tests/tests-spm.mk
@@ -27,7 +27,6 @@
 TESTS_SOURCES   +=                                                      \
         $(addprefix tftf/tests/runtime_services/secure_service/,        \
 	  test_spm_cpu_features.c					\
-	  sve_operations_cactus.S					\
 	 )
 
 TESTS_SOURCES	+=							\