feat(fpu): add helper routines to read, write, compare FPU registers

Add helper routines to read, write, write_rand and compare FPU state
and FPU control/status registers.

These helper routines can be called by test cases running in NS-EL2,
R-EL1 or S-EL1 payloads. The caller must provide the memory that the
FPU registers are read into or written from.

Signed-off-by: Arunachalam Ganapathy <arunachalam.ganapathy@arm.com>
Change-Id: I10ae5487c9f58e46434c1bd5b42fd458ec755045
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 39461d5..4b9c33e 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -614,8 +614,9 @@
 /* FEAT_HCX HCRX_EL2 */
 DEFINE_RENAME_SYSREG_RW_FUNCS(hcrx_el2, HCRX_EL2)
 
-/* Control floating point behaviour */
+/* Floating point control and status register */
 DEFINE_RENAME_SYSREG_RW_FUNCS(fpcr, FPCR)
+DEFINE_RENAME_SYSREG_RW_FUNCS(fpsr, FPSR)
 
 /* ID_AA64ISAR2_EL1 */
 DEFINE_RENAME_SYSREG_READ_FUNC(id_aa64isar2_el1, ID_AA64ISAR2_EL1)
diff --git a/include/lib/extensions/fpu.h b/include/lib/extensions/fpu.h
index d7b4f99..1a82818 100644
--- a/include/lib/extensions/fpu.h
+++ b/include/lib/extensions/fpu.h
@@ -11,44 +11,35 @@
 #define FPU_Q_SIZE		16U
 #define FPU_Q_COUNT		32U
 
-/* These defines are needed by assembly code to access FPU registers. */
-#define FPU_OFFSET_Q		0U
-#define FPU_OFFSET_FPSR		(FPU_Q_SIZE * FPU_Q_COUNT)
-#define FPU_OFFSET_FPCR		(FPU_OFFSET_FPSR + 8)
-
 #ifndef __ASSEMBLER__
 
 #include <stdbool.h>
 #include <stdint.h>
 
-typedef struct fpu_reg_state {
-	uint8_t q[FPU_Q_COUNT][FPU_Q_SIZE];
-	unsigned long fpsr;
+typedef uint8_t fpu_q_reg_t[FPU_Q_SIZE] __aligned(16);
+typedef struct fpu_cs_regs {
 	unsigned long fpcr;
-} fpu_reg_state_t __aligned(16);
+	unsigned long fpsr;
+} fpu_cs_regs_t __aligned(16);
 
-/*
- * Read and compare FPU state registers with provided template values in parameters.
- */
-bool fpu_state_compare_template(fpu_reg_state_t *fpu);
+typedef struct fpu_state {
+	fpu_q_reg_t q_regs[FPU_Q_COUNT];
+	fpu_cs_regs_t cs_regs;
+} fpu_state_t __aligned(16);
 
-/*
- * Fill the template with random values and copy it to
- * FPU state registers(SIMD vectors, FPCR, FPSR).
- */
-void fpu_state_fill_regs_and_template(fpu_reg_state_t *fpu);
+void fpu_cs_regs_write(const fpu_cs_regs_t *cs_regs);
+void fpu_cs_regs_write_rand(fpu_cs_regs_t *cs_regs);
+void fpu_cs_regs_read(fpu_cs_regs_t *cs_regs);
+int fpu_cs_regs_compare(const fpu_cs_regs_t *s1, const fpu_cs_regs_t *s2);
 
-/*
- * This function populates the provided FPU structure with the provided template
- * regs_val for all the 32 FPU/SMID registers, and the status registers FPCR/FPSR
- */
-void fpu_state_set(fpu_reg_state_t *vec,
-		uint8_t regs_val);
+void fpu_q_regs_write_rand(fpu_q_reg_t q_regs[FPU_Q_COUNT]);
+void fpu_q_regs_read(fpu_q_reg_t q_regs[FPU_Q_COUNT]);
+int fpu_q_regs_compare(const fpu_q_reg_t s1[FPU_Q_COUNT],
+		       const fpu_q_reg_t s2[FPU_Q_COUNT]);
 
-/*
- * This function prints the content of the provided FPU structure
- */
-void fpu_state_print(fpu_reg_state_t *vec);
+void fpu_state_write_rand(fpu_state_t *fpu_state);
+void fpu_state_read(fpu_state_t *fpu_state);
+int fpu_state_compare(const fpu_state_t *s1, const fpu_state_t *s2);
 
 #endif /* __ASSEMBLER__ */
 #endif /* FPU_H */
diff --git a/lib/extensions/fpu/fpu.c b/lib/extensions/fpu/fpu.c
index b08e64c..34cbafb 100644
--- a/lib/extensions/fpu/fpu.c
+++ b/lib/extensions/fpu/fpu.c
@@ -3,6 +3,8 @@
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
+
+#include <arch_helpers.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
@@ -18,21 +20,9 @@
 #define read_simd_helper(num1, num2) "stp q"#num1", q"#num2",\
 	[%0], #"STR(2 * FPU_Q_SIZE)";"
 
-static fpu_reg_state_t g_fpu_read;
-
-static void read_fpu_state_registers(fpu_reg_state_t *fpu_template_in)
+/* Read FPU Q[0-31] and store them in 'q_regs' */
+void fpu_q_regs_read(fpu_q_reg_t q_regs[FPU_Q_COUNT])
 {
-#ifdef __aarch64__
-
-	u_register_t fpsr;
-	u_register_t fpcr;
-
-	/* Read current FPCR FPSR and write to template. */
-	__asm__ volatile ("mrs %0, fpsr\n" : "=r" (fpsr));
-	__asm__ volatile ("mrs %0, fpcr\n" : "=r" (fpcr));
-	fpu_template_in->fpsr = fpsr;
-	fpu_template_in->fpcr = fpcr;
-
 	__asm__ volatile(
 			read_simd_helper(0, 1)
 			read_simd_helper(2, 3)
@@ -51,39 +41,12 @@
 			read_simd_helper(28, 29)
 			read_simd_helper(30, 31)
 			"sub %0, %0, #" STR(FPU_Q_COUNT * FPU_Q_SIZE) ";"
-			: : "r" (fpu_template_in->q));
-#endif
+			: : "r" (q_regs));
 }
 
-void fpu_state_fill_regs_and_template(fpu_reg_state_t *fpu_template_in)
+/* Write FPU Q[0-31] registers passed in 'q_regs' */
+static void fpu_q_regs_write(const fpu_q_reg_t q_regs[FPU_Q_COUNT])
 {
-	u_register_t fpsr;
-	u_register_t fpcr;
-	u_register_t temp;
-
-	temp = rand();
-	(void)memset((void *)fpu_template_in, 0, sizeof(fpu_reg_state_t));
-
-	/*
-	 * Write random value to FPCR FPSR.
-	 * Note write will be ignored for reserved bits.
-	 */
-	__asm__ volatile ("msr fpsr, %0\n" : : "r" (temp));
-	__asm__ volatile ("msr fpcr, %0\n" : : "r" (temp));
-
-	/*
-	 * Read back current FPCR FPSR and write to template,
-	 */
-	__asm__ volatile ("mrs %0, fpsr\n" : "=r" (fpsr));
-	__asm__ volatile ("mrs %0, fpcr\n" : "=r" (fpcr));
-	fpu_template_in->fpsr = fpsr;
-	fpu_template_in->fpcr = fpcr;
-
-	for (unsigned int num = 0U; num < FPU_Q_COUNT; num++) {
-		memset((uint8_t *)fpu_template_in->q[num], temp * (num + 1),
-				sizeof(fpu_template_in->q[0]));
-	}
-
 	__asm__ volatile(
 			fill_simd_helper(0, 1)
 			fill_simd_helper(2, 3)
@@ -102,35 +65,105 @@
 			fill_simd_helper(28, 29)
 			fill_simd_helper(30, 31)
 			"sub %0, %0, #" STR(FPU_Q_COUNT * FPU_Q_SIZE) ";"
-			: : "r" (fpu_template_in->q));
+			: : "r" (q_regs));
 }
 
-void fpu_state_print(fpu_reg_state_t *vec)
+/* Read FPCR and FPSR and store them in 'cs_regs' */
+void fpu_cs_regs_read(fpu_cs_regs_t *cs_regs)
 {
-	INFO("dumping FPU registers :\n");
+	cs_regs->fpcr = read_fpcr();
+	cs_regs->fpsr = read_fpsr();
+}
+
+/* Write FPCR and FPSR passed in 'cs_regs' */
+void fpu_cs_regs_write(const fpu_cs_regs_t *cs_regs)
+{
+	write_fpcr(cs_regs->fpcr);
+	write_fpsr(cs_regs->fpsr);
+}
+
+/*
+ * Fill each entry of 'q_regs' with a byte pattern derived from a single
+ * rand() value, then write 'q_regs' to the FPU Q registers.
+ */
+void fpu_q_regs_write_rand(fpu_q_reg_t q_regs[FPU_Q_COUNT])
+{
+	uint32_t rval;
+
+	rval = rand();
+
+	memset((void *)q_regs, 0, sizeof(fpu_q_reg_t) * FPU_Q_COUNT);
 	for (unsigned int num = 0U; num < FPU_Q_COUNT; num++) {
-		uint64_t __unused *qreg = (uint64_t *)&vec->q[num];
-
-		INFO("Q[%02u]=0x%016llx_%016llx\n", num, *qreg, *(qreg + 1));
+		memset((uint8_t *)q_regs[num], rval * (num + 1),
+		       sizeof(fpu_q_reg_t));
 	}
-	INFO("FPCR=0x%lx FPSR=0x%lx\n", vec->fpcr, vec->fpsr);
+	fpu_q_regs_write(q_regs);
 }
 
-bool fpu_state_compare_template(fpu_reg_state_t *fpu_template_in)
+/*
+ * Write random values to FPCR and FPSR, then read both registers back into
+ * 'cs_regs' so that it holds the values the registers actually retained.
+ */
+void fpu_cs_regs_write_rand(fpu_cs_regs_t *cs_regs)
 {
-	(void)memset((void *)&g_fpu_read, 0, sizeof(fpu_reg_state_t));
-	read_fpu_state_registers(&g_fpu_read);
+	memset((void *)cs_regs, 0, sizeof(fpu_cs_regs_t));
 
-	if (memcmp((uint8_t *)fpu_template_in,
-			(uint8_t *)&g_fpu_read,
-			sizeof(fpu_reg_state_t)) != 0U) {
-		ERROR("%s failed\n", __func__);
-		ERROR("Read values\n");
-		fpu_state_print(&g_fpu_read);
-		ERROR("Template values\n");
-		fpu_state_print(fpu_template_in);
-		return false;
-	} else {
-		return true;
+	cs_regs->fpcr = rand();
+	cs_regs->fpsr = rand();
+
+	/*
+	 * Write random value to FPCR FPSR.
+	 * Note write will be ignored for reserved bits.
+	 */
+	fpu_cs_regs_write(cs_regs);
+
+	/* Read back current FPCR and FPSR */
+	fpu_cs_regs_read(cs_regs);
+}
+
+/*
+ * Generate random values and write them to 'fpu_state', then write them to
+ * the FPU Q registers, FPCR and FPSR.
+ */
+void fpu_state_write_rand(fpu_state_t *fpu_state)
+{
+	fpu_q_regs_write_rand(fpu_state->q_regs);
+	fpu_cs_regs_write_rand(&fpu_state->cs_regs);
+}
+
+/* Read FPU Q registers, FPCR and FPSR and write them to 'fpu_state' */
+void fpu_state_read(fpu_state_t *fpu_state)
+{
+	fpu_q_regs_read(fpu_state->q_regs);
+	fpu_cs_regs_read(&fpu_state->cs_regs);
+}
+
+/* Return zero if FPU Q registers 's1' and 's2' match, else nonzero */
+int fpu_q_regs_compare(const fpu_q_reg_t s1[FPU_Q_COUNT],
+		       const fpu_q_reg_t s2[FPU_Q_COUNT])
+{
+	return memcmp(s1, s2, sizeof(fpu_q_reg_t) * FPU_Q_COUNT);
+}
+
+/*
+ * Return zero if FPU control and status registers 's1' and 's2' match, else
+ * nonzero.
+ */
+int fpu_cs_regs_compare(const fpu_cs_regs_t *s1, const fpu_cs_regs_t *s2)
+{
+	return memcmp(s1, s2, sizeof(fpu_cs_regs_t));
+}
+
+/* Return zero if FPU states 's1' and 's2' match, else nonzero */
+int fpu_state_compare(const fpu_state_t *s1, const fpu_state_t *s2)
+{
+	if (fpu_q_regs_compare(s1->q_regs, s2->q_regs) != 0) {
+		return 1;
 	}
+
+	if (fpu_cs_regs_compare(&s1->cs_regs, &s2->cs_regs) != 0) {
+		return 1;
+	}
+
+	return 0;
 }
diff --git a/realm/realm_payload_main.c b/realm/realm_payload_main.c
index 94ef25e..4eea234 100644
--- a/realm/realm_payload_main.c
+++ b/realm/realm_payload_main.c
@@ -18,7 +18,9 @@
 #include <realm_tests.h>
 #include <tftf_lib.h>
 
-static fpu_reg_state_t fpu_temp_rl;
+static fpu_state_t rl_fpu_state_write;
+static fpu_state_t rl_fpu_state_read;
+
 /*
  * This function reads sleep time in ms from shared buffer and spins PE
  * in a loop for that time period.
@@ -98,11 +100,13 @@
 			test_succeed = test_pmuv3_overflow_interrupt();
 			break;
 		case REALM_REQ_FPU_FILL_CMD:
-			fpu_state_fill_regs_and_template(&fpu_temp_rl);
+			fpu_state_write_rand(&rl_fpu_state_write);
 			test_succeed = true;
 			break;
 		case REALM_REQ_FPU_CMP_CMD:
-			test_succeed = fpu_state_compare_template(&fpu_temp_rl);
+			fpu_state_read(&rl_fpu_state_read);
+			test_succeed = !fpu_state_compare(&rl_fpu_state_write,
+							  &rl_fpu_state_read);
 			break;
 		case REALM_SVE_RDVL:
 			test_succeed = test_realm_sve_rdvl();
diff --git a/spm/cactus/cactus_tests/cactus_test_cpu_features.c b/spm/cactus/cactus_tests/cactus_test_cpu_features.c
index 78b89ac..a1366d3 100644
--- a/spm/cactus/cactus_tests/cactus_test_cpu_features.c
+++ b/spm/cactus/cactus_tests/cactus_test_cpu_features.c
@@ -13,7 +13,8 @@
  * Note Test must exercise FILL and COMPARE command in
  * sequence and on same CPU.
  */
-static fpu_reg_state_t g_fpu_temp;
+static fpu_state_t sp_fpu_state_write;
+static fpu_state_t sp_fpu_state_read;
 static unsigned int core_pos;
 /*
  * Fill SIMD vectors from secure world side with a unique value.
@@ -21,7 +22,7 @@
 CACTUS_CMD_HANDLER(req_simd_fill, CACTUS_REQ_SIMD_FILL_CMD)
 {
 	core_pos = platform_get_core_pos(read_mpidr_el1());
-	fpu_state_fill_regs_and_template(&g_fpu_temp);
+	fpu_state_write_rand(&sp_fpu_state_write);
 	return cactus_response(ffa_dir_msg_dest(*args),
 			       ffa_dir_msg_source(*args),
 			       CACTUS_SUCCESS);
@@ -37,7 +38,11 @@
 
 	unsigned int core_pos1 = platform_get_core_pos(read_mpidr_el1());
 	if (core_pos1 == core_pos) {
-		test_succeed = fpu_state_compare_template(&g_fpu_temp);
+		fpu_state_read(&sp_fpu_state_read);
+		if (fpu_state_compare(&sp_fpu_state_write,
+				      &sp_fpu_state_read) == 0) {
+			test_succeed = true;
+		}
 	}
 	return cactus_response(ffa_dir_msg_dest(*args),
 			ffa_dir_msg_source(*args),
diff --git a/tftf/tests/runtime_services/realm_payload/host_realm_spm.c b/tftf/tests/runtime_services/realm_payload/host_realm_spm.c
index 64de113..dbc7102 100644
--- a/tftf/tests/runtime_services/realm_payload/host_realm_spm.c
+++ b/tftf/tests/runtime_services/realm_payload/host_realm_spm.c
@@ -22,7 +22,9 @@
 static const struct ffa_uuid expected_sp_uuids[] = { {PRIMARY_UUID} };
 static struct mailbox_buffers mb;
 static bool secure_mailbox_initialised;
-static fpu_reg_state_t fpu_temp_ns;
+
+static fpu_state_t ns_fpu_state_write;
+static fpu_state_t ns_fpu_state_read;
 
 typedef enum test_rl_sec_fp_cmd {
 	CMD_SIMD_NS_FILL = 0U,
@@ -307,7 +309,7 @@
 	 * Fill all 3 world's FPU/SIMD state regs with some known values in the
 	 * beginning to have something later to compare to.
 	 */
-	fpu_state_fill_regs_and_template(&fpu_temp_ns);
+	fpu_state_write_rand(&ns_fpu_state_write);
 	if (!fpu_fill_rl()) {
 		ERROR("fpu_fill_rl error\n");
 		goto destroy_realm;
@@ -322,12 +324,14 @@
 
 		switch (cmd) {
 		case CMD_SIMD_NS_FILL:
-			/* Non secure world fill FPU/SIMD state registers */
-			fpu_state_fill_regs_and_template(&fpu_temp_ns);
+			/* Non secure world fill FPU state registers */
+			fpu_state_write_rand(&ns_fpu_state_write);
 			break;
 		case CMD_SIMD_NS_CMP:
-			/* Normal world verify its FPU/SIMD state registers data */
-			if (!fpu_state_compare_template(&fpu_temp_ns)) {
+			/* Normal world verify its FPU state registers data */
+			fpu_state_read(&ns_fpu_state_read);
+			if (fpu_state_compare(&ns_fpu_state_write,
+					      &ns_fpu_state_read)) {
 				ERROR("%s failed %d\n", __func__, __LINE__);
 				goto destroy_realm;
 			}
diff --git a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
index 2e1f1d7..bc3bb52 100644
--- a/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
+++ b/tftf/tests/runtime_services/secure_service/test_spm_cpu_features.c
@@ -32,7 +32,8 @@
 static sve_z_regs_t sve_vectors_output;
 static int sve_op_1[NS_SVE_OP_ARRAYSIZE];
 static int sve_op_2[NS_SVE_OP_ARRAYSIZE];
-static fpu_reg_state_t g_fpu_template;
+static fpu_state_t g_fpu_state_write;
+static fpu_state_t g_fpu_state_read;
 
 /*
  * Tests that SIMD vectors and FPU state are preserved during the context switches between
@@ -48,7 +49,7 @@
 	 **********************************************************************/
 	CHECK_SPMC_TESTING_SETUP(1, 1, expected_sp_uuids);
 
-	fpu_state_fill_regs_and_template(&g_fpu_template);
+	fpu_state_write_rand(&g_fpu_state_write);
 	struct ffa_value ret = cactus_req_simd_fill_send_cmd(SENDER, RECEIVER);
 
 	if (!is_ffa_direct_response(ret)) {
@@ -68,9 +69,14 @@
 	if (cactus_get_response(ret) == CACTUS_ERROR) {
 		return TEST_RESULT_FAIL;
 	}
+
 	/* Normal world verify its FPU/SIMD state registers data */
-	return fpu_state_compare_template(&g_fpu_template) ? TEST_RESULT_SUCCESS :
-		TEST_RESULT_FAIL;
+	fpu_state_read(&g_fpu_state_read);
+	if (fpu_state_compare(&g_fpu_state_write, &g_fpu_state_read) != 0) {
+		return TEST_RESULT_FAIL;
+	}
+
+	return TEST_RESULT_SUCCESS;
 }
 
 /*