aarch64: Enable Statistical Profiling Extensions for lower ELs

SPE is only supported in non-secure state.  Accesses to SPE specific
registers from SEL1 will trap to EL3.  During a world switch, before
`TTBR` is modified the SPE profiling buffers are drained.  This is to
avoid a potential invalid memory access in SEL1.

SPE is architecturally specified only for AArch64.

Change-Id: I04a96427d9f9d586c331913d815fdc726855f6b0
Signed-off-by: dp-arm <dimitris.papastamos@arm.com>
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index afe912a..8a6c11b 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -9,6 +9,7 @@
 #include <context.h>
 
 	.global	el1_sysregs_context_save
+	.global el1_sysregs_context_save_post_ops
 	.global	el1_sysregs_context_restore
 #if CTX_INCLUDE_FPREGS
 	.global	fpregs_context_save
@@ -111,6 +112,36 @@
 /* -----------------------------------------------------
  * The following function strictly follows the AArch64
  * PCS to use x9-x17 (temporary caller-saved registers)
+ * to do post operations after saving the EL1 system
+ * register context.
+ * -----------------------------------------------------
+ */
+func el1_sysregs_context_save_post_ops
+#if ENABLE_SPE_FOR_LOWER_ELS
+	/* Detect if SPE is implemented */
+	mrs	x9, id_aa64dfr0_el1
+	ubfx	x9, x9, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
+	cmp	x9, #0x1
+	b.ne	1f
+
+	/*
+	 * Before switching from normal world to secure world
+	 * the profiling buffers need to be drained out to memory.  This is
+	 * required to avoid an invalid memory access when TTBR is switched
+	 * for entry to SEL1.
+	 */
+	.arch	armv8.2-a+profile
+	psb	csync
+	dsb	nsh
+	.arch	armv8-a
+1:
+#endif
+	ret
+endfunc el1_sysregs_context_save_post_ops
+
+/* -----------------------------------------------------
+ * The following function strictly follows the AArch64
+ * PCS to use x9-x17 (temporary caller-saved registers)
  * to restore EL1 system register context.  It assumes
  * that 'x0' is pointing to a 'el1_sys_regs' structure
  * from where the register context will be restored
@@ -343,7 +374,7 @@
 	ldp	x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24]
 	ldp	x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
 	ldp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
-	ldp	 x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
+	ldp	x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
 	msr	sp_el0, x17
 	ldp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
 	eret