Add support for Branch Target Identification

This patch adds the functionality needed for platforms to provide the
Branch Target Identification (BTI) extension, introduced to AArch64
in Armv8.5-A, which adds the BTI instruction used to mark valid
targets for indirect branches. The patch sets the new GP bit [50] in
the stage 1 Translation Table Block and Page entries to denote guarded
EL3 code pages, which causes the processor to trap instructions in
protected pages that try to perform an indirect branch to any
instruction other than BTI.
The BTI feature is selected by the BRANCH_PROTECTION option, which
supersedes the previous ENABLE_PAUTH option used for Armv8.3-A Pointer
Authentication and is disabled by default. Enabling BTI requires
compiler support and was tested with GCC versions 9.0.0, 9.0.1 and
10.0.0.
The assembly macros and helpers are modified to accommodate the BTI
instruction.
This is an experimental feature.
Note: The previous ENABLE_PAUTH build option to enable PAuth in EL3
is now an internal flag; the BRANCH_PROTECTION flag should be used
instead to enable Pointer Authentication.
Note: The USE_LIBROM=1 option is currently not supported.

Change-Id: Ifaf4438609b16647dc79468b70cd1f47a623362e
Signed-off-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
diff --git a/lib/cpus/aarch64/cpuamu_helpers.S b/lib/cpus/aarch64/cpuamu_helpers.S
index 79b7288..5a77fc7 100644
--- a/lib/cpus/aarch64/cpuamu_helpers.S
+++ b/lib/cpus/aarch64/cpuamu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2018-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -23,21 +23,17 @@
  */
 func cpuamu_cnt_read
 	adr	x1, 1f
-	lsl	x0, x0, #3
-	add	x1, x1, x0
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x1
 
-1:
-	mrs	x0, CPUAMEVCNTR0_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR1_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR2_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR3_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR4_EL0
-	ret
+1:	read	CPUAMEVCNTR0_EL0
+	read	CPUAMEVCNTR1_EL0
+	read	CPUAMEVCNTR2_EL0
+	read	CPUAMEVCNTR3_EL0
+	read	CPUAMEVCNTR4_EL0
 endfunc cpuamu_cnt_read
 
 /*
@@ -47,21 +43,17 @@
  */
 func cpuamu_cnt_write
 	adr	x2, 1f
-	lsl	x0, x0, #3
-	add	x2, x2, x0
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x2
 
-1:
-	msr	CPUAMEVCNTR0_EL0, x0
-	ret
-	msr	CPUAMEVCNTR1_EL0, x0
-	ret
-	msr	CPUAMEVCNTR2_EL0, x0
-	ret
-	msr	CPUAMEVCNTR3_EL0, x0
-	ret
-	msr	CPUAMEVCNTR4_EL0, x0
-	ret
+1:	write	CPUAMEVCNTR0_EL0
+	write	CPUAMEVCNTR1_EL0
+	write	CPUAMEVCNTR2_EL0
+	write	CPUAMEVCNTR3_EL0
+	write	CPUAMEVCNTR4_EL0
 endfunc cpuamu_cnt_write
 
 /*