TFTF: Add ARMv8.5 BTI support in assembler files

This patch adds ARMv8.5 Branch Target Identification (BTI) support to the
assembler files, keeping them fully in line with their counterparts in the
TF-A source tree.
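
When BTI is enabled, code reached through an indirect branch ("br") must
begin with a "bti j" landing pad. The jump tables in cache_helpers.S and
amu_helpers.S therefore gain one extra 4-byte instruction per entry: each
mrs/ret entry in amu_helpers.S grows from 8 to 12 bytes, and each
dcsw_loop_table entry in cache_helpers.S grows from 32 to 36 bytes, so the
index-to-offset calculation adds an extra "x0, lsl #2" term under
ENABLE_BTI. The ENABLE_BTI guard is assumed to be provided by the build
system when branch protection is enabled for the build.

The repeated mrs/ret pairs in amu_helpers.S are folded into a small "read"
helper macro whose definition sits outside the hunks shown here. As a rough
sketch (not the verbatim upstream definition), it is expected to expand
along these lines, emitting the landing pad only when BTI is enabled:

	/* Sketch of the "read" helper used by the jump tables below. */
	.macro	read reg:req
	#if ENABLE_BTI
		bti	j		/* landing pad for the indirect branch */
	#endif
		mrs	x0, \reg	/* read the requested counter into x0 */
		ret
	.endm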

Signed-off-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
Change-Id: Ie6a7b248c967684c6b2b86b915f0499fe095bba3
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 9c40b9d..de9c8e4 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -30,7 +30,7 @@
 	dc	\op, x0
 	add	x0, x0, x2
 	cmp	x0, x1
-	b.lo    loop_\op
+	b.lo	loop_\op
 	dsb	sy
 exit_loop_\op:
 	ret
@@ -91,6 +91,9 @@
 	cbz	x3, exit
 	adr	x14, dcsw_loop_table	// compute inner loop address
 	add	x14, x14, x0, lsl #5	// inner loop is 8x32-bit instructions
+#if ENABLE_BTI
+	add	x14, x14, x0, lsl #2	// inner loop is + "bti j" instruction
+#endif
 	mov	x0, x9
 	mov	w8, #1
 loop1:
@@ -116,6 +119,9 @@
 	br	x14			// jump to DC operation specific loop
 
 	.macro	dcsw_loop _op
+#if ENABLE_BTI
+	bti	j
+#endif
 loop2_\_op:
 	lsl	w7, w6, w2		// w7 = aligned max set number
 
@@ -134,7 +140,7 @@
 level_done:
 	add	x10, x10, #2		// increment cache number
 	cmp	x3, x10
-	b.hi    loop1
+	b.hi	loop1
 	msr	csselr_el1, xzr		// select cache level 0 in csselr
 	dsb	sy			// barrier to complete final cache operation
 	isb
diff --git a/lib/extensions/amu/aarch64/amu_helpers.S b/lib/extensions/amu/aarch64/amu_helpers.S
index 862a713..061f3fd 100644
--- a/lib/extensions/amu/aarch64/amu_helpers.S
+++ b/lib/extensions/amu/aarch64/amu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Arm Limited. All rights reserved.
+ * Copyright (c) 2017-2020, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -18,35 +18,29 @@
  * and return it in `x0`.
  */
 func amu_group0_cnt_read_internal
+	adr	x1, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x1, x0
-	lsr	x1, x1, #2
-	cmp	x1, #0
+	tst	x0, #~3
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of mrs/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x1, 1f
-	lsl	x0, x0, #3		/* each mrs/ret sequence is 8 bytes */
-	add	x1, x1, x0
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x1
 
-1:
-	mrs	x0, AMEVCNTR00_EL0	/* index 0 */
-	ret
-	mrs	x0, AMEVCNTR01_EL0	/* index 1 */
-	ret
-	mrs	x0, AMEVCNTR02_EL0	/* index 2 */
-	ret
-	mrs	x0, AMEVCNTR03_EL0	/* index 3 */
-	ret
+1:	read	AMEVCNTR00_EL0		/* index 0 */
+	read	AMEVCNTR01_EL0		/* index 1 */
+	read	AMEVCNTR02_EL0		/* index 2 */
+	read	AMEVCNTR03_EL0		/* index 3 */
 endfunc amu_group0_cnt_read_internal
 
 /*
@@ -56,57 +50,39 @@
  * and return it in `x0`.
  */
 func amu_group1_cnt_read_internal
+	adr	x1, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x1, x0
-	lsr	x1, x1, #4
-	cmp	x1, #0
+	tst	x0, #~0xF
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of mrs/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x1, 1f
-	lsl	x0, x0, #3		/* each mrs/ret sequence is 8 bytes */
-	add	x1, x1, x0
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x1
 
-1:
-	mrs	x0, AMEVCNTR10_EL0	/* index 0 */
-	ret
-	mrs	x0, AMEVCNTR11_EL0	/* index 1 */
-	ret
-	mrs	x0, AMEVCNTR12_EL0	/* index 2 */
-	ret
-	mrs	x0, AMEVCNTR13_EL0	/* index 3 */
-	ret
-	mrs	x0, AMEVCNTR14_EL0	/* index 4 */
-	ret
-	mrs	x0, AMEVCNTR15_EL0	/* index 5 */
-	ret
-	mrs	x0, AMEVCNTR16_EL0	/* index 6 */
-	ret
-	mrs	x0, AMEVCNTR17_EL0	/* index 7 */
-	ret
-	mrs	x0, AMEVCNTR18_EL0	/* index 8 */
-	ret
-	mrs	x0, AMEVCNTR19_EL0	/* index 9 */
-	ret
-	mrs	x0, AMEVCNTR1A_EL0	/* index 10 */
-	ret
-	mrs	x0, AMEVCNTR1B_EL0	/* index 11 */
-	ret
-	mrs	x0, AMEVCNTR1C_EL0	/* index 12 */
-	ret
-	mrs	x0, AMEVCNTR1D_EL0	/* index 13 */
-	ret
-	mrs	x0, AMEVCNTR1E_EL0	/* index 14 */
-	ret
-	mrs	x0, AMEVCNTR1F_EL0	/* index 15 */
-	ret
+1:	read	AMEVCNTR10_EL0		/* index 0 */
+	read	AMEVCNTR11_EL0		/* index 1 */
+	read	AMEVCNTR12_EL0		/* index 2 */
+	read	AMEVCNTR13_EL0		/* index 3 */
+	read	AMEVCNTR14_EL0		/* index 4 */
+	read	AMEVCNTR15_EL0		/* index 5 */
+	read	AMEVCNTR16_EL0		/* index 6 */
+	read	AMEVCNTR17_EL0		/* index 7 */
+	read	AMEVCNTR18_EL0		/* index 8 */
+	read	AMEVCNTR19_EL0		/* index 9 */
+	read	AMEVCNTR1A_EL0		/* index 10 */
+	read	AMEVCNTR1B_EL0		/* index 11 */
+	read	AMEVCNTR1C_EL0		/* index 12 */
+	read	AMEVCNTR1D_EL0		/* index 13 */
+	read	AMEVCNTR1E_EL0		/* index 14 */
+	read	AMEVCNTR1F_EL0		/* index 15 */
 endfunc amu_group1_cnt_read_internal