Refactor ARMv8.3 Pointer Authentication support code

This patch provides the following features and makes modifications
listed below:
- Individual APIAKey key generation for each CPU.
- New key generation on every BL31 warm boot and TSP CPU On event.
- Per-CPU storage of APIAKey added in percpu_data[]
  of cpu_data structure.
- `plat_init_apiakey()` function replaced with `plat_init_apkey()`
  which returns 128-bit value and uses Generic timer physical counter
  value to increase the randomness of the generated key.
  The new function can be used for generation of all ARMv8.3-PAuth keys
- ARMv8.3-PAuth specific code placed in `lib\extensions\pauth`.
- New `pauth_init_enable_el1()` and `pauth_init_enable_el3()` functions
  generate, program and enable APIAKey_EL1 for EL1 and EL3 respectively;
  pauth_disable_el1()` and `pauth_disable_el3()` functions disable
  PAuth for EL1 and EL3 respectively;
  `pauth_load_bl31_apiakey()` loads saved per-CPU APIAKey_EL1 from
  cpu-data structure.
- Combined `save_gp_pauth_registers()` function replaces calls to
  `save_gp_registers()` and `pauth_context_save()`;
  `restore_gp_pauth_registers()` replaces `pauth_context_restore()`
  and `restore_gp_registers()` calls.
- `restore_gp_registers_eret()` function removed with corresponding
  code placed in `el3_exit()`.
- Fixed the issue when `pauth_t pauth_ctx` structure allocated space
  for 12 uint64_t PAuth registers instead of 10 by removal of macro
  CTX_PACGAKEY_END from `include/lib/el3_runtime/aarch64/context.h`
  and assigning its value to CTX_PAUTH_REGS_END.
- Use of MODE_SP_ELX and MODE_SP_EL0 macro definitions
  in `msr	spsel`  instruction instead of hard-coded values.
- Changes in documentation related to ARMv8.3-PAuth and ARMv8.5-BTI.

Change-Id: Id18b81cc46f52a783a7e6a09b9f149b6ce803211
Signed-off-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
diff --git a/include/lib/el3_runtime/cpu_data.h b/include/lib/el3_runtime/cpu_data.h
index 55db4cf..5426135 100644
--- a/include/lib/el3_runtime/cpu_data.h
+++ b/include/lib/el3_runtime/cpu_data.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -11,23 +11,37 @@
 
 #include <bl31/ehf.h>
 
+/* Size of psci_cpu_data structure */
+#define PSCI_CPU_DATA_SIZE		12
+
 #ifdef __aarch64__
 
-/* Offsets for the cpu_data structure */
-#define CPU_DATA_CRASH_BUF_OFFSET	0x18
-/* need enough space in crash buffer to save 8 registers */
-#define CPU_DATA_CRASH_BUF_SIZE		64
+/* 8-bytes aligned size of psci_cpu_data structure */
+#define PSCI_CPU_DATA_SIZE_ALIGNED	((PSCI_CPU_DATA_SIZE + 7) & ~7)
+
+/* Offset of cpu_ops_ptr, size 8 bytes */
 #define CPU_DATA_CPU_OPS_PTR		0x10
 
-#else /* __aarch64__ */
+#if ENABLE_PAUTH
+/* 8-bytes aligned offset of apiakey[2], size 16 bytes */
+#define	CPU_DATA_APIAKEY_OFFSET		(0x18 + PSCI_CPU_DATA_SIZE_ALIGNED)
+#define CPU_DATA_CRASH_BUF_OFFSET	(CPU_DATA_APIAKEY_OFFSET + 0x10)
+#else
+#define CPU_DATA_CRASH_BUF_OFFSET	(0x18 + PSCI_CPU_DATA_SIZE_ALIGNED)
+#endif	/* ENABLE_PAUTH */
+
+/* need enough space in crash buffer to save 8 registers */
+#define CPU_DATA_CRASH_BUF_SIZE		64
+
+#else	/* !__aarch64__ */
 
 #if CRASH_REPORTING
 #error "Crash reporting is not supported in AArch32"
 #endif
 #define CPU_DATA_CPU_OPS_PTR		0x0
-#define CPU_DATA_CRASH_BUF_OFFSET	0x4
+#define CPU_DATA_CRASH_BUF_OFFSET	(0x4 + PSCI_CPU_DATA_SIZE)
 
-#endif /* __aarch64__ */
+#endif	/* __aarch64__ */
 
 #if CRASH_REPORTING
 #define CPU_DATA_CRASH_BUF_END		(CPU_DATA_CRASH_BUF_OFFSET + \
@@ -88,13 +102,16 @@
 	void *cpu_context[2];
 #endif
 	uintptr_t cpu_ops_ptr;
+	struct psci_cpu_data psci_svc_cpu_data;
+#if ENABLE_PAUTH
+	uint64_t apiakey[2];
+#endif
 #if CRASH_REPORTING
 	u_register_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
 #endif
 #if ENABLE_RUNTIME_INSTRUMENTATION
 	uint64_t cpu_data_pmf_ts[CPU_DATA_PMF_TS_COUNT];
 #endif
-	struct psci_cpu_data psci_svc_cpu_data;
 #if PLAT_PCPU_DATA_SIZE
 	uint8_t platform_cpu_data[PLAT_PCPU_DATA_SIZE];
 #endif
@@ -105,6 +122,12 @@
 
 extern cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
 
+#if ENABLE_PAUTH
+CASSERT(CPU_DATA_APIAKEY_OFFSET == __builtin_offsetof
+	(cpu_data_t, apiakey),
+	assert_cpu_data_crash_stack_offset_mismatch);
+#endif
+
 #if CRASH_REPORTING
 /* verify assembler offsets match data structures */
 CASSERT(CPU_DATA_CRASH_BUF_OFFSET == __builtin_offsetof