Merge changes from topic "advk-serror" into integration

* changes:
  fix(plat/marvell/a3k): disable HANDLE_EA_EL3_FIRST by default
  fix(plat/marvell/a3k): update information about PCIe abort hack
diff --git a/docs/plat/marvell/armada/build.rst b/docs/plat/marvell/armada/build.rst
index 8af27b1..b125144 100644
--- a/docs/plat/marvell/armada/build.rst
+++ b/docs/plat/marvell/armada/build.rst
@@ -153,6 +153,29 @@
 
 Armada37x0 specific build options:
 
+- HANDLE_EA_EL3_FIRST
+
+        When ``HANDLE_EA_EL3_FIRST=1``, External Aborts and SError Interrupts will be always trapped
+        in TF-A. TF-A in this case enables dirty hack / workaround for a bug found in U-Boot and
+        Linux kernel PCIe controller driver pci-aardvark.c, traps and then masks SError interrupt
+        caused by AXI SLVERR on external access (syndrome 0xbf000002).
+
+        Otherwise when ``HANDLE_EA_EL3_FIRST=0``, these exceptions will be trapped in the current
+        exception level (or in EL1 if the current exception level is EL0). So exceptions caused by
+        U-Boot will be trapped in U-Boot, exceptions caused by Linux kernel (or user applications)
+        will be trapped in Linux kernel.
+
+        Mentioned bug in pci-aardvark.c driver is fixed in U-Boot version v2021.07 and Linux kernel
+        version v5.13 (workarounded since Linux kernel version 5.9) and also backported in Linux
+        kernel stable releases since versions v5.12.13, v5.10.46, v5.4.128, v4.19.198, v4.14.240.
+
+        If target system has already patched version of U-Boot and Linux kernel then it is strongly
+        recommended to not enable this workaround as it disallows propagating of all External Aborts
+        to running Linux kernel and makes correctable errors as fatal aborts.
+
+        This option is now disabled by default. In past this option was enabled by default in
+        TF-A versions v2.2, v2.3, v2.4 and v2.5.
+
 - CM3_SYSTEM_RESET
 
         When ``CM3_SYSTEM_RESET=1``, the Cortex-M3 secure coprocessor will be used for system reset.
diff --git a/plat/marvell/armada/a3k/common/a3700_common.mk b/plat/marvell/armada/a3k/common/a3700_common.mk
index 0a89742..238587d 100644
--- a/plat/marvell/armada/a3k/common/a3700_common.mk
+++ b/plat/marvell/armada/a3k/common/a3700_common.mk
@@ -13,7 +13,6 @@
 PLAT_COMMON_BASE		:= $(PLAT_FAMILY_BASE)/common
 MARVELL_DRV_BASE		:= drivers/marvell
 MARVELL_COMMON_BASE		:= $(MARVELL_PLAT_BASE)/common
-HANDLE_EA_EL3_FIRST		:= 1
 
 include plat/marvell/marvell.mk
 
@@ -53,7 +52,6 @@
 				$(PLAT_COMMON_BASE)/dram_win.c		\
 				$(PLAT_COMMON_BASE)/io_addr_dec.c	\
 				$(PLAT_COMMON_BASE)/marvell_plat_config.c     \
-				$(PLAT_COMMON_BASE)/a3700_ea.c		\
 				$(PLAT_FAMILY_BASE)/$(PLAT)/plat_bl31_setup.c \
 				$(MARVELL_COMMON_BASE)/marvell_cci.c	\
 				$(MARVELL_COMMON_BASE)/marvell_ddr_info.c	\
@@ -63,6 +61,10 @@
 				$(PLAT_COMMON_BASE)/a3700_sip_svc.c	\
 				$(MARVELL_DRV)
 
+ifeq ($(HANDLE_EA_EL3_FIRST),1)
+BL31_SOURCES		+=	$(PLAT_COMMON_BASE)/a3700_ea.c
+endif
+
 ifeq ($(CM3_SYSTEM_RESET),1)
 BL31_SOURCES		+=	$(PLAT_COMMON_BASE)/cm3_system_reset.c
 endif
diff --git a/plat/marvell/armada/a3k/common/a3700_ea.c b/plat/marvell/armada/a3k/common/a3700_ea.c
index 3a4f720..4a58fc6 100644
--- a/plat/marvell/armada/a3k/common/a3700_ea.c
+++ b/plat/marvell/armada/a3k/common/a3700_ea.c
@@ -8,14 +8,79 @@
 #include <common/debug.h>
 #include <arch_helpers.h>
 #include <plat/common/platform.h>
+#include <bl31/ea_handle.h>
 
-#define ADVK_SERROR_SYNDROME 0xbf000002
+#define A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS 0xbf000002
 
+#if !ENABLE_BACKTRACE
+static const char *get_el_str(unsigned int el)
+{
+	if (el == MODE_EL3) {
+		return "EL3";
+	} else if (el == MODE_EL2) {
+		return "EL2";
+	}
+	return "S-EL1";
+}
+#endif /* !ENABLE_BACKTRACE */
+
+/*
+ * This source file with custom plat_ea_handler function is compiled only when
+ * building TF-A with compile option HANDLE_EA_EL3_FIRST=1
+ */
 void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
 		void *handle, uint64_t flags)
 {
-	if (syndrome == ADVK_SERROR_SYNDROME)
+	unsigned int level = (unsigned int)GET_EL(read_spsr_el3());
+
+	/*
+	 * Asynchronous External Abort with syndrome 0xbf000002 on Cortex A53
+	 * core means SError interrupt caused by AXI SLVERR on external access.
+	 *
+	 * In most cases this indicates a bug in U-Boot or Linux kernel driver
+	 * pci-aardvark.c which implements access to A3700 PCIe config space.
+	 * Driver does not wait for PCIe PIO transfer completion and try to
+	 * start a new PCIe PIO transfer while previous has not finished yet.
+	 * A3700 PCIe controller in this case sends SLVERR via AXI which results
+	 * in a fatal Asynchronous SError interrupt on Cortex A53 CPU.
+	 *
+	 * Following patches fix that bug in U-Boot and Linux kernel drivers:
+	 * https://source.denx.de/u-boot/u-boot/-/commit/eccbd4ad8e4e182638eafbfb87ac139c04f24a01
+	 * https://git.kernel.org/stable/c/f18139966d072dab8e4398c95ce955a9742e04f7
+	 *
+	 * As a hacky workaround for unpatched U-Boot and Linux kernel drivers
+	 * ignore all asynchronous aborts with that syndrome value received on
+	 * CPU from level lower than EL3.
+	 *
+	 * Because these aborts are delivered on CPU asynchronously, they are
+	 * imprecise and we cannot check the real reason of abort and neither
+	 * who and why sent this abort. We expect that on A3700 it is always
+	 * PCIe controller.
+	 *
+	 * Hence ignoring all aborts with this syndrome value is just a giant
+	 * hack that we need only because of bugs in old U-Boot and Linux kernel
+	 * versions and because it was decided that TF-A would implement this
+	 * hack for U-Boot and Linux kernel it in this way. New patched U-Boot
+	 * and kernel versions do not need it anymore.
+	 *
+	 * Links to discussion about this workaround:
+	 * https://lore.kernel.org/linux-pci/20190316161243.29517-1-repk@triplefau.lt/
+	 * https://lore.kernel.org/linux-pci/971be151d24312cc533989a64bd454b4@www.loen.fr/
+	 * https://review.trustedfirmware.org/c/TF-A/trusted-firmware-a/+/1541
+	 */
+	if (level < MODE_EL3 && ea_reason == ERROR_EA_ASYNC &&
+	    syndrome == A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS) {
+		ERROR_NL();
+		ERROR("Ignoring Asynchronous External Abort with"
+		     " syndrome 0x%llx received on 0x%lx from %s\n",
+		     syndrome, read_mpidr_el1(), get_el_str(level));
+		ERROR("SError interrupt: AXI SLVERR on external access\n");
+		ERROR("This indicates a bug in pci-aardvark.c driver\n");
+		ERROR("Please update U-Boot/Linux to the latest version\n");
+		ERROR_NL();
+		console_flush();
 		return;
+	}
 
 	plat_default_ea_handler(ea_reason, syndrome, cookie, handle, flags);
 }