Merge "feat(stm32mp15): remove OP-TEE shared mem" into integration
diff --git a/Makefile b/Makefile
index 0ae3c10..d73fc87 100644
--- a/Makefile
+++ b/Makefile
@@ -1342,6 +1342,8 @@
 	PSCI_EXTENDED_STATE_ID \
 	PSCI_OS_INIT_MODE \
 	RESET_TO_BL31 \
+	RME_GPT_BITLOCK_BLOCK \
+	RME_GPT_MAX_BLOCK \
 	SEPARATE_CODE_AND_RODATA \
 	SEPARATE_BL2_NOLOAD_REGION \
 	SEPARATE_NOBITS_REGION \
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index 2f2116f..52a9317 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -805,6 +805,21 @@
    instead of the BL1 entrypoint. It can take the value 0 (CPU reset to BL1
    entrypoint) or 1 (CPU reset to SP_MIN entrypoint). The default value is 0.
 
+-  ``RME_GPT_BITLOCK_BLOCK``: This defines the block size (in number of 512MB
+-  blocks) covered by a single bit of the bitlock structure during RME GPT
+-  operations. The lower the block size, the better opportunity for
+-  parallelising GPT operations but at the cost of more bits being needed
+-  for the bitlock structure. This numeric parameter can take the values
+-  from 0 to 512 and must be a power of 2. The value of 0 is special and
+-  and it chooses a single spinlock for all GPT L1 table entries. Default
+-  value is 1 which corresponds to block size of 512MB per bit of bitlock
+-  structure.
+
+-  ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define the maximum size of
+   supported contiguous blocks in GPT Library. This parameter can take the
+   values 0, 2, 32 and 512. Setting this value to 0 disables use of Contigious
+   descriptors. Default value is 2.
+
 -  ``ROT_KEY``: This option is used when ``GENERATE_COT=1``. It specifies a
    file that contains the ROT private key in PEM format or a PKCS11 URI and
    enforces public key hash generation. If ``SAVE_KEYS=1``, only a file is
diff --git a/fdts/tc-base.dtsi b/fdts/tc-base.dtsi
index e32d215..494c825 100644
--- a/fdts/tc-base.dtsi
+++ b/fdts/tc-base.dtsi
@@ -278,37 +278,35 @@
 		#size-cells = <1>;
 		ranges = <0 0x0 PLAT_ARM_NSRAM_BASE PLAT_ARM_NSRAM_SIZE>;
 
-		cpu_scp_scmi_mem: scp-shmem@0 {
+		cpu_scp_scmi_a2p: scp-shmem@0 {
 			compatible = "arm,scmi-shmem";
 			reg = <0x0 0x80>;
 		};
 	};
 
 	mbox_db_rx: mhu@MHU_RX_ADDR {
-		compatible = "arm,mhuv2-rx","arm,primecell";
-		reg = <0x0 ADDRESSIFY(MHU_RX_ADDR) 0x0 0x1000>;
+		compatible = MHU_RX_COMPAT;
+		reg = <0x0 ADDRESSIFY(MHU_RX_ADDR) 0x0 MHU_OFFSET>;
 		clocks = <&soc_refclk>;
 		clock-names = "apb_pclk";
-		#mbox-cells = <2>;
-		interrupts = <GIC_SPI INT_MBOX_RX IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "mhu_rx";
+		#mbox-cells = <MHU_MBOX_CELLS>;
+		interrupts = <GIC_SPI MHU_RX_INT_NUM IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = MHU_RX_INT_NAME;
 	};
 
 	mbox_db_tx: mhu@MHU_TX_ADDR {
-		compatible = "arm,mhuv2-tx","arm,primecell";
-		reg = <0x0 ADDRESSIFY(MHU_TX_ADDR) 0x0 0x1000>;
+		compatible = MHU_TX_COMPAT;
+		reg = <0x0 ADDRESSIFY(MHU_TX_ADDR) 0x0 MHU_OFFSET>;
 		clocks = <&soc_refclk>;
 		clock-names = "apb_pclk";
-		#mbox-cells = <2>;
-		interrupt-names = "mhu_tx";
+		#mbox-cells = <MHU_MBOX_CELLS>;
+		interrupt-names = MHU_TX_INT_NAME;
 	};
 
 	firmware {
 		scmi {
 			compatible = "arm,scmi";
 			mbox-names = "tx", "rx";
-			mboxes = <&mbox_db_tx 0 0 &mbox_db_rx 0 0 >;
-			shmem = <&cpu_scp_scmi_mem &cpu_scp_scmi_mem>;
 			#address-cells = <1>;
 			#size-cells = <0>;
 
@@ -487,6 +485,14 @@
 #if TC_IOMMU_EN
 		iommus = <&smmu_700 0x200>;
 #endif /* TC_IOMMU_EN */
+		pbha {
+			int-id-override = <0 0x22>, <2 0x23>, <4 0x23>, <7 0x22>,
+					  <8 0x22>, <9 0x22>, <10 0x22>, <11 0x22>,
+					  <12 0x22>, <13 0x22>, <16 0x22>, <17 0x32>,
+					  <18 0x32>, <19 0x22>, <20 0x22>, <21 0x32>,
+					  <22 0x32>, <24 0x22>, <28 0x32>;
+			propagate-bits = <0x0f>;
+		};
 	};
 
 	power_model_simple {
diff --git a/fdts/tc-fvp.dtsi b/fdts/tc-fvp.dtsi
index 46b0e81..9f3a9ac 100644
--- a/fdts/tc-fvp.dtsi
+++ b/fdts/tc-fvp.dtsi
@@ -7,6 +7,23 @@
 #define GIC_CTRL_ADDR		2c010000
 #define GIC_GICR_OFFSET		0x200000
 #define UART_OFFSET		0x1000
+
+#ifdef TC_RESOLUTION_1920X1080P60
+
+#define VENCODER_TIMING_CLK 148500000
+#define VENCODER_TIMING								\
+	clock-frequency = <VENCODER_TIMING_CLK>;				\
+	hactive = <1920>;							\
+	vactive = <1080>;							\
+	hfront-porch = <88>;							\
+	hback-porch = <148>;							\
+	hsync-len = <44>;							\
+	vfront-porch = <4>;							\
+	vback-porch = <36>;							\
+	vsync-len = <5>
+
+#else /* TC_RESOLUTION_640X480P60 */
+
 #define VENCODER_TIMING_CLK 25175000
 #define VENCODER_TIMING								\
 	clock-frequency = <VENCODER_TIMING_CLK>;				\
@@ -19,6 +36,8 @@
 	vback-porch = <33>;							\
 	vsync-len = <2>
 
+#endif
+
 / {
 	chosen {
 		stdout-path = "serial0:115200n8";
diff --git a/fdts/tc2.dts b/fdts/tc2.dts
index 288b40f..69c6886 100644
--- a/fdts/tc2.dts
+++ b/fdts/tc2.dts
@@ -20,9 +20,17 @@
 #endif /* TARGET_FLAVOUR_FPGA */
 #define BIG_CAPACITY			1024
 
-#define INT_MBOX_RX			317
 #define MHU_TX_ADDR			45000000 /* hex */
+#define MHU_TX_COMPAT			"arm,mhuv2-tx","arm,primecell"
+#define MHU_TX_INT_NAME			"mhu_tx"
+
 #define MHU_RX_ADDR			45010000 /* hex */
+#define MHU_RX_COMPAT			"arm,mhuv2-rx","arm,primecell"
+#define MHU_OFFSET			0x1000
+#define MHU_MBOX_CELLS			2
+#define MHU_RX_INT_NUM			317
+#define MHU_RX_INT_NAME			"mhu_rx"
+
 #define MPAM_ADDR			0x1 0x00010000 /* 0x1_0001_0000 */
 #define UARTCLK_FREQ			5000000
 
@@ -211,6 +219,24 @@
 		arm,mhuv2-protocols = <0 1>;
 	};
 
+	firmware {
+		/*
+		 * TC2 does not have a P2A channel, but wiring one was needed to make Linux work
+		 * (by chance). At the time the SCMI driver did not support bidirectional
+		 * mailboxes so as a workaround, the A2P channel was wired for TX communication
+		 * and the synchronous replies would be read asyncrhonously as if coming from
+		 * the P2A channel, while being the actual A2P channel.
+		 *
+		 * This will not work with kernels > 5.15, but keep it around to keep TC2
+		 * working with its target kernel. Newer kernels will still work, but SCMI
+		 * won't as they check that the two regions are distinct.
+		 */
+		scmi {
+			mboxes = <&mbox_db_tx 0 0 &mbox_db_rx 0 0>;
+			shmem = <&cpu_scp_scmi_a2p &cpu_scp_scmi_a2p>;
+		};
+	};
+
 	dp0: display@DPU_ADDR {
 #if TC_SCMI_PD_CTRL_EN
 		power-domains = <&scmi_devpd (PLAT_MAX_CPUS_PER_CLUSTER + 2)>;
diff --git a/fdts/tc3.dts b/fdts/tc3.dts
index 52b0856..fe6a695 100644
--- a/fdts/tc3.dts
+++ b/fdts/tc3.dts
@@ -14,9 +14,17 @@
 #define MID_CAPACITY			686
 #define BIG_CAPACITY			1024
 
-#define INT_MBOX_RX			300
 #define MHU_TX_ADDR			46040000 /* hex */
+#define MHU_TX_COMPAT			"arm,mhuv3"
+#define MHU_TX_INT_NAME			""
+
 #define MHU_RX_ADDR			46140000 /* hex */
+#define MHU_RX_COMPAT			"arm,mhuv3"
+#define MHU_OFFSET			0x10000
+#define MHU_MBOX_CELLS			3
+#define MHU_RX_INT_NUM			300
+#define MHU_RX_INT_NAME			"combined-mbx"
+
 #define MPAM_ADDR			0x0 0x5f010000 /* 0x5f01_0000 */
 #define UARTCLK_FREQ			3750000
 
@@ -63,4 +71,18 @@
 		interrupt-affinity = <&CPU0>,  <&CPU1>,  <&CPU2>,  <&CPU3>,
 				     <&CPU4>,  <&CPU5>,  <&CPU6>,  <&CPU7>;
 	};
+
+	sram: sram@6000000 {
+		cpu_scp_scmi_p2a: scp-shmem@80 {
+			compatible = "arm,scmi-shmem";
+			reg = <0x80 0x80>;
+		};
+	};
+
+	firmware {
+		scmi {
+			mboxes = <&mbox_db_tx 0 0 0 &mbox_db_rx 0 0 0 &mbox_db_rx 0 0 1>;
+			shmem = <&cpu_scp_scmi_a2p &cpu_scp_scmi_p2a>;
+		};
+	};
 };
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 59adc7c..1e2f84b 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -807,15 +807,6 @@
 	TLBIRPALOS(addr, TLBI_SZ_512M);
 }
 
-/*
- * Invalidate TLBs of GPT entries by Physical address, last level.
- *
- * @pa: the starting address for the range
- *      of invalidation
- * @size: size of the range of invalidation
- */
-void gpt_tlbi_by_pa_ll(uint64_t pa, size_t size);
-
 /* Previously defined accessor functions with incomplete register names  */
 
 #define read_current_el()	read_CurrentEl()
diff --git a/include/drivers/arm/css/css_mhu_doorbell.h b/include/drivers/arm/css/css_mhu_doorbell.h
index 88302fd..d6c1a2a 100644
--- a/include/drivers/arm/css/css_mhu_doorbell.h
+++ b/include/drivers/arm/css/css_mhu_doorbell.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2024, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -22,6 +22,10 @@
 #define SENDER_REG_STAT(_channel)	(0x20 * (_channel))
 #define SENDER_REG_SET(_channel)	((0x20 * (_channel)) + 0xC)
 
+#define MHU_V3_PBX_PDBCW_PAGE_OFFSET		UL(0x1000)
+#define MHU_V3_SENDER_REG_SET(_channel)		(MHU_V3_PBX_PDBCW_PAGE_OFFSET + \
+						 SENDER_REG_SET(_channel))
+
 /* Helper macro to ring doorbell */
 #define MHU_RING_DOORBELL(addr, modify_mask, preserve_mask)	do {	\
 		uint32_t db = mmio_read_32(addr) & (preserve_mask);	\
diff --git a/include/lib/cpus/aarch64/cortex_a725.h b/include/lib/cpus/aarch64/cortex_a725.h
new file mode 100644
index 0000000..123c5ab
--- /dev/null
+++ b/include/lib/cpus/aarch64/cortex_a725.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2023-2024, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef CORTEX_A725_H
+#define CORTEX_A725_H
+
+#define CORTEX_A725_MIDR					U(0x410FD870)
+
+/*******************************************************************************
+ * CPU Extended Control register specific definitions
+ ******************************************************************************/
+#define CORTEX_A725_CPUECTLR_EL1				S3_0_C15_C1_4
+
+/*******************************************************************************
+ * CPU Power Control register specific definitions
+ ******************************************************************************/
+#define CORTEX_A725_CPUPWRCTLR_EL1				S3_0_C15_C2_7
+#define CORTEX_A725_CPUPWRCTLR_EL1_CORE_PWRDN_BIT		U(1)
+
+#endif /* CORTEX_A725_H */
diff --git a/include/lib/cpus/aarch64/cortex_blackhawk.h b/include/lib/cpus/aarch64/cortex_blackhawk.h
deleted file mode 100644
index bfb3039..0000000
--- a/include/lib/cpus/aarch64/cortex_blackhawk.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2023, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef CORTEX_BLACKHAWK_H
-#define CORTEX_BLACKHAWK_H
-
-#define CORTEX_BLACKHAWK_MIDR					U(0x410FD850)
-
-/*******************************************************************************
- * CPU Extended Control register specific definitions
- ******************************************************************************/
-#define CORTEX_BLACKHAWK_CPUECTLR_EL1				S3_0_C15_C1_4
-
-/*******************************************************************************
- * CPU Power Control register specific definitions
- ******************************************************************************/
-#define CORTEX_BLACKHAWK_CPUPWRCTLR_EL1				S3_0_C15_C2_7
-#define CORTEX_BLACKHAWK_CPUPWRCTLR_EL1_CORE_PWRDN_BIT		U(1)
-
-#endif /* CORTEX_BLACKHAWK_H */
diff --git a/include/lib/cpus/aarch64/cortex_chaberton.h b/include/lib/cpus/aarch64/cortex_chaberton.h
deleted file mode 100644
index 8f10b68..0000000
--- a/include/lib/cpus/aarch64/cortex_chaberton.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2023, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef CORTEX_CHABERTON_H
-#define CORTEX_CHABERTON_H
-
-#define CORTEX_CHABERTON_MIDR					U(0x410FD870)
-
-/*******************************************************************************
- * CPU Extended Control register specific definitions
- ******************************************************************************/
-#define CORTEX_CHABERTON_CPUECTLR_EL1				S3_0_C15_C1_4
-
-/*******************************************************************************
- * CPU Power Control register specific definitions
- ******************************************************************************/
-#define CORTEX_CHABERTON_CPUPWRCTLR_EL1				S3_0_C15_C2_7
-#define CORTEX_CHABERTON_CPUPWRCTLR_EL1_CORE_PWRDN_BIT		U(1)
-
-#endif /* CORTEX_CHABERTON_H */
diff --git a/include/lib/cpus/aarch64/cortex_x925.h b/include/lib/cpus/aarch64/cortex_x925.h
new file mode 100644
index 0000000..38aafcf
--- /dev/null
+++ b/include/lib/cpus/aarch64/cortex_x925.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2023-2024, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef CORTEX_X925_H
+#define CORTEX_X925_H
+
+#define CORTEX_X925_MIDR					U(0x410FD850)
+
+/*******************************************************************************
+ * CPU Extended Control register specific definitions
+ ******************************************************************************/
+#define CORTEX_X925_CPUECTLR_EL1				S3_0_C15_C1_4
+
+/*******************************************************************************
+ * CPU Power Control register specific definitions
+ ******************************************************************************/
+#define CORTEX_X925_CPUPWRCTLR_EL1				S3_0_C15_C2_7
+#define CORTEX_X925_CPUPWRCTLR_EL1_CORE_PWRDN_BIT		U(1)
+
+#endif /* CORTEX_X925_H */
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 4f11ad2..6f97bed 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -299,27 +299,6 @@
 #endif /* CTX_INCLUDE_PAUTH_REGS */
 
 /*******************************************************************************
- * Registers related to ARMv8.2-MPAM.
- ******************************************************************************/
-#define CTX_MPAM_REGS_OFFSET	(CTX_PAUTH_REGS_OFFSET + CTX_PAUTH_REGS_END)
-#if CTX_INCLUDE_MPAM_REGS
-#define CTX_MPAM2_EL2		U(0x0)
-#define CTX_MPAMHCR_EL2		U(0x8)
-#define CTX_MPAMVPM0_EL2	U(0x10)
-#define CTX_MPAMVPM1_EL2	U(0x18)
-#define CTX_MPAMVPM2_EL2	U(0x20)
-#define CTX_MPAMVPM3_EL2	U(0x28)
-#define CTX_MPAMVPM4_EL2	U(0x30)
-#define CTX_MPAMVPM5_EL2	U(0x38)
-#define CTX_MPAMVPM6_EL2	U(0x40)
-#define CTX_MPAMVPM7_EL2	U(0x48)
-#define CTX_MPAMVPMV_EL2	U(0x50)
-#define CTX_MPAM_REGS_END	U(0x60)
-#else
-#define CTX_MPAM_REGS_END	U(0x0)
-#endif /* CTX_INCLUDE_MPAM_REGS */
-
-/*******************************************************************************
  * Registers initialised in a per-world context.
  ******************************************************************************/
 #define CTX_CPTR_EL3			U(0x0)
@@ -355,9 +334,6 @@
 #if CTX_INCLUDE_PAUTH_REGS
 # define CTX_PAUTH_REGS_ALL	(CTX_PAUTH_REGS_END >> DWORD_SHIFT)
 #endif
-#if CTX_INCLUDE_MPAM_REGS
-# define CTX_MPAM_REGS_ALL	(CTX_MPAM_REGS_END >> DWORD_SHIFT)
-#endif
 
 /*
  * AArch64 general purpose register context structure. Usually x0-x18,
@@ -397,11 +373,6 @@
 DEFINE_REG_STRUCT(pauth, CTX_PAUTH_REGS_ALL);
 #endif
 
-/* Registers associated to ARMv8.2 MPAM */
-#if CTX_INCLUDE_MPAM_REGS
-DEFINE_REG_STRUCT(mpam, CTX_MPAM_REGS_ALL);
-#endif
-
 /*
  * Macros to access members of any of the above structures using their
  * offsets
@@ -432,10 +403,6 @@
 	pauth_t pauth_ctx;
 #endif
 
-#if CTX_INCLUDE_MPAM_REGS
-	mpam_t	mpam_ctx;
-#endif
-
 #if CTX_INCLUDE_EL2_REGS
 	el2_sysregs_t el2_sysregs_ctx;
 #endif
@@ -468,9 +435,6 @@
 #if CTX_INCLUDE_PAUTH_REGS
 # define get_pauth_ctx(h)	(&((cpu_context_t *) h)->pauth_ctx)
 #endif
-#if CTX_INCLUDE_MPAM_REGS
-# define get_mpam_ctx(h)	(&((cpu_context_t *) h)->mpam_ctx)
-#endif
 
 /*
  * Compile time assertions related to the 'cpu_context' structure to
@@ -499,11 +463,6 @@
 	assert_core_context_pauth_offset_mismatch);
 #endif /* CTX_INCLUDE_PAUTH_REGS */
 
-#if CTX_INCLUDE_MPAM_REGS
-CASSERT(CTX_MPAM_REGS_OFFSET == __builtin_offsetof(cpu_context_t, mpam_ctx),
-	assert_core_context_mpam_offset_mismatch);
-#endif /* CTX_INCLUDE_MPAM_REGS */
-
 /*
  * Helper macro to set the general purpose registers that correspond to
  * parameters in an aapcs_64 call i.e. x0-x7
diff --git a/include/lib/el3_runtime/context_el2.h b/include/lib/el3_runtime/context_el2.h
index 04f6587..d25ab81 100644
--- a/include/lib/el3_runtime/context_el2.h
+++ b/include/lib/el3_runtime/context_el2.h
@@ -114,6 +114,20 @@
 	uint64_t gcspr_el2;
 } el2_gcs_regs_t;
 
+typedef struct el2_mpam_regs {
+	uint64_t mpam2_el2;
+	uint64_t mpamhcr_el2;
+	uint64_t mpamvpm0_el2;
+	uint64_t mpamvpm1_el2;
+	uint64_t mpamvpm2_el2;
+	uint64_t mpamvpm3_el2;
+	uint64_t mpamvpm4_el2;
+	uint64_t mpamvpm5_el2;
+	uint64_t mpamvpm6_el2;
+	uint64_t mpamvpm7_el2;
+	uint64_t mpamvpmv_el2;
+} el2_mpam_regs_t;
+
 typedef struct el2_sysregs {
 
 	el2_common_regs_t common;
@@ -174,6 +188,10 @@
 	el2_gcs_regs_t gcs;
 #endif
 
+#if CTX_INCLUDE_MPAM_REGS
+	el2_mpam_regs_t mpam;
+#endif
+
 } el2_sysregs_t;
 
 /*
@@ -311,6 +329,15 @@
 #define write_el2_ctx_gcs(ctx, reg, val)
 #endif /* ENABLE_FEAT_GCS */
 
+#if CTX_INCLUDE_MPAM_REGS
+#define read_el2_ctx_mpam(ctx, reg)		(((ctx)->mpam).reg)
+#define write_el2_ctx_mpam(ctx, reg, val)	((((ctx)->mpam).reg)	\
+							= (uint64_t) (val))
+#else
+#define read_el2_ctx_mpam(ctx, reg)		ULL(0)
+#define write_el2_ctx_mpam(ctx, reg, val)
+#endif /* CTX_INCLUDE_MPAM_REGS */
+
 #endif /* CTX_INCLUDE_EL2_REGS */
 /******************************************************************************/
 
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index f9c4baf..93771df 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -15,7 +15,6 @@
 	.globl	zero_normalmem
 	.globl	zeromem
 	.globl	memcpy16
-	.globl	gpt_tlbi_by_pa_ll
 
 	.globl	disable_mmu_el1
 	.globl	disable_mmu_el3
@@ -594,20 +593,3 @@
 	b.lo	1b
 	ret
 endfunc fixup_gdt_reloc
-
-/*
- * TODO: Currently only supports size of 4KB,
- * support other sizes as well.
- */
-func gpt_tlbi_by_pa_ll
-#if ENABLE_ASSERTIONS
-	cmp	x1, #PAGE_SIZE_4KB
-	ASM_ASSERT(eq)
-	tst	x0, #(PAGE_SIZE_MASK)
-	ASM_ASSERT(eq)
-#endif
-	lsr	x0, x0, #FOUR_KB_SHIFT	/* 4KB size encoding is zero */
-	sys	#6, c8, c4, #7, x0 	/* TLBI RPALOS, <Xt> */
-	dsb	sy
-	ret
-endfunc gpt_tlbi_by_pa_ll
diff --git a/lib/cpus/aarch64/cortex_a725.S b/lib/cpus/aarch64/cortex_a725.S
new file mode 100644
index 0000000..c08945f
--- /dev/null
+++ b/lib/cpus/aarch64/cortex_a725.S
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023-2024, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <common/bl_common.h>
+#include <cortex_a725.h>
+#include <cpu_macros.S>
+#include <plat_macros.S>
+
+/* Hardware handled coherency */
+#if HW_ASSISTED_COHERENCY == 0
+#error "Cortex-A725 must be compiled with HW_ASSISTED_COHERENCY enabled"
+#endif
+
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Cortex-A725 supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
+cpu_reset_func_start cortex_a725
+	/* Disable speculative loads */
+	msr	SSBS, xzr
+cpu_reset_func_end cortex_a725
+
+	/* ----------------------------------------------------
+	 * HW will do the cache maintenance while powering down
+	 * ----------------------------------------------------
+	 */
+func cortex_a725_core_pwr_dwn
+	/* ---------------------------------------------------
+	 * Enable CPU power down bit in power control register
+	 * ---------------------------------------------------
+	 */
+	sysreg_bit_set CORTEX_A725_CPUPWRCTLR_EL1, CORTEX_A725_CPUPWRCTLR_EL1_CORE_PWRDN_BIT
+	isb
+	ret
+endfunc cortex_a725_core_pwr_dwn
+
+errata_report_shim cortex_a725
+
+	/* ---------------------------------------------
+	 * This function provides Cortex-A725 specific
+	 * register information for crash reporting.
+	 * It needs to return with x6 pointing to
+	 * a list of register names in ascii and
+	 * x8 - x15 having values of registers to be
+	 * reported.
+	 * ---------------------------------------------
+	 */
+.section .rodata.cortex_a725_regs, "aS"
+cortex_a725_regs:  /* The ascii list of register names to be reported */
+	.asciz	"cpuectlr_el1", ""
+
+func cortex_a725_cpu_reg_dump
+	adr	x6, cortex_a725_regs
+	mrs	x8, CORTEX_A725_CPUECTLR_EL1
+	ret
+endfunc cortex_a725_cpu_reg_dump
+
+declare_cpu_ops cortex_a725, CORTEX_A725_MIDR, \
+	cortex_a725_reset_func, \
+	cortex_a725_core_pwr_dwn
diff --git a/lib/cpus/aarch64/cortex_blackhawk.S b/lib/cpus/aarch64/cortex_blackhawk.S
deleted file mode 100644
index b7b7a2d..0000000
--- a/lib/cpus/aarch64/cortex_blackhawk.S
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2023, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <arch.h>
-#include <asm_macros.S>
-#include <common/bl_common.h>
-#include <cortex_blackhawk.h>
-#include <cpu_macros.S>
-#include <plat_macros.S>
-
-/* Hardware handled coherency */
-#if HW_ASSISTED_COHERENCY == 0
-#error "Cortex blackhawk must be compiled with HW_ASSISTED_COHERENCY enabled"
-#endif
-
-/* 64-bit only core */
-#if CTX_INCLUDE_AARCH32_REGS == 1
-#error "Cortex blackhawk supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
-#endif
-
-cpu_reset_func_start cortex_blackhawk
-	/* Disable speculative loads */
-	msr	SSBS, xzr
-cpu_reset_func_end cortex_blackhawk
-
-	/* ----------------------------------------------------
-	 * HW will do the cache maintenance while powering down
-	 * ----------------------------------------------------
-	 */
-func cortex_blackhawk_core_pwr_dwn
-	/* ---------------------------------------------------
-	 * Enable CPU power down bit in power control register
-	 * ---------------------------------------------------
-	 */
-	sysreg_bit_set CORTEX_BLACKHAWK_CPUPWRCTLR_EL1, CORTEX_BLACKHAWK_CPUPWRCTLR_EL1_CORE_PWRDN_BIT
-	isb
-	ret
-endfunc cortex_blackhawk_core_pwr_dwn
-
-errata_report_shim cortex_blackhawk
-
-	/* ---------------------------------------------
-	 * This function provides Cortex Blackhawk specific
-	 * register information for crash reporting.
-	 * It needs to return with x6 pointing to
-	 * a list of register names in ascii and
-	 * x8 - x15 having values of registers to be
-	 * reported.
-	 * ---------------------------------------------
-	 */
-.section .rodata.cortex_blackhawk_regs, "aS"
-cortex_blackhawk_regs:  /* The ascii list of register names to be reported */
-	.asciz	"cpuectlr_el1", ""
-
-func cortex_blackhawk_cpu_reg_dump
-	adr	x6, cortex_blackhawk_regs
-	mrs	x8, CORTEX_BLACKHAWK_CPUECTLR_EL1
-	ret
-endfunc cortex_blackhawk_cpu_reg_dump
-
-declare_cpu_ops cortex_blackhawk, CORTEX_BLACKHAWK_MIDR, \
-	cortex_blackhawk_reset_func, \
-	cortex_blackhawk_core_pwr_dwn
diff --git a/lib/cpus/aarch64/cortex_chaberton.S b/lib/cpus/aarch64/cortex_chaberton.S
deleted file mode 100644
index 596fe4a..0000000
--- a/lib/cpus/aarch64/cortex_chaberton.S
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2023, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <arch.h>
-#include <asm_macros.S>
-#include <common/bl_common.h>
-#include <cortex_chaberton.h>
-#include <cpu_macros.S>
-#include <plat_macros.S>
-
-/* Hardware handled coherency */
-#if HW_ASSISTED_COHERENCY == 0
-#error "Cortex Chaberton must be compiled with HW_ASSISTED_COHERENCY enabled"
-#endif
-
-/* 64-bit only core */
-#if CTX_INCLUDE_AARCH32_REGS == 1
-#error "Cortex Chaberton supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
-#endif
-
-cpu_reset_func_start cortex_chaberton
-	/* Disable speculative loads */
-	msr	SSBS, xzr
-cpu_reset_func_end cortex_chaberton
-
-	/* ----------------------------------------------------
-	 * HW will do the cache maintenance while powering down
-	 * ----------------------------------------------------
-	 */
-func cortex_chaberton_core_pwr_dwn
-	/* ---------------------------------------------------
-	 * Enable CPU power down bit in power control register
-	 * ---------------------------------------------------
-	 */
-	sysreg_bit_set CORTEX_CHABERTON_CPUPWRCTLR_EL1, CORTEX_CHABERTON_CPUPWRCTLR_EL1_CORE_PWRDN_BIT
-	isb
-	ret
-endfunc cortex_chaberton_core_pwr_dwn
-
-errata_report_shim cortex_chaberton
-
-	/* ---------------------------------------------
-	 * This function provides Cortex Chaberton specific
-	 * register information for crash reporting.
-	 * It needs to return with x6 pointing to
-	 * a list of register names in ascii and
-	 * x8 - x15 having values of registers to be
-	 * reported.
-	 * ---------------------------------------------
-	 */
-.section .rodata.cortex_chaberton_regs, "aS"
-cortex_chaberton_regs:  /* The ascii list of register names to be reported */
-	.asciz	"cpuectlr_el1", ""
-
-func cortex_chaberton_cpu_reg_dump
-	adr	x6, cortex_chaberton_regs
-	mrs	x8, CORTEX_CHABERTON_CPUECTLR_EL1
-	ret
-endfunc cortex_chaberton_cpu_reg_dump
-
-declare_cpu_ops cortex_chaberton, CORTEX_CHABERTON_MIDR, \
-	cortex_chaberton_reset_func, \
-	cortex_chaberton_core_pwr_dwn
diff --git a/lib/cpus/aarch64/cortex_x925.S b/lib/cpus/aarch64/cortex_x925.S
new file mode 100644
index 0000000..36b442e
--- /dev/null
+++ b/lib/cpus/aarch64/cortex_x925.S
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023-2024, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <common/bl_common.h>
+#include <cortex_x925.h>
+#include <cpu_macros.S>
+#include <plat_macros.S>
+
+/* Hardware handled coherency */
+#if HW_ASSISTED_COHERENCY == 0
+#error "Cortex-X925 must be compiled with HW_ASSISTED_COHERENCY enabled"
+#endif
+
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Cortex-X925 supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
+cpu_reset_func_start cortex_x925
+	/* Disable speculative loads */
+	msr	SSBS, xzr
+cpu_reset_func_end cortex_x925
+
+	/* ----------------------------------------------------
+	 * HW will do the cache maintenance while powering down
+	 * ----------------------------------------------------
+	 */
+func cortex_x925_core_pwr_dwn
+	/* ---------------------------------------------------
+	 * Enable CPU power down bit in power control register
+	 * ---------------------------------------------------
+	 */
+	sysreg_bit_set CORTEX_X925_CPUPWRCTLR_EL1, CORTEX_X925_CPUPWRCTLR_EL1_CORE_PWRDN_BIT
+	isb
+	ret
+endfunc cortex_x925_core_pwr_dwn
+
+errata_report_shim cortex_x925
+
+	/* ---------------------------------------------
+	 * This function provides Cortex-X925 specific
+	 * register information for crash reporting.
+	 * It needs to return with x6 pointing to
+	 * a list of register names in ascii and
+	 * x8 - x15 having values of registers to be
+	 * reported.
+	 * ---------------------------------------------
+	 */
+.section .rodata.cortex_x925_regs, "aS"
+cortex_x925_regs:  /* The ascii list of register names to be reported */
+	.asciz	"cpuectlr_el1", ""
+
+func cortex_x925_cpu_reg_dump
+	adr	x6, cortex_x925_regs
+	mrs	x8, CORTEX_X925_CPUECTLR_EL1
+	ret
+endfunc cortex_x925_cpu_reg_dump
+
+declare_cpu_ops cortex_x925, CORTEX_X925_MIDR, \
+	cortex_x925_reset_func, \
+	cortex_x925_core_pwr_dwn
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 35c98f5..1937c30 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -1046,13 +1046,11 @@
 	write_hfgwtr_el2(read_el2_ctx_fgt(ctx, hfgwtr_el2));
 }
 
-#if CTX_INCLUDE_MPAM_REGS
-
-static void el2_sysregs_context_save_mpam(mpam_t *ctx)
+static void el2_sysregs_context_save_mpam(el2_sysregs_t *ctx)
 {
 	u_register_t mpam_idr = read_mpamidr_el1();
 
-	write_ctx_reg(ctx, CTX_MPAM2_EL2, read_mpam2_el2());
+	write_el2_ctx_mpam(ctx, mpam2_el2, read_mpam2_el2());
 
 	/*
 	 * The context registers that we intend to save would be part of the
@@ -1066,9 +1064,9 @@
 	 * MPAMHCR_EL2, MPAMVPMV_EL2 and MPAMVPM0_EL2 are always present if
 	 * MPAMIDR_HAS_HCR_BIT == 1.
 	 */
-	write_ctx_reg(ctx, CTX_MPAMHCR_EL2, read_mpamhcr_el2());
-	write_ctx_reg(ctx, CTX_MPAMVPM0_EL2, read_mpamvpm0_el2());
-	write_ctx_reg(ctx, CTX_MPAMVPMV_EL2, read_mpamvpmv_el2());
+	write_el2_ctx_mpam(ctx, mpamhcr_el2, read_mpamhcr_el2());
+	write_el2_ctx_mpam(ctx, mpamvpm0_el2, read_mpamvpm0_el2());
+	write_el2_ctx_mpam(ctx, mpamvpmv_el2, read_mpamvpmv_el2());
 
 	/*
 	 * The number of MPAMVPM registers is implementation defined, their
@@ -1076,71 +1074,67 @@
 	 */
 	switch ((mpam_idr >> MPAMIDR_EL1_VPMR_MAX_SHIFT) & MPAMIDR_EL1_VPMR_MAX_MASK) {
 	case 7:
-		write_ctx_reg(ctx, CTX_MPAMVPM7_EL2, read_mpamvpm7_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm7_el2, read_mpamvpm7_el2());
 		__fallthrough;
 	case 6:
-		write_ctx_reg(ctx, CTX_MPAMVPM6_EL2, read_mpamvpm6_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm6_el2, read_mpamvpm6_el2());
 		__fallthrough;
 	case 5:
-		write_ctx_reg(ctx, CTX_MPAMVPM5_EL2, read_mpamvpm5_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm5_el2, read_mpamvpm5_el2());
 		__fallthrough;
 	case 4:
-		write_ctx_reg(ctx, CTX_MPAMVPM4_EL2, read_mpamvpm4_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm4_el2, read_mpamvpm4_el2());
 		__fallthrough;
 	case 3:
-		write_ctx_reg(ctx, CTX_MPAMVPM3_EL2, read_mpamvpm3_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm3_el2, read_mpamvpm3_el2());
 		__fallthrough;
 	case 2:
-		write_ctx_reg(ctx, CTX_MPAMVPM2_EL2, read_mpamvpm2_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm2_el2, read_mpamvpm2_el2());
 		__fallthrough;
 	case 1:
-		write_ctx_reg(ctx, CTX_MPAMVPM1_EL2, read_mpamvpm1_el2());
+		write_el2_ctx_mpam(ctx, mpamvpm1_el2, read_mpamvpm1_el2());
 		break;
 	}
 }
 
-#endif /* CTX_INCLUDE_MPAM_REGS */
-
-#if CTX_INCLUDE_MPAM_REGS
-static void el2_sysregs_context_restore_mpam(mpam_t *ctx)
+static void el2_sysregs_context_restore_mpam(el2_sysregs_t *ctx)
 {
 	u_register_t mpam_idr = read_mpamidr_el1();
 
-	write_mpam2_el2(read_ctx_reg(ctx, CTX_MPAM2_EL2));
+	write_mpam2_el2(read_el2_ctx_mpam(ctx, mpam2_el2));
 
 	if ((mpam_idr & MPAMIDR_HAS_HCR_BIT) == 0U) {
 		return;
 	}
 
-	write_mpamhcr_el2(read_ctx_reg(ctx, CTX_MPAMHCR_EL2));
-	write_mpamvpm0_el2(read_ctx_reg(ctx, CTX_MPAMVPM0_EL2));
-	write_mpamvpmv_el2(read_ctx_reg(ctx, CTX_MPAMVPMV_EL2));
+	write_mpamhcr_el2(read_el2_ctx_mpam(ctx, mpamhcr_el2));
+	write_mpamvpm0_el2(read_el2_ctx_mpam(ctx, mpamvpm0_el2));
+	write_mpamvpmv_el2(read_el2_ctx_mpam(ctx, mpamvpmv_el2));
 
 	switch ((mpam_idr >> MPAMIDR_EL1_VPMR_MAX_SHIFT) & MPAMIDR_EL1_VPMR_MAX_MASK) {
 	case 7:
-		write_mpamvpm7_el2(read_ctx_reg(ctx, CTX_MPAMVPM7_EL2));
+		write_mpamvpm7_el2(read_el2_ctx_mpam(ctx, mpamvpm7_el2));
 		__fallthrough;
 	case 6:
-		write_mpamvpm6_el2(read_ctx_reg(ctx, CTX_MPAMVPM6_EL2));
+		write_mpamvpm6_el2(read_el2_ctx_mpam(ctx, mpamvpm6_el2));
 		__fallthrough;
 	case 5:
-		write_mpamvpm5_el2(read_ctx_reg(ctx, CTX_MPAMVPM5_EL2));
+		write_mpamvpm5_el2(read_el2_ctx_mpam(ctx, mpamvpm5_el2));
 		__fallthrough;
 	case 4:
-		write_mpamvpm4_el2(read_ctx_reg(ctx, CTX_MPAMVPM4_EL2));
+		write_mpamvpm4_el2(read_el2_ctx_mpam(ctx, mpamvpm4_el2));
 		__fallthrough;
 	case 3:
-		write_mpamvpm3_el2(read_ctx_reg(ctx, CTX_MPAMVPM3_EL2));
+		write_mpamvpm3_el2(read_el2_ctx_mpam(ctx, mpamvpm3_el2));
 		__fallthrough;
 	case 2:
-		write_mpamvpm2_el2(read_ctx_reg(ctx, CTX_MPAMVPM2_EL2));
+		write_mpamvpm2_el2(read_el2_ctx_mpam(ctx, mpamvpm2_el2));
 		__fallthrough;
 	case 1:
-		write_mpamvpm1_el2(read_ctx_reg(ctx, CTX_MPAMVPM1_EL2));
+		write_mpamvpm1_el2(read_el2_ctx_mpam(ctx, mpamvpm1_el2));
 		break;
 	}
 }
-#endif /* CTX_INCLUDE_MPAM_REGS */
 
 /* ---------------------------------------------------------------------------
  * The following registers are not added:
@@ -1283,12 +1277,9 @@
 		write_el2_ctx_mte2(el2_sysregs_ctx, tfsr_el2, read_tfsr_el2());
 	}
 
-#if CTX_INCLUDE_MPAM_REGS
 	if (is_feat_mpam_supported()) {
-		mpam_t *mpam_ctx = get_mpam_ctx(ctx);
-		el2_sysregs_context_save_mpam(mpam_ctx);
+		el2_sysregs_context_save_mpam(el2_sysregs_ctx);
 	}
-#endif
 
 	if (is_feat_fgt_supported()) {
 		el2_sysregs_context_save_fgt(el2_sysregs_ctx);
@@ -1369,12 +1360,9 @@
 		write_tfsr_el2(read_el2_ctx_mte2(el2_sysregs_ctx, tfsr_el2));
 	}
 
-#if CTX_INCLUDE_MPAM_REGS
 	if (is_feat_mpam_supported()) {
-		mpam_t *mpam_ctx = get_mpam_ctx(ctx);
-		el2_sysregs_context_restore_mpam(mpam_ctx);
+		el2_sysregs_context_restore_mpam(el2_sysregs_ctx);
 	}
-#endif
 
 	if (is_feat_fgt_supported()) {
 		el2_sysregs_context_restore_fgt(el2_sysregs_ctx);
diff --git a/lib/gpt_rme/gpt_rme.c b/lib/gpt_rme/gpt_rme.c
index 72e905e..4d80373 100644
--- a/lib/gpt_rme/gpt_rme.c
+++ b/lib/gpt_rme/gpt_rme.c
@@ -57,6 +57,13 @@
  */
 static const gpt_p_val_e gpt_p_lookup[] = {PGS_4KB_P, PGS_64KB_P, PGS_16KB_P};
 
+static void shatter_2mb(uintptr_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc);
+static void shatter_32mb(uintptr_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc);
+static void shatter_512mb(uintptr_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc);
+
 /*
  * This structure contains GPT configuration data
  */
@@ -70,10 +77,187 @@
 
 static gpt_config_t gpt_config;
 
+/*
+ * Number of L1 entries in 2MB, depending on GPCCR_EL3.PGS:
+ * +-------+------------+
+ * |  PGS  | L1 entries |
+ * +-------+------------+
+ * |  4KB  |     32     |
+ * +-------+------------+
+ * |  16KB |     8      |
+ * +-------+------------+
+ * |  64KB |     2      |
+ * +-------+------------+
+ */
+static unsigned int gpt_l1_cnt_2mb;
+
+/*
+ * Mask for the L1 index field, depending on
+ * GPCCR_EL3.L0GPTSZ and GPCCR_EL3.PGS:
+ * +---------+-------------------------------+
+ * |         |             PGS               |
+ * +---------+----------+----------+---------+
+ * | L0GPTSZ |   4KB    |   16KB   |   64KB  |
+ * +---------+----------+----------+---------+
+ * |  1GB    |  0x3FFF  |  0xFFF   |  0x3FF  |
+ * +---------+----------+----------+---------+
+ * |  16GB   | 0x3FFFF  |  0xFFFF  | 0x3FFF  |
+ * +---------+----------+----------+---------+
+ * |  64GB   | 0xFFFFF  | 0x3FFFF  | 0xFFFF  |
+ * +---------+----------+----------+---------+
+ * |  512GB  | 0x7FFFFF | 0x1FFFFF | 0x7FFFF |
+ * +---------+----------+----------+---------+
+ */
+static uint64_t gpt_l1_index_mask;
+
+/* Number of 128-bit L1 entries in 2MB, 32MB and 512MB */
+#define L1_QWORDS_2MB	(gpt_l1_cnt_2mb / 2U)
+#define L1_QWORDS_32MB	(L1_QWORDS_2MB * 16U)
+#define L1_QWORDS_512MB	(L1_QWORDS_32MB * 16U)
+
+/* Size in bytes of L1 entries in 2MB, 32MB */
+#define L1_BYTES_2MB	(gpt_l1_cnt_2mb * sizeof(uint64_t))
+#define L1_BYTES_32MB	(L1_BYTES_2MB * 16U)
+
+/* Get the index into the L1 table from a physical address */
+#define GPT_L1_INDEX(_pa)	\
+	(((_pa) >> (unsigned int)GPT_L1_IDX_SHIFT(gpt_config.p)) & gpt_l1_index_mask)
+
 /* These variables are used during initialization of the L1 tables */
-static unsigned int gpt_next_l1_tbl_idx;
 static uintptr_t gpt_l1_tbl;
 
+/* These variable is used during runtime */
+#if (RME_GPT_BITLOCK_BLOCK == 0)
+/*
+ * The GPTs are protected by a global spinlock to ensure
+ * that multiple CPUs do not attempt to change the descriptors at once.
+ */
+static spinlock_t gpt_lock;
+#else
+
+/* Bitlocks base address */
+static bitlock_t *gpt_bitlock_base;
+#endif
+
+/* Lock/unlock macros for GPT entries */
+#if (RME_GPT_BITLOCK_BLOCK == 0)
+/*
+ * Access to GPT is controlled by a global lock to ensure
+ * that no more than one CPU is allowed to make changes at any
+ * given time.
+ */
+#define GPT_LOCK	spin_lock(&gpt_lock)
+#define GPT_UNLOCK	spin_unlock(&gpt_lock)
+#else
+/*
+ * Access to a block of memory is controlled by a bitlock.
+ * Size of block = RME_GPT_BITLOCK_BLOCK * 512MB.
+ */
+#define GPT_LOCK	bit_lock(gpi_info.lock, gpi_info.mask)
+#define GPT_UNLOCK	bit_unlock(gpi_info.lock, gpi_info.mask)
+#endif
+
+static void tlbi_page_dsbosh(uintptr_t base)
+{
+	/* Look-up table for invalidation TLBs for 4KB, 16KB and 64KB pages */
+	static const gpt_tlbi_lookup_t tlbi_page_lookup[] = {
+		{ tlbirpalos_4k, ~(SZ_4K - 1UL) },
+		{ tlbirpalos_64k, ~(SZ_64K - 1UL) },
+		{ tlbirpalos_16k, ~(SZ_16K - 1UL) }
+	};
+
+	tlbi_page_lookup[gpt_config.pgs].function(
+			base & tlbi_page_lookup[gpt_config.pgs].mask);
+	dsbosh();
+}
+
+/*
+ * Helper function to fill out GPI entries in a single L1 table
+ * with Granules or Contiguous descriptor.
+ *
+ * Parameters
+ *   l1			Pointer to 2MB, 32MB or 512MB aligned L1 table entry to fill out
+ *   l1_desc		GPT Granules or Contiguous descriptor set this range to
+ *   cnt		Number of double 128-bit L1 entries to fill
+ *
+ */
+static void fill_desc(uint64_t *l1, uint64_t l1_desc, unsigned int cnt)
+{
+	uint128_t *l1_quad = (uint128_t *)l1;
+	uint128_t l1_quad_desc = (uint128_t)l1_desc | ((uint128_t)l1_desc << 64);
+
+	VERBOSE("GPT: %s(%p 0x%"PRIx64" %u)\n", __func__, l1, l1_desc, cnt);
+
+	for (unsigned int i = 0U; i < cnt; i++) {
+		*l1_quad++ = l1_quad_desc;
+	}
+}
+
+static void shatter_2mb(uintptr_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	unsigned long idx = GPT_L1_INDEX(ALIGN_2MB(base));
+
+	VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n",
+				__func__, base, l1_desc);
+
+	/* Convert 2MB Contiguous block to Granules */
+	fill_desc(&gpi_info->gpt_l1_addr[idx], l1_desc, L1_QWORDS_2MB);
+}
+
+static void shatter_32mb(uintptr_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	unsigned long idx = GPT_L1_INDEX(ALIGN_2MB(base));
+	const uint64_t *l1_gran = &gpi_info->gpt_l1_addr[idx];
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 2MB);
+	uint64_t *l1;
+
+	VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n",
+				__func__, base, l1_desc);
+
+	/* Get index corresponding to 32MB aligned address */
+	idx = GPT_L1_INDEX(ALIGN_32MB(base));
+	l1 = &gpi_info->gpt_l1_addr[idx];
+
+	/* 16 x 2MB blocks in 32MB */
+	for (unsigned int i = 0U; i < 16U; i++) {
+		/* Fill with Granules or Contiguous descriptors */
+		fill_desc(l1, (l1 == l1_gran) ? l1_desc : l1_cont_desc,
+							L1_QWORDS_2MB);
+		l1 = (uint64_t *)((uintptr_t)l1 + L1_BYTES_2MB);
+	}
+}
+
+static void shatter_512mb(uintptr_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	unsigned long idx = GPT_L1_INDEX(ALIGN_32MB(base));
+	const uint64_t *l1_32mb = &gpi_info->gpt_l1_addr[idx];
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 32MB);
+	uint64_t *l1;
+
+	VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n",
+				__func__, base, l1_desc);
+
+	/* Get index corresponding to 512MB aligned address */
+	idx = GPT_L1_INDEX(ALIGN_512MB(base));
+	l1 = &gpi_info->gpt_l1_addr[idx];
+
+	/* 16 x 32MB blocks in 512MB */
+	for (unsigned int i = 0U; i < 16U; i++) {
+		if (l1 == l1_32mb) {
+			/* Shatter this 32MB block */
+			shatter_32mb(base, gpi_info, l1_desc);
+		} else {
+			/* Fill 32MB with Contiguous descriptors */
+			fill_desc(l1, l1_cont_desc, L1_QWORDS_32MB);
+		}
+
+		l1 = (uint64_t *)((uintptr_t)l1 + L1_BYTES_32MB);
+	}
+}
+
 /*
  * This function checks to see if a GPI value is valid.
  *
@@ -213,10 +397,11 @@
 		 * to see if this PAS would fall into one that has already been
 		 * initialized.
 		 */
-		for (unsigned int i = GPT_L0_IDX(pas_regions[idx].base_pa);
-		     i <= GPT_L0_IDX(pas_regions[idx].base_pa +
-						pas_regions[idx].size - 1UL);
-		     i++) {
+		for (unsigned int i =
+			(unsigned int)GPT_L0_IDX(pas_regions[idx].base_pa);
+			i <= GPT_L0_IDX(pas_regions[idx].base_pa +
+					pas_regions[idx].size - 1UL);
+			i++) {
 			if ((GPT_L0_TYPE(l0_desc[i]) == GPT_L0_TYPE_BLK_DESC) &&
 			    (GPT_L0_BLKD_GPI(l0_desc[i]) == GPT_GPI_ANY)) {
 				/* This descriptor is unused so continue */
@@ -227,7 +412,7 @@
 			 * This descriptor has been initialized in a previous
 			 * call to this function so cannot be initialized again.
 			 */
-			ERROR("GPT: PAS[%u] overlaps with previous L0[%d]!\n",
+			ERROR("GPT: PAS[%u] overlaps with previous L0[%u]!\n",
 			      idx, i);
 			return -EFAULT;
 		}
@@ -318,7 +503,7 @@
 static int validate_l0_params(gpccr_pps_e pps, uintptr_t l0_mem_base,
 				size_t l0_mem_size)
 {
-	size_t l0_alignment;
+	size_t l0_alignment, locks_size = 0;
 
 	/*
 	 * Make sure PPS is valid and then store it since macros need this value
@@ -344,12 +529,28 @@
 		return -EFAULT;
 	}
 
-	/* Check size */
-	if (l0_mem_size < GPT_L0_TABLE_SIZE(gpt_config.t)) {
-		ERROR("%sL0%s\n", "GPT: Inadequate ", " memory\n");
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	/*
+	 * Size of bitlocks in bytes for the protected address space
+	 * with RME_GPT_BITLOCK_BLOCK * 512MB per bitlock.
+	 */
+	locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) /
+			(RME_GPT_BITLOCK_BLOCK * SZ_512M * 8U);
+
+	/*
+	 * If protected space size is less than the size covered
+	 * by 'bitlock' structure, check for a single bitlock.
+	 */
+	if (locks_size < LOCK_SIZE) {
+		locks_size = LOCK_SIZE;
+	}
+#endif
+	/* Check size for L0 tables and bitlocks */
+	if (l0_mem_size < (GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size)) {
+		ERROR("GPT: Inadequate L0 memory\n");
 		ERROR("      Expected 0x%lx bytes, got 0x%lx bytes\n",
-		      GPT_L0_TABLE_SIZE(gpt_config.t),
-		      l0_mem_size);
+			GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size,
+			l0_mem_size);
 		return -ENOMEM;
 	}
 
@@ -397,9 +598,10 @@
 
 	/* Make sure enough space was supplied */
 	if (l1_mem_size < l1_gpt_mem_sz) {
-		ERROR("%sL1 GPTs%s", "GPT: Inadequate ", " memory\n");
+		ERROR("%sL1 GPTs%s", (const char *)"GPT: Inadequate ",
+			(const char *)" memory\n");
 		ERROR("      Expected 0x%lx bytes, got 0x%lx bytes\n",
-		      l1_gpt_mem_sz, l1_mem_size);
+			l1_gpt_mem_sz, l1_mem_size);
 		return -ENOMEM;
 	}
 
@@ -418,8 +620,7 @@
 static void generate_l0_blk_desc(pas_region_t *pas)
 {
 	uint64_t gpt_desc;
-	unsigned int end_idx;
-	unsigned int idx;
+	unsigned long idx, end_idx;
 	uint64_t *l0_gpt_arr;
 
 	assert(gpt_config.plat_gpt_l0_base != 0U);
@@ -448,7 +649,7 @@
 	/* Generate the needed block descriptors */
 	for (; idx < end_idx; idx++) {
 		l0_gpt_arr[idx] = gpt_desc;
-		VERBOSE("GPT: L0 entry (BLOCK) index %u [%p]: GPI = 0x%"PRIx64" (0x%"PRIx64")\n",
+		VERBOSE("GPT: L0 entry (BLOCK) index %lu [%p]: GPI = 0x%"PRIx64" (0x%"PRIx64")\n",
 			idx, &l0_gpt_arr[idx],
 			(gpt_desc >> GPT_L0_BLK_DESC_GPI_SHIFT) &
 			GPT_L0_BLK_DESC_GPI_MASK, l0_gpt_arr[idx]);
@@ -482,51 +683,199 @@
 		return end_pa;
 	}
 
-	return (cur_idx + 1U) << GPT_L0_IDX_SHIFT;
+	return (cur_idx + 1UL) << GPT_L0_IDX_SHIFT;
 }
 
 /*
- * Helper function to fill out GPI entries in a single L1 table. This function
- * fills out entire L1 descriptors at a time to save memory writes.
+ * Helper function to fill out GPI entries from 'first' granule address of
+ * the specified 'length' in a single L1 table with 'l1_desc' Contiguous
+ * descriptor.
  *
  * Parameters
- *   gpi		GPI to set this range to
  *   l1			Pointer to L1 table to fill out
- *   first		Address of first granule in range.
- *   last		Address of last granule in range (inclusive).
+ *   first		Address of first granule in range
+ *   length		Length of the range in bytes
+ *   gpi		GPI set this range to
+ *
+ * Return
+ *   Address of next granule in range.
  */
-static void fill_l1_tbl(uint64_t gpi, uint64_t *l1, uintptr_t first,
-			    uintptr_t last)
+static uintptr_t fill_l1_cont_desc(uint64_t *l1, uintptr_t first,
+				   size_t length, unsigned int gpi)
 {
-	uint64_t gpi_field = GPT_BUILD_L1_DESC(gpi);
-	uint64_t gpi_mask = ULONG_MAX;
+	/*
+	 * Look up table for contiguous blocks and descriptors.
+	 * Entries should be defined in descending block sizes:
+	 * 512MB, 32MB and 2MB.
+	 */
+	static const gpt_fill_lookup_t gpt_fill_lookup[] = {
+#if (RME_GPT_MAX_BLOCK == 512)
+		{ SZ_512M, GPT_L1_CONT_DESC_512MB },
+#endif
+#if (RME_GPT_MAX_BLOCK >= 32)
+		{ SZ_32M, GPT_L1_CONT_DESC_32MB },
+#endif
+#if (RME_GPT_MAX_BLOCK != 0)
+		{ SZ_2M, GPT_L1_CONT_DESC_2MB }
+#endif
+	};
 
-	assert(first <= last);
-	assert((first & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1)) == 0U);
-	assert((last & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1)) == 0U);
-	assert(GPT_L0_IDX(first) == GPT_L0_IDX(last));
-	assert(l1 != NULL);
+	/*
+	 * Iterate through all block sizes (512MB, 32MB and 2MB)
+	 * starting with maximum supported.
+	 */
+	for (unsigned long i = 0UL; i < ARRAY_SIZE(gpt_fill_lookup); i++) {
+		/* Calculate index */
+		unsigned long idx = GPT_L1_INDEX(first);
+
+		/* Contiguous block size */
+		size_t cont_size = gpt_fill_lookup[i].size;
+
+		if (GPT_REGION_IS_CONT(length, first, cont_size)) {
+
+			/* Generate Contiguous descriptor */
+			uint64_t l1_desc = GPT_L1_GPI_CONT_DESC(gpi,
+						gpt_fill_lookup[i].desc);
+
+			/* Number of 128-bit L1 entries in block */
+			unsigned int cnt;
+
+			switch (cont_size) {
+			case SZ_512M:
+				cnt = L1_QWORDS_512MB;
+				break;
+			case SZ_32M:
+				cnt = L1_QWORDS_32MB;
+				break;
+			default:			/* SZ_2MB */
+				cnt = L1_QWORDS_2MB;
+			}
+
+			VERBOSE("GPT: Contiguous descriptor 0x%"PRIxPTR" %luMB\n",
+				first, cont_size / SZ_1M);
+
+			/* Fill Contiguous descriptors */
+			fill_desc(&l1[idx], l1_desc, cnt);
+			first += cont_size;
+			length -= cont_size;
+
+			if (length == 0UL) {
+				break;
+			}
+		}
+	}
+
+	return first;
+}
+
+/* Build Granules descriptor with the same 'gpi' for every GPI entry */
+static uint64_t build_l1_desc(unsigned int gpi)
+{
+	uint64_t l1_desc = (uint64_t)gpi | ((uint64_t)gpi << 4);
+
+	l1_desc |= (l1_desc << 8);
+	l1_desc |= (l1_desc << 16);
+	return (l1_desc | (l1_desc << 32));
+}
+
+/*
+ * Helper function to fill out GPI entries from 'first' to 'last' granule
+ * address in a single L1 table with 'l1_desc' Granules descriptor.
+ *
+ * Parameters
+ *   l1			Pointer to L1 table to fill out
+ *   first		Address of first granule in range
+ *   last		Address of last granule in range (inclusive)
+ *   gpi		GPI set this range to
+ *
+ * Return
+ *   Address of next granule in range.
+ */
+static uintptr_t fill_l1_gran_desc(uint64_t *l1, uintptr_t first,
+				   uintptr_t last, unsigned int gpi)
+{
+	uint64_t gpi_mask;
+	unsigned long i;
+
+	/* Generate Granules descriptor */
+	uint64_t l1_desc = build_l1_desc(gpi);
 
 	/* Shift the mask if we're starting in the middle of an L1 entry */
-	gpi_mask = gpi_mask << (GPT_L1_GPI_IDX(gpt_config.p, first) << 2);
+	gpi_mask = ULONG_MAX << (GPT_L1_GPI_IDX(gpt_config.p, first) << 2);
 
 	/* Fill out each L1 entry for this region */
-	for (unsigned int i = GPT_L1_IDX(gpt_config.p, first);
-	     i <= GPT_L1_IDX(gpt_config.p, last); i++) {
+	for (i = GPT_L1_INDEX(first); i <= GPT_L1_INDEX(last); i++) {
+
 		/* Account for stopping in the middle of an L1 entry */
-		if (i == GPT_L1_IDX(gpt_config.p, last)) {
+		if (i == GPT_L1_INDEX(last)) {
 			gpi_mask &= (gpi_mask >> ((15U -
 				    GPT_L1_GPI_IDX(gpt_config.p, last)) << 2));
 		}
 
+		assert((l1[i] & gpi_mask) == (GPT_L1_ANY_DESC & gpi_mask));
+
 		/* Write GPI values */
-		assert((l1[i] & gpi_mask) ==
-		       (GPT_BUILD_L1_DESC(GPT_GPI_ANY) & gpi_mask));
-		l1[i] = (l1[i] & ~gpi_mask) | (gpi_mask & gpi_field);
+		l1[i] = (l1[i] & ~gpi_mask) | (l1_desc & gpi_mask);
 
 		/* Reset mask */
 		gpi_mask = ULONG_MAX;
 	}
+
+	return last + GPT_PGS_ACTUAL_SIZE(gpt_config.p);
+}
+
+/*
+ * Helper function to fill out GPI entries in a single L1 table.
+ * This function fills out an entire L1 table with either Contiguous
+ * or Granules descriptors depending on region length and alignment.
+ *
+ * Parameters
+ *   l1			Pointer to L1 table to fill out
+ *   first		Address of first granule in range
+ *   last		Address of last granule in range (inclusive)
+ *   gpi		GPI set this range to
+ */
+static void fill_l1_tbl(uint64_t *l1, uintptr_t first, uintptr_t last,
+			unsigned int gpi)
+{
+	assert(l1 != NULL);
+	assert(first <= last);
+	assert((first & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1UL)) == 0UL);
+	assert((last & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1UL)) == 0UL);
+	assert(GPT_L0_IDX(first) == GPT_L0_IDX(last));
+
+	while (first < last) {
+		/* Region length */
+		size_t length = last - first + GPT_PGS_ACTUAL_SIZE(gpt_config.p);
+
+		if (length < SZ_2M) {
+			/*
+			 * Fill with Granule descriptor in case of
+			 * region length < 2MB.
+			 */
+			first = fill_l1_gran_desc(l1, first, last, gpi);
+
+		} else if ((first & (SZ_2M - UL(1))) == UL(0)) {
+			/*
+			 * For region length >= 2MB and at least 2MB aligned
+			 * call to fill_l1_cont_desc will iterate through
+			 * all block sizes (512MB, 32MB and 2MB) supported and
+			 * fill corresponding Contiguous descriptors.
+			 */
+			first = fill_l1_cont_desc(l1, first, length, gpi);
+		} else {
+			/*
+			 * For not aligned region >= 2MB fill with Granules
+			 * descriptors up to the next 2MB aligned address.
+			 */
+			uintptr_t new_last = ALIGN_2MB(first + SZ_2M) -
+					GPT_PGS_ACTUAL_SIZE(gpt_config.p);
+
+			first = fill_l1_gran_desc(l1, first, new_last, gpi);
+		}
+	}
+
+	assert(first == (last + GPT_PGS_ACTUAL_SIZE(gpt_config.p)));
 }
 
 /*
@@ -543,16 +892,14 @@
 static uint64_t *get_new_l1_tbl(void)
 {
 	/* Retrieve the next L1 table */
-	uint64_t *l1 = (uint64_t *)((uint64_t)(gpt_l1_tbl) +
-		       (GPT_L1_TABLE_SIZE(gpt_config.p) *
-		       gpt_next_l1_tbl_idx));
+	uint64_t *l1 = (uint64_t *)gpt_l1_tbl;
 
-	/* Increment L1 counter */
-	gpt_next_l1_tbl_idx++;
+	/* Increment L1 GPT address */
+	gpt_l1_tbl += GPT_L1_TABLE_SIZE(gpt_config.p);
 
 	/* Initialize all GPIs to GPT_GPI_ANY */
 	for (unsigned int i = 0U; i < GPT_L1_ENTRY_COUNT(gpt_config.p); i++) {
-		l1[i] = GPT_BUILD_L1_DESC(GPT_GPI_ANY);
+		l1[i] = GPT_L1_ANY_DESC;
 	}
 
 	return l1;
@@ -573,7 +920,7 @@
 	uintptr_t last_gran_pa;
 	uint64_t *l0_gpt_base;
 	uint64_t *l1_gpt_arr;
-	unsigned int l0_idx;
+	unsigned int l0_idx, gpi;
 
 	assert(gpt_config.plat_gpt_l0_base != 0U);
 	assert(pas != NULL);
@@ -582,18 +929,19 @@
 	 * Checking of PAS parameters has already been done in
 	 * validate_pas_mappings so no need to check the same things again.
 	 */
-
 	end_pa = pas->base_pa + pas->size;
 	l0_gpt_base = (uint64_t *)gpt_config.plat_gpt_l0_base;
 
 	/* We start working from the granule at base PA */
 	cur_pa = pas->base_pa;
 
-	/* Iterate over each L0 region in this memory range */
-	for (l0_idx = GPT_L0_IDX(pas->base_pa);
-	     l0_idx <= GPT_L0_IDX(end_pa - 1U);
-	     l0_idx++) {
+	/* Get GPI */
+	gpi = GPT_PAS_ATTR_GPI(pas->attrs);
 
+	/* Iterate over each L0 region in this memory range */
+	for (l0_idx = (unsigned int)GPT_L0_IDX(pas->base_pa);
+	     l0_idx <= (unsigned int)GPT_L0_IDX(end_pa - 1UL);
+	     l0_idx++) {
 		/*
 		 * See if the L0 entry is already a table descriptor or if we
 		 * need to create one.
@@ -623,8 +971,7 @@
 		 * function needs the addresses of the first granule and last
 		 * granule in the range.
 		 */
-		fill_l1_tbl(GPT_PAS_ATTR_GPI(pas->attrs), l1_gpt_arr,
-				cur_pa, last_gran_pa);
+		fill_l1_tbl(l1_gpt_arr, cur_pa, last_gran_pa, gpi);
 
 		/* Advance cur_pa to first granule in next L0 region */
 		cur_pa = get_l1_end_pa(cur_pa, end_pa);
@@ -644,9 +991,9 @@
  */
 static void flush_l0_for_pas_array(pas_region_t *pas, unsigned int pas_count)
 {
-	unsigned int idx;
-	unsigned int start_idx;
-	unsigned int end_idx;
+	unsigned long idx;
+	unsigned long start_idx;
+	unsigned long end_idx;
 	uint64_t *l0 = (uint64_t *)gpt_config.plat_gpt_l0_base;
 
 	assert(pas != NULL);
@@ -657,7 +1004,7 @@
 	end_idx = GPT_L0_IDX(pas[0].base_pa + pas[0].size - 1UL);
 
 	/* Find lowest and highest L0 indices used in this PAS array */
-	for (idx = 1U; idx < pas_count; idx++) {
+	for (idx = 1UL; idx < pas_count; idx++) {
 		if (GPT_L0_IDX(pas[idx].base_pa) < start_idx) {
 			start_idx = GPT_L0_IDX(pas[idx].base_pa);
 		}
@@ -671,7 +1018,7 @@
 	 * the end index value.
 	 */
 	flush_dcache_range((uintptr_t)&l0[start_idx],
-			   ((end_idx + 1U) - start_idx) * sizeof(uint64_t));
+			   ((end_idx + 1UL) - start_idx) * sizeof(uint64_t));
 }
 
 /*
@@ -767,8 +1114,10 @@
 int gpt_init_l0_tables(gpccr_pps_e pps, uintptr_t l0_mem_base,
 		       size_t l0_mem_size)
 {
-	int ret;
 	uint64_t gpt_desc;
+	size_t locks_size = 0;
+	__unused bitlock_t *bit_locks;
+	int ret;
 
 	/* Ensure that MMU and Data caches are enabled */
 	assert((read_sctlr_el3() & SCTLR_C_BIT) != 0U);
@@ -787,9 +1136,31 @@
 		((uint64_t *)l0_mem_base)[i] = gpt_desc;
 	}
 
-	/* Flush updated L0 tables to memory */
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	/* Initialise bitlocks at the end of L0 table */
+	bit_locks = (bitlock_t *)(l0_mem_base +
+					GPT_L0_TABLE_SIZE(gpt_config.t));
+
+	/* Size of bitlocks in bytes */
+	locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) /
+					(RME_GPT_BITLOCK_BLOCK * SZ_512M * 8U);
+
+	/*
+	 * If protected space size is less than the size covered
+	 * by 'bitlock' structure, initialise a single bitlock.
+	 */
+	if (locks_size < LOCK_SIZE) {
+		locks_size = LOCK_SIZE;
+	}
+
+	for (size_t i = 0UL; i < (locks_size/LOCK_SIZE); i++) {
+		bit_locks[i].lock = 0U;
+	}
+#endif
+
+	/* Flush updated L0 tables and bitlocks to memory */
 	flush_dcache_range((uintptr_t)l0_mem_base,
-			   (size_t)GPT_L0_TABLE_SIZE(gpt_config.t));
+				GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size);
 
 	/* Stash the L0 base address once initial setup is complete */
 	gpt_config.plat_gpt_l0_base = l0_mem_base;
@@ -806,7 +1177,7 @@
  * This function can be called multiple times with different L1 memory ranges
  * and PAS regions if it is desirable to place L1 tables in different locations
  * in memory. (ex: you have multiple DDR banks and want to place the L1 tables
- * in the DDR bank that they control)
+ * in the DDR bank that they control).
  *
  * Parameters
  *   pgs		PGS value to use for table generation.
@@ -822,8 +1193,7 @@
 			   size_t l1_mem_size, pas_region_t *pas_regions,
 			   unsigned int pas_count)
 {
-	int ret;
-	int l1_gpt_cnt;
+	int l1_gpt_cnt, ret;
 
 	/* Ensure that MMU and Data caches are enabled */
 	assert((read_sctlr_el3() & SCTLR_C_BIT) != 0U);
@@ -860,9 +1230,14 @@
 
 		/* Set up parameters for L1 table generation */
 		gpt_l1_tbl = l1_mem_base;
-		gpt_next_l1_tbl_idx = 0U;
 	}
 
+	/* Number of L1 entries in 2MB depends on GPCCR_EL3.PGS value */
+	gpt_l1_cnt_2mb = (unsigned int)GPT_L1_ENTRY_COUNT_2MB(gpt_config.p);
+
+	/* Mask for the L1 index field */
+	gpt_l1_index_mask = GPT_L1_IDX_MASK(gpt_config.p);
+
 	INFO("GPT: Boot Configuration\n");
 	INFO("  PPS/T:     0x%x/%u\n", gpt_config.pps, gpt_config.t);
 	INFO("  PGS/P:     0x%x/%u\n", gpt_config.pgs, gpt_config.p);
@@ -894,7 +1269,7 @@
 	if (l1_gpt_cnt > 0) {
 		flush_dcache_range(l1_mem_base,
 				   GPT_L1_TABLE_SIZE(gpt_config.p) *
-				   l1_gpt_cnt);
+				   (size_t)l1_gpt_cnt);
 	}
 
 	/* Make sure that all the entries are written to the memory */
@@ -946,23 +1321,29 @@
 	gpt_config.pgs = (reg >> GPCCR_PGS_SHIFT) & GPCCR_PGS_MASK;
 	gpt_config.p = gpt_p_lookup[gpt_config.pgs];
 
+	/* Number of L1 entries in 2MB depends on GPCCR_EL3.PGS value */
+	gpt_l1_cnt_2mb = (unsigned int)GPT_L1_ENTRY_COUNT_2MB(gpt_config.p);
+
+	/* Mask for the L1 index field */
+	gpt_l1_index_mask = GPT_L1_IDX_MASK(gpt_config.p);
+
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	/* Bitlocks at the end of L0 table */
+	gpt_bitlock_base = (bitlock_t *)(gpt_config.plat_gpt_l0_base +
+					GPT_L0_TABLE_SIZE(gpt_config.t));
+#endif
 	VERBOSE("GPT: Runtime Configuration\n");
 	VERBOSE("  PPS/T:     0x%x/%u\n", gpt_config.pps, gpt_config.t);
 	VERBOSE("  PGS/P:     0x%x/%u\n", gpt_config.pgs, gpt_config.p);
 	VERBOSE("  L0GPTSZ/S: 0x%x/%u\n", GPT_L0GPTSZ, GPT_S_VAL);
 	VERBOSE("  L0 base:   0x%"PRIxPTR"\n", gpt_config.plat_gpt_l0_base);
-
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	VERBOSE("  Bitlocks:  0x%"PRIxPTR"\n", (uintptr_t)gpt_bitlock_base);
+#endif
 	return 0;
 }
 
 /*
- * The L1 descriptors are protected by a spinlock to ensure that multiple
- * CPUs do not attempt to change the descriptors at once. In the future it
- * would be better to have separate spinlocks for each L1 descriptor.
- */
-static spinlock_t gpt_lock;
-
-/*
  * A helper to write the value (target_pas << gpi_shift) to the index of
  * the gpt_l1_addr.
  */
@@ -973,6 +1354,8 @@
 	*gpt_l1_desc &= ~(GPT_L1_GRAN_DESC_GPI_MASK << gpi_shift);
 	*gpt_l1_desc |= ((uint64_t)target_pas << gpi_shift);
 	gpt_l1_addr[idx] = *gpt_l1_desc;
+
+	dsboshst();
 }
 
 /*
@@ -982,6 +1365,7 @@
 static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info)
 {
 	uint64_t gpt_l0_desc, *gpt_l0_base;
+	__unused unsigned int block_idx;
 
 	gpt_l0_base = (uint64_t *)gpt_config.plat_gpt_l0_base;
 	gpt_l0_desc = gpt_l0_base[GPT_L0_IDX(base)];
@@ -993,19 +1377,311 @@
 
 	/* Get the table index and GPI shift from PA */
 	gpi_info->gpt_l1_addr = GPT_L0_TBLD_ADDR(gpt_l0_desc);
-	gpi_info->idx = GPT_L1_IDX(gpt_config.p, base);
+	gpi_info->idx = (unsigned int)GPT_L1_INDEX(base);
 	gpi_info->gpi_shift = GPT_L1_GPI_IDX(gpt_config.p, base) << 2;
 
-	gpi_info->gpt_l1_desc = (gpi_info->gpt_l1_addr)[gpi_info->idx];
-	gpi_info->gpi = (gpi_info->gpt_l1_desc >> gpi_info->gpi_shift) &
-		GPT_L1_GRAN_DESC_GPI_MASK;
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	/* Block index */
+	block_idx = (unsigned int)(base / (RME_GPT_BITLOCK_BLOCK * SZ_512M));
+
+	/* Bitlock address and mask */
+	gpi_info->lock = &gpt_bitlock_base[block_idx / LOCK_BITS];
+	gpi_info->mask = 1U << (block_idx & (LOCK_BITS - 1U));
+#endif
 	return 0;
 }
 
 /*
+ * Helper to retrieve the gpt_l1_desc and GPI information from gpi_info.
+ * This function is called with bitlock or spinlock acquired.
+ */
+static void read_gpi(gpi_info_t *gpi_info)
+{
+	gpi_info->gpt_l1_desc = (gpi_info->gpt_l1_addr)[gpi_info->idx];
+
+	if ((gpi_info->gpt_l1_desc & GPT_L1_TYPE_CONT_DESC_MASK) ==
+				 GPT_L1_TYPE_CONT_DESC) {
+		/* Read GPI from Contiguous descriptor */
+		gpi_info->gpi = (unsigned int)GPT_L1_CONT_GPI(gpi_info->gpt_l1_desc);
+	} else {
+		/* Read GPI from Granules descriptor */
+		gpi_info->gpi = (unsigned int)((gpi_info->gpt_l1_desc >> gpi_info->gpi_shift) &
+						GPT_L1_GRAN_DESC_GPI_MASK);
+	}
+}
+
+static void flush_page_to_popa(uintptr_t addr)
+{
+	size_t size = GPT_PGS_ACTUAL_SIZE(gpt_config.p);
+
+	if (is_feat_mte2_supported()) {
+		flush_dcache_to_popa_range_mte2(addr, size);
+	} else {
+		flush_dcache_to_popa_range(addr, size);
+	}
+}
+
+/*
+ * Helper function to check if all L1 entries in 2MB block have
+ * the same Granules descriptor value.
+ *
+ * Parameters
+ *   base		Base address of the region to be checked
+ *   gpi_info		Pointer to 'gpt_config_t' structure
+ *   l1_desc		GPT Granules descriptor with all entries
+ *			set to the same GPI.
+ *
+ * Return
+ *   true if L1 all entries have the same descriptor value, false otherwise.
+ */
+__unused static bool check_fuse_2mb(uint64_t base, const gpi_info_t *gpi_info,
+					uint64_t l1_desc)
+{
+	/* Last L1 entry index in 2MB block */
+	unsigned int long idx = GPT_L1_INDEX(ALIGN_2MB(base)) +
+						gpt_l1_cnt_2mb - 1UL;
+
+	/* Number of L1 entries in 2MB block */
+	unsigned int cnt = gpt_l1_cnt_2mb;
+
+	/*
+	 * Start check from the last L1 entry and continue until the first
+	 * non-matching to the passed Granules descriptor value is found.
+	 */
+	while (cnt-- != 0U) {
+		if (gpi_info->gpt_l1_addr[idx--] != l1_desc) {
+			/* Non-matching L1 entry found */
+			return false;
+		}
+	}
+
+	return true;
+}
+
+__unused static void fuse_2mb(uint64_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	/* L1 entry index of the start of 2MB block */
+	unsigned long idx_2 = GPT_L1_INDEX(ALIGN_2MB(base));
+
+	/* 2MB Contiguous descriptor */
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 2MB);
+
+	VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", __func__, base, l1_desc);
+
+	fill_desc(&gpi_info->gpt_l1_addr[idx_2], l1_cont_desc, L1_QWORDS_2MB);
+}
+
+/*
+ * Helper function to check if all 1st L1 entries of 2MB blocks
+ * in 32MB have the same 2MB Contiguous descriptor value.
+ *
+ * Parameters
+ *   base		Base address of the region to be checked
+ *   gpi_info		Pointer to 'gpt_config_t' structure
+ *   l1_desc		GPT Granules descriptor.
+ *
+ * Return
+ *   true if all L1 entries have the same descriptor value, false otherwise.
+ */
+__unused static bool check_fuse_32mb(uint64_t base, const gpi_info_t *gpi_info,
+					uint64_t l1_desc)
+{
+	/* The 1st L1 entry index of the last 2MB block in 32MB */
+	unsigned long idx = GPT_L1_INDEX(ALIGN_32MB(base)) +
+					(15UL * gpt_l1_cnt_2mb);
+
+	/* 2MB Contiguous descriptor */
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 2MB);
+
+	/* Number of 2MB blocks in 32MB */
+	unsigned int cnt = 16U;
+
+	/* Set the first L1 entry to 2MB Contiguous descriptor */
+	gpi_info->gpt_l1_addr[GPT_L1_INDEX(ALIGN_2MB(base))] = l1_cont_desc;
+
+	/*
+	 * Start check from the 1st L1 entry of the last 2MB block and
+	 * continue until the first non-matching to 2MB Contiguous descriptor
+	 * value is found.
+	 */
+	while (cnt-- != 0U) {
+		if (gpi_info->gpt_l1_addr[idx] != l1_cont_desc) {
+			/* Non-matching L1 entry found */
+			return false;
+		}
+		idx -= gpt_l1_cnt_2mb;
+	}
+
+	return true;
+}
+
+__unused static void fuse_32mb(uint64_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	/* L1 entry index of the start of 32MB block */
+	unsigned long idx_32 = GPT_L1_INDEX(ALIGN_32MB(base));
+
+	/* 32MB Contiguous descriptor */
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 32MB);
+
+	VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", __func__, base, l1_desc);
+
+	fill_desc(&gpi_info->gpt_l1_addr[idx_32], l1_cont_desc, L1_QWORDS_32MB);
+}
+
+/*
+ * Helper function to check if all 1st L1 entries of 32MB blocks
+ * in 512MB have the same 32MB Contiguous descriptor value.
+ *
+ * Parameters
+ *   base		Base address of the region to be checked
+ *   gpi_info		Pointer to 'gpt_config_t' structure
+ *   l1_desc		GPT Granules descriptor.
+ *
+ * Return
+ *   true if all L1 entries have the same descriptor value, false otherwise.
+ */
+__unused static bool check_fuse_512mb(uint64_t base, const gpi_info_t *gpi_info,
+					uint64_t l1_desc)
+{
+	/* The 1st L1 entry index of the last 32MB block in 512MB */
+	unsigned long idx = GPT_L1_INDEX(ALIGN_512MB(base)) +
+					(15UL * 16UL * gpt_l1_cnt_2mb);
+
+	/* 32MB Contiguous descriptor */
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 32MB);
+
+	/* Number of 32MB blocks in 512MB */
+	unsigned int cnt = 16U;
+
+	/* Set the first L1 entry to 2MB Contiguous descriptor */
+	gpi_info->gpt_l1_addr[GPT_L1_INDEX(ALIGN_32MB(base))] = l1_cont_desc;
+
+	/*
+	 * Start check from the 1st L1 entry of the last 32MB block and
+	 * continue until the first non-matching to 32MB Contiguous descriptor
+	 * value is found.
+	 */
+	while (cnt-- != 0U) {
+		if (gpi_info->gpt_l1_addr[idx] != l1_cont_desc) {
+			/* Non-matching L1 entry found */
+			return false;
+		}
+		idx -= 16UL * gpt_l1_cnt_2mb;
+	}
+
+	return true;
+}
+
+__unused static void fuse_512mb(uint64_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	/* L1 entry index of the start of 512MB block */
+	unsigned long idx_512 = GPT_L1_INDEX(ALIGN_512MB(base));
+
+	/* 512MB Contiguous descriptor */
+	uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 512MB);
+
+	VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", __func__, base, l1_desc);
+
+	fill_desc(&gpi_info->gpt_l1_addr[idx_512], l1_cont_desc, L1_QWORDS_512MB);
+}
+
+/*
+ * Helper function to convert GPI entries in a single L1 table
+ * from Granules to Contiguous descriptor.
+ *
+ * Parameters
+ *   base		Base address of the region to be written
+ *   gpi_info		Pointer to 'gpt_config_t' structure
+ *   l1_desc		GPT Granules descriptor with all entries
+ *			set to the same GPI.
+ */
+__unused static void fuse_block(uint64_t base, const gpi_info_t *gpi_info,
+				uint64_t l1_desc)
+{
+	/* Start with check for 2MB block */
+	if (!check_fuse_2mb(base, gpi_info, l1_desc)) {
+		/* Check for 2MB fusing failed */
+		return;
+	}
+
+#if (RME_GPT_MAX_BLOCK == 2)
+	fuse_2mb(base, gpi_info, l1_desc);
+#else
+	/* Check for 32MB block */
+	if (!check_fuse_32mb(base, gpi_info, l1_desc)) {
+		/* Check for 32MB fusing failed, fuse to 2MB */
+		fuse_2mb(base, gpi_info, l1_desc);
+		return;
+	}
+
+#if (RME_GPT_MAX_BLOCK == 32)
+	fuse_32mb(base, gpi_info, l1_desc);
+#else
+	/* Check for 512MB block */
+	if (!check_fuse_512mb(base, gpi_info, l1_desc)) {
+		/* Check for 512MB fusing failed, fuse to 32MB */
+		fuse_32mb(base, gpi_info, l1_desc);
+		return;
+	}
+
+	/* Fuse to 512MB */
+	fuse_512mb(base, gpi_info, l1_desc);
+
+#endif	/* RME_GPT_MAX_BLOCK == 32 */
+#endif	/* RME_GPT_MAX_BLOCK == 2 */
+}
+
+/*
+ * Helper function to convert GPI entries in a single L1 table
+ * from Contiguous to Granules descriptor. This function updates
+ * descriptor to Granules in passed 'gpt_config_t' structure as
+ * the result of shuttering.
+ *
+ * Parameters
+ *   base		Base address of the region to be written
+ *   gpi_info		Pointer to 'gpt_config_t' structure
+ *   l1_desc		GPT Granules descriptor set this range to.
+ */
+__unused static void shatter_block(uint64_t base, gpi_info_t *gpi_info,
+				   uint64_t l1_desc)
+{
+	/* Look-up table for 2MB, 32MB and 512MB locks shattering */
+	static const gpt_shatter_func gpt_shatter_lookup[] = {
+		shatter_2mb,
+		shatter_32mb,
+		shatter_512mb
+	};
+
+	/* Look-up table for invalidation TLBs for 2MB, 32MB and 512MB blocks */
+	static const gpt_tlbi_lookup_t tlbi_lookup[] = {
+		{ tlbirpalos_2m, ~(SZ_2M - 1UL) },
+		{ tlbirpalos_32m, ~(SZ_32M - 1UL) },
+		{ tlbirpalos_512m, ~(SZ_512M - 1UL) }
+	};
+
+	/* Get shattering level from Contig field of Contiguous descriptor */
+	unsigned long level = GPT_L1_CONT_CONTIG(gpi_info->gpt_l1_desc) - 1UL;
+
+	/* Shatter contiguous block */
+	gpt_shatter_lookup[level](base, gpi_info, l1_desc);
+
+	tlbi_lookup[level].function(base & tlbi_lookup[level].mask);
+	dsbosh();
+
+	/*
+	 * Update 'gpt_config_t' structure's descriptor to Granules to reflect
+	 * the shattered GPI back to caller.
+	 */
+	gpi_info->gpt_l1_desc = l1_desc;
+}
+
+/*
  * This function is the granule transition delegate service. When a granule
  * transition request occurs it is routed to this function to have the request,
- * if valid, fulfilled following A1.1.1 Delegate of RME supplement
+ * if valid, fulfilled following A1.1.1 Delegate of RME supplement.
  *
  * TODO: implement support for transitioning multiple granules at once.
  *
@@ -1022,9 +1698,9 @@
 int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
 {
 	gpi_info_t gpi_info;
-	uint64_t nse;
-	int res;
+	uint64_t nse, __unused l1_desc;
 	unsigned int target_pas;
+	int res;
 
 	/* Ensure that the tables have been set up before taking requests */
 	assert(gpt_config.plat_gpt_l0_base != 0UL);
@@ -1032,10 +1708,6 @@
 	/* Ensure that caches are enabled */
 	assert((read_sctlr_el3() & SCTLR_C_BIT) != 0UL);
 
-	/* Delegate request can only come from REALM or SECURE */
-	assert(src_sec_state == SMC_FROM_REALM ||
-	       src_sec_state == SMC_FROM_SECURE);
-
 	/* See if this is a single or a range of granule transition */
 	if (size != GPT_PGS_ACTUAL_SIZE(gpt_config.p)) {
 		return -EINVAL;
@@ -1060,70 +1732,80 @@
 		return -EINVAL;
 	}
 
-	target_pas = GPT_GPI_REALM;
-	if (src_sec_state == SMC_FROM_SECURE) {
+	/* Delegate request can only come from REALM or SECURE */
+	if ((src_sec_state != SMC_FROM_REALM) &&
+	    (src_sec_state != SMC_FROM_SECURE)) {
+		VERBOSE("GPT: Invalid caller security state 0x%x\n",
+							src_sec_state);
+		return -EINVAL;
+	}
+
+	if (src_sec_state == SMC_FROM_REALM) {
+		target_pas = GPT_GPI_REALM;
+		nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT;
+		l1_desc = GPT_L1_REALM_DESC;
+	} else {
 		target_pas = GPT_GPI_SECURE;
+		nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT;
+		l1_desc = GPT_L1_SECURE_DESC;
+	}
+
+	res = get_gpi_params(base, &gpi_info);
+	if (res != 0) {
+		return res;
 	}
 
 	/*
-	 * Access to L1 tables is controlled by a global lock to ensure
-	 * that no more than one CPU is allowed to make changes at any
-	 * given time.
+	 * Access to GPT is controlled by a lock to ensure that no more
+	 * than one CPU is allowed to make changes at any given time.
 	 */
-	spin_lock(&gpt_lock);
-	res = get_gpi_params(base, &gpi_info);
-	if (res != 0) {
-		spin_unlock(&gpt_lock);
-		return res;
-	}
+	GPT_LOCK;
+	read_gpi(&gpi_info);
 
 	/* Check that the current address is in NS state */
 	if (gpi_info.gpi != GPT_GPI_NS) {
 		VERBOSE("GPT: Only Granule in NS state can be delegated.\n");
 		VERBOSE("      Caller: %u, Current GPI: %u\n", src_sec_state,
 			gpi_info.gpi);
-		spin_unlock(&gpt_lock);
+		GPT_UNLOCK;
 		return -EPERM;
 	}
 
-	if (src_sec_state == SMC_FROM_SECURE) {
-		nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT;
-	} else {
-		nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT;
+#if (RME_GPT_MAX_BLOCK != 0)
+	/* Check for Contiguous descriptor */
+	if ((gpi_info.gpt_l1_desc & GPT_L1_TYPE_CONT_DESC_MASK) ==
+					GPT_L1_TYPE_CONT_DESC) {
+		shatter_block(base, &gpi_info, GPT_L1_NS_DESC);
 	}
-
+#endif
 	/*
 	 * In order to maintain mutual distrust between Realm and Secure
 	 * states, remove any data speculatively fetched into the target
-	 * physical address space. Issue DC CIPAPA over address range.
+	 * physical address space.
+	 * Issue DC CIPAPA or DC_CIGDPAPA on implementations with FEAT_MTE2.
 	 */
-	if (is_feat_mte2_supported()) {
-		flush_dcache_to_popa_range_mte2(nse | base,
-					GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	} else {
-		flush_dcache_to_popa_range(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	}
+	flush_page_to_popa(base | nse);
 
 	write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr,
 		  gpi_info.gpi_shift, gpi_info.idx, target_pas);
-	dsboshst();
 
-	gpt_tlbi_by_pa_ll(base, GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	dsbosh();
+	/* Ensure that all agents observe the new configuration */
+	tlbi_page_dsbosh(base);
 
 	nse = (uint64_t)GPT_NSE_NS << GPT_NSE_SHIFT;
 
-	if (is_feat_mte2_supported()) {
-		flush_dcache_to_popa_range_mte2(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	} else {
-		flush_dcache_to_popa_range(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	}
+	/* Ensure that the scrubbed data have made it past the PoPA */
+	flush_page_to_popa(base | nse);
 
-	/* Unlock access to the L1 tables */
-	spin_unlock(&gpt_lock);
+#if (RME_GPT_MAX_BLOCK != 0)
+	if (gpi_info.gpt_l1_desc == l1_desc) {
+		/* Try to fuse */
+		fuse_block(base, &gpi_info, l1_desc);
+	}
+#endif
+
+	/* Unlock the lock to GPT */
+	GPT_UNLOCK;
 
 	/*
 	 * The isb() will be done as part of context
@@ -1155,7 +1837,7 @@
 int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
 {
 	gpi_info_t gpi_info;
-	uint64_t nse;
+	uint64_t nse, __unused l1_desc;
 	int res;
 
 	/* Ensure that the tables have been set up before taking requests */
@@ -1164,10 +1846,6 @@
 	/* Ensure that MMU and caches are enabled */
 	assert((read_sctlr_el3() & SCTLR_C_BIT) != 0UL);
 
-	/* Delegate request can only come from REALM or SECURE */
-	assert(src_sec_state == SMC_FROM_REALM ||
-	       src_sec_state == SMC_FROM_SECURE);
-
 	/* See if this is a single or a range of granule transition */
 	if (size != GPT_PGS_ACTUAL_SIZE(gpt_config.p)) {
 		return -EINVAL;
@@ -1192,84 +1870,80 @@
 		return -EINVAL;
 	}
 
-	/*
-	 * Access to L1 tables is controlled by a global lock to ensure
-	 * that no more than one CPU is allowed to make changes at any
-	 * given time.
-	 */
-	spin_lock(&gpt_lock);
-
 	res = get_gpi_params(base, &gpi_info);
 	if (res != 0) {
-		spin_unlock(&gpt_lock);
 		return res;
 	}
 
+	/*
+	 * Access to GPT is controlled by a lock to ensure that no more
+	 * than one CPU is allowed to make changes at any given time.
+	 */
+	GPT_LOCK;
+	read_gpi(&gpi_info);
+
 	/* Check that the current address is in the delegated state */
-	if ((src_sec_state == SMC_FROM_REALM  &&
-	     gpi_info.gpi != GPT_GPI_REALM) ||
-	    (src_sec_state == SMC_FROM_SECURE &&
-	     gpi_info.gpi != GPT_GPI_SECURE)) {
-		VERBOSE("GPT: Only Granule in REALM or SECURE state can be undelegated.\n");
+	if ((src_sec_state == SMC_FROM_REALM) &&
+		(gpi_info.gpi == GPT_GPI_REALM)) {
+		l1_desc = GPT_L1_REALM_DESC;
+		nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT;
+	} else if ((src_sec_state == SMC_FROM_SECURE) &&
+		(gpi_info.gpi == GPT_GPI_SECURE)) {
+		l1_desc = GPT_L1_SECURE_DESC;
+		nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT;
+	} else {
+		VERBOSE("GPT: Only Granule in REALM or SECURE state can be undelegated\n");
 		VERBOSE("      Caller: %u Current GPI: %u\n", src_sec_state,
 			gpi_info.gpi);
-		spin_unlock(&gpt_lock);
+		GPT_UNLOCK;
 		return -EPERM;
 	}
 
-
-	/* In order to maintain mutual distrust between Realm and Secure
+#if (RME_GPT_MAX_BLOCK != 0)
+	/* Check for Contiguous descriptor */
+	if ((gpi_info.gpt_l1_desc & GPT_L1_TYPE_CONT_DESC_MASK) ==
+					GPT_L1_TYPE_CONT_DESC) {
+		shatter_block(base, &gpi_info, l1_desc);
+	}
+#endif
+	/*
+	 * In order to maintain mutual distrust between Realm and Secure
 	 * states, remove access now, in order to guarantee that writes
 	 * to the currently-accessible physical address space will not
 	 * later become observable.
 	 */
 	write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr,
 		  gpi_info.gpi_shift, gpi_info.idx, GPT_GPI_NO_ACCESS);
-	dsboshst();
 
-	gpt_tlbi_by_pa_ll(base, GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	dsbosh();
+	/* Ensure that all agents observe the new NO_ACCESS configuration */
+	tlbi_page_dsbosh(base);
 
-	if (src_sec_state == SMC_FROM_SECURE) {
-		nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT;
-	} else {
-		nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT;
-	}
-
-	/* Ensure that the scrubbed data has made it past the PoPA */
-	if (is_feat_mte2_supported()) {
-		flush_dcache_to_popa_range_mte2(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	} else {
-		flush_dcache_to_popa_range(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	}
+	/* Ensure that the scrubbed data have made it past the PoPA */
+	flush_page_to_popa(base | nse);
 
 	/*
-	 * Remove any data loaded speculatively
-	 * in NS space from before the scrubbing
+	 * Remove any data loaded speculatively in NS space from before
+	 * the scrubbing.
 	 */
 	nse = (uint64_t)GPT_NSE_NS << GPT_NSE_SHIFT;
 
-	if (is_feat_mte2_supported()) {
-		flush_dcache_to_popa_range_mte2(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	} else {
-		flush_dcache_to_popa_range(nse | base,
-					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	}
+	flush_page_to_popa(base | nse);
 
-	/* Clear existing GPI encoding and transition granule. */
+	/* Clear existing GPI encoding and transition granule */
 	write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr,
 		  gpi_info.gpi_shift, gpi_info.idx, GPT_GPI_NS);
-	dsboshst();
 
 	/* Ensure that all agents observe the new NS configuration */
-	gpt_tlbi_by_pa_ll(base, GPT_PGS_ACTUAL_SIZE(gpt_config.p));
-	dsbosh();
+	tlbi_page_dsbosh(base);
 
-	/* Unlock access to the L1 tables. */
-	spin_unlock(&gpt_lock);
+#if (RME_GPT_MAX_BLOCK != 0)
+	if (gpi_info.gpt_l1_desc == GPT_L1_NS_DESC) {
+		/* Try to fuse */
+		fuse_block(base, &gpi_info, GPT_L1_NS_DESC);
+	}
+#endif
+	/* Unlock the lock to GPT */
+	GPT_UNLOCK;
 
 	/*
 	 * The isb() will be done as part of context
diff --git a/lib/gpt_rme/gpt_rme.mk b/lib/gpt_rme/gpt_rme.mk
index 60176f4..7d6b61f 100644
--- a/lib/gpt_rme/gpt_rme.mk
+++ b/lib/gpt_rme/gpt_rme.mk
@@ -1,8 +1,22 @@
 #
-# Copyright (c) 2021, Arm Limited. All rights reserved.
+# Copyright (c) 2021-2024, Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
+# Process RME_GPT_BITLOCK_BLOCK value
+ifeq ($(filter 0 1 2 4 8 16 32 64 128 256 512, ${RME_GPT_BITLOCK_BLOCK}),)
+    $(error "Invalid value for RME_GPT_BITLOCK_BLOCK: ${RME_GPT_BITLOCK_BLOCK}")
+endif
+
+ifeq (${RME_GPT_BITLOCK_BLOCK},0)
+    $(warning "GPT library uses global spinlock")
+endif
+
+# Process RME_GPT_MAX_BLOCK value
+ifeq ($(filter 0 2 32 512, ${RME_GPT_MAX_BLOCK}),)
+    $(error "Invalid value for RME_GPT_MAX_BLOCK: ${RME_GPT_MAX_BLOCK}")
+endif
+
 GPT_LIB_SRCS	:=	$(addprefix lib/gpt_rme/,        \
 			gpt_rme.c)
diff --git a/lib/gpt_rme/gpt_rme_private.h b/lib/gpt_rme/gpt_rme_private.h
index b2a5dae..31dad20 100644
--- a/lib/gpt_rme/gpt_rme_private.h
+++ b/lib/gpt_rme/gpt_rme_private.h
@@ -9,6 +9,7 @@
 
 #include <arch.h>
 #include <lib/gpt_rme/gpt_rme.h>
+#include <lib/spinlock.h>
 #include <lib/utils_def.h>
 
 /******************************************************************************/
@@ -19,7 +20,7 @@
 #define GPT_L0_TYPE_MASK		UL(0xF)
 #define GPT_L0_TYPE_SHIFT		U(0)
 
-/* For now, we don't support contiguous descriptors, only table and block */
+/* GPT level 0 table and block descriptors */
 #define GPT_L0_TYPE_TBL_DESC		UL(3)
 #define GPT_L0_TYPE_BLK_DESC		UL(1)
 
@@ -29,29 +30,63 @@
 #define GPT_L0_BLK_DESC_GPI_MASK	UL(0xF)
 #define GPT_L0_BLK_DESC_GPI_SHIFT	U(4)
 
-/* GPT level 1 descriptor bit definitions */
+/* GPT level 1 Contiguous descriptor */
+#define GPT_L1_TYPE_CONT_DESC_MASK	UL(0xF)
+#define GPT_L1_TYPE_CONT_DESC		UL(1)
+
+/* GPT level 1 Contiguous descriptor definitions */
+#define GPT_L1_CONTIG_2MB		UL(1)
+#define GPT_L1_CONTIG_32MB		UL(2)
+#define GPT_L1_CONTIG_512MB		UL(3)
+
+#define GPT_L1_CONT_DESC_GPI_SHIFT	U(4)
+#define GPT_L1_CONT_DESC_GPI_MASK	UL(0xF)
+#define GPT_L1_CONT_DESC_CONTIG_SHIFT	U(8)
+#define GPT_L1_CONT_DESC_CONTIG_MASK	UL(3)
+
+/* GPT level 1 Granules descriptor bit definitions */
 #define GPT_L1_GRAN_DESC_GPI_MASK	UL(0xF)
 
+/* L1 Contiguous descriptors templates */
+#define GPT_L1_CONT_DESC_2MB	\
+			(GPT_L1_TYPE_CONT_DESC |	\
+			(GPT_L1_CONTIG_2MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
+#define GPT_L1_CONT_DESC_32MB	\
+			(GPT_L1_TYPE_CONT_DESC |	\
+			(GPT_L1_CONTIG_32MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
+#define GPT_L1_CONT_DESC_512MB	\
+			(GPT_L1_TYPE_CONT_DESC |	\
+			(GPT_L1_CONTIG_512MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
+
+/* Create L1 Contiguous descriptor from GPI and template */
+#define GPT_L1_GPI_CONT_DESC(_gpi, _desc)	\
+			((_desc) | ((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT))
+
+/* Create L1 Contiguous descriptor from Granules descriptor and size */
+#define GPT_L1_CONT_DESC(_desc, _size) \
+				(GPT_L1_CONT_DESC_##_size	| \
+				(((_desc) & GPT_L1_GRAN_DESC_GPI_MASK) << \
+				GPT_L1_CONT_DESC_GPI_SHIFT))
+
+/* Create L1 Contiguous descriptor from GPI and size */
+#define GPT_L1_CONT_DESC_SIZE(_gpi, _size) \
+				(GPT_L1_CONT_DESC_##_size	| \
+				(((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT))
+
+#define GPT_L1_GPI_BYTE(_gpi)		(uint64_t)((_gpi) | ((_gpi) << 4))
+#define GPT_L1_GPI_HALF(_gpi)		(GPT_L1_GPI_BYTE(_gpi) | (GPT_L1_GPI_BYTE(_gpi) << 8))
+#define GPT_L1_GPI_WORD(_gpi)		(GPT_L1_GPI_HALF(_gpi) | (GPT_L1_GPI_HALF(_gpi) << 16))
+
 /*
- * This macro fills out every GPI entry in a granules descriptor to the same
- * value.
+ * This macro generates a Granules descriptor
+ * with the same value for every GPI entry.
  */
-#define GPT_BUILD_L1_DESC(_gpi)		(((uint64_t)(_gpi) << 4*0) | \
-					 ((uint64_t)(_gpi) << 4*1) | \
-					 ((uint64_t)(_gpi) << 4*2) | \
-					 ((uint64_t)(_gpi) << 4*3) | \
-					 ((uint64_t)(_gpi) << 4*4) | \
-					 ((uint64_t)(_gpi) << 4*5) | \
-					 ((uint64_t)(_gpi) << 4*6) | \
-					 ((uint64_t)(_gpi) << 4*7) | \
-					 ((uint64_t)(_gpi) << 4*8) | \
-					 ((uint64_t)(_gpi) << 4*9) | \
-					 ((uint64_t)(_gpi) << 4*10) | \
-					 ((uint64_t)(_gpi) << 4*11) | \
-					 ((uint64_t)(_gpi) << 4*12) | \
-					 ((uint64_t)(_gpi) << 4*13) | \
-					 ((uint64_t)(_gpi) << 4*14) | \
-					 ((uint64_t)(_gpi) << 4*15))
+#define GPT_BUILD_L1_DESC(_gpi)		(GPT_L1_GPI_WORD(_gpi) | (GPT_L1_GPI_WORD(_gpi) << 32))
+
+#define GPT_L1_SECURE_DESC	GPT_BUILD_L1_DESC(GPT_GPI_SECURE)
+#define GPT_L1_NS_DESC		GPT_BUILD_L1_DESC(GPT_GPI_NS)
+#define GPT_L1_REALM_DESC	GPT_BUILD_L1_DESC(GPT_GPI_REALM)
+#define GPT_L1_ANY_DESC		GPT_BUILD_L1_DESC(GPT_GPI_ANY)
 
 /******************************************************************************/
 /* GPT platform configuration                                                 */
@@ -106,17 +141,46 @@
 	PGS_64KB_P =	16U
 } gpt_p_val_e;
 
+#define LOCK_SIZE	sizeof(((bitlock_t *)NULL)->lock)
+#define LOCK_TYPE	typeof(((bitlock_t *)NULL)->lock)
+#define LOCK_BITS	(LOCK_SIZE * 8U)
+
 /*
- * Internal structure to retrieve the values from get_gpi_info();
+ * Internal structure to retrieve the values from get_gpi_params();
  */
-typedef struct gpi_info {
+typedef struct {
 	uint64_t gpt_l1_desc;
 	uint64_t *gpt_l1_addr;
 	unsigned int idx;
 	unsigned int gpi_shift;
 	unsigned int gpi;
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	bitlock_t *lock;
+	LOCK_TYPE mask;
+#endif
 } gpi_info_t;
 
+/*
+ * Look up structure for contiguous blocks and descriptors
+ */
+typedef struct {
+	size_t size;
+	unsigned int desc;
+} gpt_fill_lookup_t;
+
+typedef void (*gpt_shatter_func)(uintptr_t base, const gpi_info_t *gpi_info,
+					uint64_t l1_desc);
+typedef void (*gpt_tlbi_func)(uintptr_t base);
+
+/*
+ * Look-up structure for
+ * invalidating TLBs of GPT entries by Physical address, last level.
+ */
+typedef struct {
+	gpt_tlbi_func function;
+	size_t mask;
+} gpt_tlbi_lookup_t;
+
 /* Max valid value for PGS */
 #define GPT_PGS_MAX			(2U)
 
@@ -136,8 +200,8 @@
  * special case we'll get a negative width value which does not make sense and
  * would cause problems.
  */
-#define GPT_L0_IDX_WIDTH(_t)		(((_t) > GPT_S_VAL) ? \
-					((_t) - GPT_S_VAL) : (0U))
+#define GPT_L0_IDX_WIDTH(_t)		(((unsigned int)(_t) > GPT_S_VAL) ? \
+					((unsigned int)(_t) - GPT_S_VAL) : (0U))
 
 /* Bit shift for the L0 index field in a PA */
 #define GPT_L0_IDX_SHIFT		(GPT_S_VAL)
@@ -173,10 +237,11 @@
  * the L0 index field above since all valid combinations of PGS (p) and L0GPTSZ
  * (s) will result in a positive width value.
  */
-#define GPT_L1_IDX_WIDTH(_p)		((GPT_S_VAL - 1U) - ((_p) + 3U))
+#define GPT_L1_IDX_WIDTH(_p)		((GPT_S_VAL - 1U) - \
+					((unsigned int)(_p) + 3U))
 
 /* Bit shift for the L1 index field */
-#define GPT_L1_IDX_SHIFT(_p)		((_p) + 4U)
+#define GPT_L1_IDX_SHIFT(_p)		((unsigned int)(_p) + 4U)
 
 /*
  * Mask for the L1 index field, must be shifted.
@@ -196,7 +261,10 @@
 #define GPT_L1_GPI_IDX_MASK		(0xF)
 
 /* Total number of entries in each L1 table */
-#define GPT_L1_ENTRY_COUNT(_p)		((GPT_L1_IDX_MASK(_p)) + 1U)
+#define GPT_L1_ENTRY_COUNT(_p)		((GPT_L1_IDX_MASK(_p)) + 1UL)
+
+/* Number of L1 entries in 2MB block */
+#define GPT_L1_ENTRY_COUNT_2MB(_p)	(SZ_2M >> GPT_L1_IDX_SHIFT(_p))
 
 /* Total size in bytes of each L1 table */
 #define GPT_L1_TABLE_SIZE(_p)		((GPT_L1_ENTRY_COUNT(_p)) << 3U)
@@ -206,10 +274,13 @@
 /******************************************************************************/
 
 /* Protected space actual size in bytes */
-#define GPT_PPS_ACTUAL_SIZE(_t)	(1UL << (_t))
+#define GPT_PPS_ACTUAL_SIZE(_t)	(1UL << (unsigned int)(_t))
 
 /* Granule actual size in bytes */
-#define GPT_PGS_ACTUAL_SIZE(_p)	(1UL << (_p))
+#define GPT_PGS_ACTUAL_SIZE(_p)	(1UL << (unsigned int)(_p))
+
+/* Number of granules in 2MB block */
+#define GPT_PGS_COUNT_2MB(_p)	(1UL << (21U - (unsigned int)(_p)))
 
 /* L0 GPT region size in bytes */
 #define GPT_L0GPTSZ_ACTUAL_SIZE	(1UL << GPT_S_VAL)
@@ -221,7 +292,8 @@
  * This definition is used to determine if a physical address lies on an L0
  * region boundary.
  */
-#define GPT_IS_L0_ALIGNED(_pa)	(((_pa) & (GPT_L0_REGION_SIZE - U(1))) == U(0))
+#define GPT_IS_L0_ALIGNED(_pa)	\
+	(((_pa) & (GPT_L0_REGION_SIZE - UL(1))) == UL(0))
 
 /* Get the type field from an L0 descriptor */
 #define GPT_L0_TYPE(_desc)	(((_desc) >> GPT_L0_TYPE_SHIFT) & \
@@ -246,16 +318,43 @@
 				(GPT_L0_TBL_DESC_L1ADDR_MASK << \
 				GPT_L0_TBL_DESC_L1ADDR_SHIFT))))
 
+/* Get the GPI from L1 Contiguous descriptor */
+#define GPT_L1_CONT_GPI(_desc)		\
+	(((_desc) >> GPT_L1_CONT_DESC_GPI_SHIFT) & GPT_L1_CONT_DESC_GPI_MASK)
+
+/* Get the GPI from L1 Granules descriptor */
+#define GPT_L1_GRAN_GPI(_desc)	((_desc) & GPT_L1_GRAN_DESC_GPI_MASK)
+
+/* Get the Contig from L1 Contiguous descriptor */
+#define GPT_L1_CONT_CONTIG(_desc)	\
+	(((_desc) >> GPT_L1_CONT_DESC_CONTIG_SHIFT) & \
+					GPT_L1_CONT_DESC_CONTIG_MASK)
+
 /* Get the index into the L1 table from a physical address */
-#define GPT_L1_IDX(_p, _pa)	(((_pa) >> GPT_L1_IDX_SHIFT(_p)) & \
-				GPT_L1_IDX_MASK(_p))
+#define GPT_L1_IDX(_p, _pa)		\
+	(((_pa) >> GPT_L1_IDX_SHIFT(_p)) & GPT_L1_IDX_MASK(_p))
 
 /* Get the index of the GPI within an L1 table entry from a physical address */
-#define GPT_L1_GPI_IDX(_p, _pa)	(((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & \
-				GPT_L1_GPI_IDX_MASK)
+#define GPT_L1_GPI_IDX(_p, _pa)		\
+	(((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & GPT_L1_GPI_IDX_MASK)
 
 /* Determine if an address is granule-aligned */
-#define GPT_IS_L1_ALIGNED(_p, _pa) (((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - U(1))) \
-				   == U(0))
+#define GPT_IS_L1_ALIGNED(_p, _pa)	\
+	(((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - UL(1))) == UL(0))
+
+/* Get aligned addresses */
+#define ALIGN_2MB(_addr)	((_addr) & ~(SZ_2M - 1UL))
+#define ALIGN_32MB(_addr)	((_addr) & ~(SZ_32M - 1UL))
+#define ALIGN_512MB(_addr)	((_addr) & ~(SZ_512M - 1UL))
+
+/* Determine if region is contiguous */
+#define GPT_REGION_IS_CONT(_len, _addr, _size)	\
+	(((_len) >= (_size)) && (((_addr) & ((_size) - UL(1))) == UL(0)))
+
+/* Get 32MB block number in 512MB block: 0-15 */
+#define GET_32MB_NUM(_addr)	((_addr >> 25) & 0xF)
+
+/* Get 2MB block number in 32MB block: 0-15 */
+#define GET_2MB_NUM(_addr)	((_addr >> 21) & 0xF)
 
 #endif /* GPT_RME_PRIVATE_H */
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index 2685195..a5c78ae 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -139,6 +139,12 @@
 # For Chain of Trust
 GENERATE_COT			:= 0
 
+# Default number of 512 blocks per bitlock
+RME_GPT_BITLOCK_BLOCK		:= 1
+
+# Default maximum size of GPT contiguous block
+RME_GPT_MAX_BLOCK		:= 2
+
 # Hint platform interrupt control layer that Group 0 interrupts are for EL3. By
 # default, they are for Secure EL1.
 GICV2_G0_FOR_EL3		:= 0
diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk
index ec0b19e..520a73a 100644
--- a/plat/arm/board/arm_fpga/platform.mk
+++ b/plat/arm/board/arm_fpga/platform.mk
@@ -77,8 +77,8 @@
 				lib/cpus/aarch64/neoverse_n1.S			\
 				lib/cpus/aarch64/neoverse_n2.S			\
 				lib/cpus/aarch64/neoverse_v1.S			\
-				lib/cpus/aarch64/cortex_chaberton.S		\
-				lib/cpus/aarch64/cortex_blackhawk.S
+				lib/cpus/aarch64/cortex_a725.S		\
+				lib/cpus/aarch64/cortex_x925.S
 
 # AArch64/AArch32 cores
 	FPGA_CPU_LIBS	+=	lib/cpus/aarch64/cortex_a55.S	\
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index c0bba30..033eb7c 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -24,7 +24,7 @@
 
 FVP_DT_PREFIX			:= fvp-base-gicv3-psci
 
-# Size (in kilobytes) of the Trusted SRAM region to  utilize when building for
+# Size (in kilobytes) of the Trusted SRAM region to  utilize when building for
 # the FVP platform. This option defaults to 256.
 FVP_TRUSTED_SRAM_SIZE		:= 256
 
diff --git a/plat/arm/board/tc/include/platform_def.h b/plat/arm/board/tc/include/platform_def.h
index dcae95f..0cf7500 100644
--- a/plat/arm/board/tc/include/platform_def.h
+++ b/plat/arm/board/tc/include/platform_def.h
@@ -298,9 +298,14 @@
 #endif /* TARGET_PLATFORM == 3 */
 #define PLAT_MHUV2_BASE			PLAT_CSS_MHU_BASE
 
-/* TC2: AP<->RSE MHUs */
+/* AP<->RSS MHUs */
+#if TARGET_PLATFORM <= 2
 #define PLAT_RSE_AP_SND_MHU_BASE	UL(0x2A840000)
 #define PLAT_RSE_AP_RCV_MHU_BASE	UL(0x2A850000)
+#elif TARGET_PLATFORM == 3
+#define PLAT_RSE_AP_SND_MHU_BASE	UL(0x49000000)
+#define PLAT_RSE_AP_RCV_MHU_BASE	UL(0x49100000)
+#endif
 
 #define CSS_SYSTEM_PWR_DMN_LVL		ARM_PWR_LVL2
 #define PLAT_MAX_PWR_LVL		ARM_PWR_LVL1
diff --git a/plat/arm/board/tc/platform.mk b/plat/arm/board/tc/platform.mk
index 37ca4c6..cd5abd0 100644
--- a/plat/arm/board/tc/platform.mk
+++ b/plat/arm/board/tc/platform.mk
@@ -44,6 +44,18 @@
 	CTX_INCLUDE_PAUTH_REGS	:=	1
 endif
 
+# TC RESOLUTION - LIST OF VALID OPTIONS (this impacts only FVP)
+TC_RESOLUTION_OPTIONS		:= 	640x480p60 \
+					1920x1080p60
+# Set default to the 640x480p60 resolution mode
+TC_RESOLUTION ?= $(firstword $(TC_RESOLUTION_OPTIONS))
+
+# Check resolution option for FVP
+ifneq ($(filter ${TARGET_FLAVOUR}, fvp),)
+ifeq ($(filter ${TC_RESOLUTION}, ${TC_RESOLUTION_OPTIONS}),)
+        $(error TC_RESOLUTION is ${TC_RESOLUTION}, it must be: ${TC_RESOLUTION_OPTIONS})
+endif
+endif
 
 ifneq ($(shell expr $(TARGET_PLATFORM) \<= 1), 0)
         $(warning Platform ${PLAT}$(TARGET_PLATFORM) is deprecated. \
@@ -61,6 +73,7 @@
 $(eval $(call add_defines, \
 	TARGET_PLATFORM \
 	TARGET_FLAVOUR_$(call uppercase,${TARGET_FLAVOUR}) \
+	TC_RESOLUTION_$(call uppercase,${TC_RESOLUTION}) \
 	TC_DPU_USE_SCMI_CLK \
 	TC_SCMI_PD_CTRL_EN \
 	TC_IOMMU_EN \
@@ -71,6 +84,13 @@
 # Save DSU PMU registers on cluster off and restore them on cluster on
 PRESERVE_DSU_PMU_REGS		:= 1
 
+# Specify MHU type based on platform
+ifneq ($(filter ${TARGET_PLATFORM}, 2),)
+	PLAT_MHU_VERSION	:= 2
+else
+	PLAT_MHU_VERSION	:= 3
+endif
+
 # Include GICv3 driver files
 include drivers/arm/gic/v3/gicv3.mk
 
@@ -100,8 +120,8 @@
 # CPU libraries for TARGET_PLATFORM=3
 ifeq (${TARGET_PLATFORM}, 3)
 TC_CPU_SOURCES	+=	lib/cpus/aarch64/cortex_a520.S \
-			lib/cpus/aarch64/cortex_chaberton.S \
-			lib/cpus/aarch64/cortex_blackhawk.S
+			lib/cpus/aarch64/cortex_a725.S \
+			lib/cpus/aarch64/cortex_x925.S
 endif
 
 INTERCONNECT_SOURCES	:=	${TC_BASE}/tc_interconnect.c
diff --git a/plat/arm/board/tc/tc_bl31_setup.c b/plat/arm/board/tc/tc_bl31_setup.c
index 5742d07..d6b0311 100644
--- a/plat/arm/board/tc/tc_bl31_setup.c
+++ b/plat/arm/board/tc/tc_bl31_setup.c
@@ -50,25 +50,33 @@
 }
 #endif /* PLATFORM_TEST_TFM_TESTSUITE */
 
-static scmi_channel_plat_info_t tc_scmi_plat_info[] = {
-	{
-		.scmi_mbx_mem = CSS_SCMI_PAYLOAD_BASE,
-		.db_reg_addr = PLAT_CSS_MHU_BASE + SENDER_REG_SET(0),
-		.db_preserve_mask = 0xfffffffe,
-		.db_modify_mask = 0x1,
-		.ring_doorbell = &mhuv2_ring_doorbell,
-	}
+#if TARGET_PLATFORM <= 2
+static scmi_channel_plat_info_t tc_scmi_plat_info = {
+	.scmi_mbx_mem = CSS_SCMI_PAYLOAD_BASE,
+	.db_reg_addr = PLAT_CSS_MHU_BASE + SENDER_REG_SET(0),
+	.db_preserve_mask = 0xfffffffe,
+	.db_modify_mask = 0x1,
+	.ring_doorbell = &mhuv2_ring_doorbell,
 };
+#elif TARGET_PLATFORM == 3
+static scmi_channel_plat_info_t tc_scmi_plat_info = {
+	.scmi_mbx_mem = CSS_SCMI_PAYLOAD_BASE,
+	.db_reg_addr = PLAT_CSS_MHU_BASE + MHU_V3_SENDER_REG_SET(0),
+	.db_preserve_mask = 0xfffffffe,
+	.db_modify_mask = 0x1,
+	.ring_doorbell = &mhu_ring_doorbell,
+};
+#endif
 
 void bl31_platform_setup(void)
 {
 	tc_bl31_common_platform_setup();
 }
 
-scmi_channel_plat_info_t *plat_css_get_scmi_info(unsigned int channel_id)
+scmi_channel_plat_info_t *plat_css_get_scmi_info(unsigned int channel_id __unused)
 {
 
-	return &tc_scmi_plat_info[channel_id];
+	return &tc_scmi_plat_info;
 
 }
 
diff --git a/plat/imx/common/imx_common.c b/plat/imx/common/imx_common.c
new file mode 100644
index 0000000..01f354a
--- /dev/null
+++ b/plat/imx/common/imx_common.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2024, Pengutronix, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <errno.h>
+#include <stdint.h>
+
+#include <common/bl_common.h>
+#include <common/desc_image_load.h>
+
+#include <plat_common.h>
+
+/*
+ * This function checks if @arg0 can safely be accessed as a pointer
+ * and if it does, it fills in @bl32_info and @bl33_info with data
+ * found in @arg0.
+ *
+ * Returns 0 when @arg0 can be used as entry point info and a negative
+ * error code otherwise.
+ */
+int imx_bl31_params_parse(uintptr_t arg0, uintptr_t ocram_base,
+			  uintptr_t ocram_size,
+			  entry_point_info_t *bl32_info,
+			  entry_point_info_t *bl33_info)
+{
+	bl_params_t *v2 = (void *)(uintptr_t)arg0;
+
+	if (arg0 & 0x3) {
+		return -EINVAL;
+	}
+
+	if (arg0 < ocram_base || arg0 >= ocram_base + ocram_size) {
+		return -EINVAL;
+	}
+
+	if (v2->h.version != PARAM_VERSION_2) {
+		return -EINVAL;
+	}
+
+	if (v2->h.type != PARAM_BL_PARAMS) {
+		return -EINVAL;
+	}
+
+	bl31_params_parse_helper(arg0, bl32_info, bl33_info);
+
+	return 0;
+}
diff --git a/plat/imx/common/imx_sip_svc.c b/plat/imx/common/imx_sip_svc.c
index c625704..75b709a 100644
--- a/plat/imx/common/imx_sip_svc.c
+++ b/plat/imx/common/imx_sip_svc.c
@@ -80,7 +80,8 @@
 		SMC_RET1(handle, imx_src_handler(smc_fid, x1, x2, x3, handle));
 		break;
 #endif
-#if defined(PLAT_imx8mm) || defined(PLAT_imx8mn) || defined(PLAT_imx8mp)
+#if defined(PLAT_imx8mm) || defined(PLAT_imx8mn) || defined(PLAT_imx8mp) || \
+	defined(PLAT_imx8mq)
 	case IMX_SIP_HAB:
 		SMC_RET1(handle, imx_hab_handler(smc_fid, x1, x2, x3, x4));
 		break;
diff --git a/plat/imx/common/include/plat_common.h b/plat/imx/common/include/plat_common.h
new file mode 100644
index 0000000..6f41222
--- /dev/null
+++ b/plat/imx/common/include/plat_common.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2024, Pengutronix, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef PLAT_COMMON_H
+#define PLAT_COMMON_H
+
+#include <stdint.h>
+#include <common/bl_common.h>
+
+int imx_bl31_params_parse(uintptr_t arg0, uintptr_t ocram_base,
+			  uintptr_t ocram_size,
+			  entry_point_info_t *bl32_info,
+			  entry_point_info_t *bl33_info);
+
+#endif /* PLAT_COMMON_H */
diff --git a/plat/imx/imx8m/imx8mm/imx8mm_bl31_setup.c b/plat/imx/imx8m/imx8mm/imx8mm_bl31_setup.c
index bff8fb4..f6e46eb 100644
--- a/plat/imx/imx8m/imx8mm/imx8mm_bl31_setup.c
+++ b/plat/imx/imx8m/imx8mm/imx8mm_bl31_setup.c
@@ -30,6 +30,7 @@
 #include <imx8m_ccm.h>
 #include <imx8m_csu.h>
 #include <imx8m_snvs.h>
+#include <plat_common.h>
 #include <plat_imx8.h>
 
 #define TRUSTY_PARAMS_LEN_BYTES      (4096*2)
@@ -154,7 +155,7 @@
 {
 	unsigned int console_base = IMX_BOOT_UART_BASE;
 	static console_t console;
-	int i;
+	int i, ret;
 
 	/* Enable CSU NS access permission */
 	for (i = 0; i < 64; i++) {
@@ -207,6 +208,13 @@
 	bl32_image_ep_info.args.arg3 = BL32_FDT_OVERLAY_ADDR;
 #endif
 #endif
+	ret = imx_bl31_params_parse(arg0, IMX_NS_OCRAM_SIZE, IMX_NS_OCRAM_BASE,
+				    &bl32_image_ep_info, &bl33_image_ep_info);
+	if (ret != 0) {
+		ret = imx_bl31_params_parse(arg0, IMX_TCM_BASE, IMX_TCM_SIZE,
+					    &bl32_image_ep_info,
+					    &bl33_image_ep_info);
+	}
 
 #if !defined(SPD_opteed) && !defined(SPD_trusty)
 	enable_snvs_privileged_access();
diff --git a/plat/imx/imx8m/imx8mm/include/platform_def.h b/plat/imx/imx8m/imx8mm/include/platform_def.h
index 349233a..2fa6199 100644
--- a/plat/imx/imx8m/imx8mm/include/platform_def.h
+++ b/plat/imx/imx8m/imx8mm/include/platform_def.h
@@ -118,6 +118,8 @@
 #define IMX_ROM_SIZE			U(0x40000)
 #define IMX_NS_OCRAM_BASE		U(0x900000)
 #define IMX_NS_OCRAM_SIZE		U(0x20000)
+#define IMX_TCM_BASE			U(0x7E0000)
+#define IMX_TCM_SIZE			U(0x40000)
 #define IMX_CAAM_RAM_BASE		U(0x100000)
 #define IMX_CAAM_RAM_SIZE		U(0x10000)
 #define IMX_DRAM_BASE			U(0x40000000)
diff --git a/plat/imx/imx8m/imx8mm/platform.mk b/plat/imx/imx8m/imx8mm/platform.mk
index 6136820..d5c553a 100644
--- a/plat/imx/imx8m/imx8mm/platform.mk
+++ b/plat/imx/imx8m/imx8mm/platform.mk
@@ -30,7 +30,8 @@
 				plat/common/plat_psci_common.c		\
 				plat/imx/common/plat_imx8_gic.c
 
-BL31_SOURCES		+=	plat/imx/common/imx8_helpers.S			\
+BL31_SOURCES		+=	common/desc_image_load.c			\
+				plat/imx/common/imx8_helpers.S			\
 				plat/imx/imx8m/gpc_common.c			\
 				plat/imx/imx8m/imx_hab.c			\
 				plat/imx/imx8m/imx_aipstz.c			\
@@ -46,6 +47,7 @@
 				plat/imx/common/imx8_topology.c			\
 				plat/imx/common/imx_sip_handler.c		\
 				plat/imx/common/imx_sip_svc.c			\
+				plat/imx/common/imx_common.c			\
 				plat/imx/common/imx_uart_console.S		\
 				lib/cpus/aarch64/cortex_a53.S			\
 				drivers/arm/tzc/tzc380.c			\
diff --git a/plat/imx/imx8m/imx8mn/imx8mn_bl31_setup.c b/plat/imx/imx8m/imx8mn/imx8mn_bl31_setup.c
index f9e430b..befa769 100644
--- a/plat/imx/imx8m/imx8mn/imx8mn_bl31_setup.c
+++ b/plat/imx/imx8m/imx8mn/imx8mn_bl31_setup.c
@@ -29,6 +29,7 @@
 #include <imx8m_csu.h>
 #include <imx8m_snvs.h>
 #include <platform_def.h>
+#include <plat_common.h>
 #include <plat_imx8.h>
 
 #define TRUSTY_PARAMS_LEN_BYTES      (4096*2)
@@ -126,7 +127,7 @@
 	unsigned int console_base = IMX_BOOT_UART_BASE;
 	static console_t console;
 	unsigned int val;
-	int i;
+	int i, ret;
 
 	/* Enable CSU NS access permission */
 	for (i = 0; i < 64; i++) {
@@ -192,6 +193,13 @@
 #endif
 #endif
 
+	ret = imx_bl31_params_parse(arg0, IMX_NS_OCRAM_SIZE, IMX_NS_OCRAM_BASE,
+				    &bl32_image_ep_info, &bl33_image_ep_info);
+	if (ret != 0) {
+		imx_bl31_params_parse(arg0, IMX_TCM_BASE, IMX_TCM_SIZE,
+				      &bl32_image_ep_info, &bl33_image_ep_info);
+	}
+
 #if !defined(SPD_opteed) && !defined(SPD_trusty)
 	enable_snvs_privileged_access();
 #endif
diff --git a/plat/imx/imx8m/imx8mn/include/platform_def.h b/plat/imx/imx8m/imx8mn/include/platform_def.h
index 8e7be98..569432d 100644
--- a/plat/imx/imx8m/imx8mn/include/platform_def.h
+++ b/plat/imx/imx8m/imx8mn/include/platform_def.h
@@ -140,6 +140,8 @@
 #define OCRAM_S_SIZE			U(0x8000)
 #define OCRAM_S_LIMIT			(OCRAM_S_BASE + OCRAM_S_SIZE)
 #define SAVED_DRAM_TIMING_BASE		OCRAM_S_BASE
+#define IMX_TCM_BASE			U(0x7E0000)
+#define IMX_TCM_SIZE			U(0x40000)
 
 #define COUNTER_FREQUENCY		8000000 /* 8MHz */
 
diff --git a/plat/imx/imx8m/imx8mn/platform.mk b/plat/imx/imx8m/imx8mn/platform.mk
index 6036b6a..87b3a6f 100644
--- a/plat/imx/imx8m/imx8mn/platform.mk
+++ b/plat/imx/imx8m/imx8mn/platform.mk
@@ -25,7 +25,8 @@
 				plat/common/plat_psci_common.c		\
 				plat/imx/common/plat_imx8_gic.c
 
-BL31_SOURCES		+=	plat/imx/common/imx8_helpers.S			\
+BL31_SOURCES		+=	common/desc_image_load.c			\
+				plat/imx/common/imx8_helpers.S			\
 				plat/imx/imx8m/gpc_common.c			\
 				plat/imx/imx8m/imx_hab.c			\
 				plat/imx/imx8m/imx_aipstz.c			\
@@ -41,6 +42,7 @@
 				plat/imx/common/imx8_topology.c			\
 				plat/imx/common/imx_sip_handler.c		\
 				plat/imx/common/imx_sip_svc.c			\
+				plat/imx/common/imx_common.c			\
 				plat/imx/common/imx_uart_console.S		\
 				lib/cpus/aarch64/cortex_a53.S			\
 				drivers/arm/tzc/tzc380.c			\
diff --git a/plat/imx/imx8m/imx8mp/imx8mp_bl31_setup.c b/plat/imx/imx8m/imx8mp/imx8mp_bl31_setup.c
index 8e35219..ffad3d1 100644
--- a/plat/imx/imx8m/imx8mp/imx8mp_bl31_setup.c
+++ b/plat/imx/imx8m/imx8mp/imx8mp_bl31_setup.c
@@ -29,6 +29,7 @@
 #include <imx8m_csu.h>
 #include <imx8m_snvs.h>
 #include <platform_def.h>
+#include <plat_common.h>
 #include <plat_imx8.h>
 
 #define TRUSTY_PARAMS_LEN_BYTES      (4096*2)
@@ -156,6 +157,7 @@
 	static console_t console;
 	unsigned int val;
 	unsigned int i;
+	int ret;
 
 	/* Enable CSU NS access permission */
 	for (i = 0; i < 64; i++) {
@@ -213,6 +215,13 @@
 	bl32_image_ep_info.args.arg3 = BL32_FDT_OVERLAY_ADDR;
 #endif
 #endif
+	ret = imx_bl31_params_parse(arg0, IMX_NS_OCRAM_SIZE, IMX_NS_OCRAM_BASE,
+				    &bl32_image_ep_info, &bl33_image_ep_info);
+	if (ret != 0) {
+		ret = imx_bl31_params_parse(arg0, IMX_TCM_BASE, IMX_TCM_SIZE,
+					    &bl32_image_ep_info,
+					    &bl33_image_ep_info);
+	}
 
 #if !defined(SPD_opteed) && !defined(SPD_trusty)
 	enable_snvs_privileged_access();
diff --git a/plat/imx/imx8m/imx8mp/include/platform_def.h b/plat/imx/imx8m/imx8mp/include/platform_def.h
index 4a03830..84a7e00 100644
--- a/plat/imx/imx8m/imx8mp/include/platform_def.h
+++ b/plat/imx/imx8m/imx8mp/include/platform_def.h
@@ -172,6 +172,9 @@
 
 #define MAX_CSU_NUM			U(64)
 
+#define IMX_TCM_BASE			U(0x7E0000)
+#define IMX_TCM_SIZE			U(0x40000)
+
 #define OCRAM_S_BASE			U(0x00180000)
 #define OCRAM_S_SIZE			U(0x8000)
 #define OCRAM_S_LIMIT			(OCRAM_S_BASE + OCRAM_S_SIZE)
diff --git a/plat/imx/imx8m/imx8mp/platform.mk b/plat/imx/imx8m/imx8mp/platform.mk
index 40764b1..631dd29 100644
--- a/plat/imx/imx8m/imx8mp/platform.mk
+++ b/plat/imx/imx8m/imx8mp/platform.mk
@@ -26,7 +26,8 @@
 				plat/common/plat_psci_common.c		\
 				plat/imx/common/plat_imx8_gic.c
 
-BL31_SOURCES		+=	plat/imx/common/imx8_helpers.S			\
+BL31_SOURCES		+=	common/desc_image_load.c			\
+				plat/imx/common/imx8_helpers.S			\
 				plat/imx/imx8m/gpc_common.c			\
 				plat/imx/imx8m/imx_hab.c			\
 				plat/imx/imx8m/imx_aipstz.c			\
@@ -42,6 +43,7 @@
 				plat/imx/common/imx8_topology.c			\
 				plat/imx/common/imx_sip_handler.c		\
 				plat/imx/common/imx_sip_svc.c			\
+				plat/imx/common/imx_common.c			\
 				plat/imx/common/imx_uart_console.S		\
 				lib/cpus/aarch64/cortex_a53.S			\
 				drivers/arm/tzc/tzc380.c			\
diff --git a/plat/imx/imx93/imx93_bl31_setup.c b/plat/imx/imx93/imx93_bl31_setup.c
index a7d0f65..d997e9a 100644
--- a/plat/imx/imx93/imx93_bl31_setup.c
+++ b/plat/imx/imx93/imx93_bl31_setup.c
@@ -20,6 +20,7 @@
 #include <plat/common/platform.h>
 
 #include <imx8_lpuart.h>
+#include <plat_common.h>
 #include <plat_imx8.h>
 #include <platform_def.h>
 
@@ -90,6 +91,9 @@
 	bl33_image_ep_info.args.arg3 = BL32_FDT_OVERLAY_ADDR;
 	bl32_image_ep_info.args.arg3 = BL32_FDT_OVERLAY_ADDR;
 #endif
+
+	imx_bl31_params_parse(arg0, OCRAM_BASE, OCRAM_SIZE,
+				    &bl32_image_ep_info, &bl33_image_ep_info);
 }
 
 void bl31_plat_arch_setup(void)
diff --git a/plat/imx/imx93/include/platform_def.h b/plat/imx/imx93/include/platform_def.h
index 7efbf1c..f0d53cd 100644
--- a/plat/imx/imx93/include/platform_def.h
+++ b/plat/imx/imx93/include/platform_def.h
@@ -31,6 +31,9 @@
 #define BL31_BASE			U(0x204E0000)
 #define BL31_LIMIT			U(0x20520000)
 
+#define OCRAM_BASE			U(0x20480000)
+#define OCRAM_SIZE			U(0xA0000)
+
 /* non-secure uboot base */
 /* TODO */
 #define PLAT_NS_IMAGE_OFFSET		U(0x80200000)
diff --git a/plat/imx/imx93/platform.mk b/plat/imx/imx93/platform.mk
index ed7e81f..f506d8b 100644
--- a/plat/imx/imx93/platform.mk
+++ b/plat/imx/imx93/platform.mk
@@ -19,9 +19,11 @@
 				plat/common/plat_psci_common.c		\
 				plat/imx/common/plat_imx8_gic.c
 
-BL31_SOURCES		+=	plat/common/aarch64/crash_console_helpers.S   \
+BL31_SOURCES		+=	common/desc_image_load.c			\
+				plat/common/aarch64/crash_console_helpers.S	\
 				plat/imx/imx93/aarch64/plat_helpers.S		\
 				plat/imx/imx93/plat_topology.c			\
+				plat/imx/common/imx_common.c			\
 				plat/imx/common/lpuart_console.S		\
 				plat/imx/imx93/trdc.c			\
 				plat/imx/imx93/pwr_ctrl.c			\
diff --git a/plat/mediatek/drivers/emi_mpu/emi_mpu.h b/plat/mediatek/drivers/emi_mpu/emi_mpu.h
index ef7134c..329a45e 100644
--- a/plat/mediatek/drivers/emi_mpu/emi_mpu.h
+++ b/plat/mediatek/drivers/emi_mpu/emi_mpu.h
@@ -18,7 +18,7 @@
 #define FORBIDDEN			(5)
 #define SEC_R_NSEC_RW			(6)
 
-#define LOCK				(1)
+#define LOCK				(1UL)
 #define UNLOCK				(0)
 
 #if (EMI_MPU_DGROUP_NUM == 1)
@@ -69,6 +69,7 @@
 int emi_mpu_optee_handler(uint64_t encoded_addr, uint64_t zone_size,
 						  uint64_t zone_info);
 int emi_mpu_set_protection(struct emi_region_info_t *region_info);
+int emi_mpu_clear_protection(unsigned int region);
 void set_emi_mpu_regions(void);
 int set_apu_emi_mpu_region(void);
 #endif
diff --git a/plat/mediatek/drivers/emi_mpu/emi_mpu_common.c b/plat/mediatek/drivers/emi_mpu/emi_mpu_common.c
index 8810be3..1e732f0 100644
--- a/plat/mediatek/drivers/emi_mpu/emi_mpu_common.c
+++ b/plat/mediatek/drivers/emi_mpu/emi_mpu_common.c
@@ -39,13 +39,13 @@
 	}
 
 #if ENABLE_EMI_MPU_SW_LOCK
-	if (region_lock_state[region] == 1) {
+	if (region_lock_state[region] == LOCK) {
 		WARN("invalid region\n");
 		return -1;
 	}
 
 	if ((dgroup == 0) && ((apc >> 31) & 0x1)) {
-		region_lock_state[region] = 1;
+		region_lock_state[region] = LOCK;
 	}
 
 	apc &= EMI_MPU_APC_SW_LOCK_MASK;
@@ -73,6 +73,50 @@
 	return 0;
 }
 
+int emi_mpu_clear_protection(unsigned int region)
+{
+	unsigned int dgroup;
+
+	if (region >= EMI_MPU_REGION_NUM) {
+		WARN("invalid region number\n");
+		return -1;
+	}
+
+#if ENABLE_EMI_MPU_SW_LOCK
+	if (region_lock_state[region] == LOCK) {
+		WARN("SW:region is locked\n");
+		return -1;
+	}
+#endif
+	if (mmio_read_32(EMI_MPU_APC(region, 0)) & (LOCK << 31UL)) {
+		WARN("HW:EMI-MPU region is locked\n");
+		return -1;
+	}
+
+#if defined(SUB_EMI_MPU_BASE)
+	if (mmio_read_32(SUB_EMI_MPU_APC(region, 0)) & (LOCK << 31UL)) {
+		WARN("HW:SUB EMI-MPU region is locked\n");
+		return -1;
+	}
+#endif
+
+	for (dgroup = 0; dgroup < EMI_MPU_DGROUP_NUM; dgroup++)
+		mmio_write_32(EMI_MPU_APC(region, dgroup), 0x0);
+
+	mmio_write_32(EMI_MPU_SA(region), 0x0);
+	mmio_write_32(EMI_MPU_EA(region), 0x0);
+
+#if defined(SUB_EMI_MPU_BASE)
+	for (dgroup = 0; dgroup < EMI_MPU_DGROUP_NUM; dgroup++)
+		mmio_write_32(SUB_EMI_MPU_APC(region, dgroup), 0x0);
+
+	mmio_write_32(SUB_EMI_MPU_SA(region), 0);
+	mmio_write_32(SUB_EMI_MPU_EA(region), 0);
+#endif
+	return 0;
+}
+
+
 static void dump_emi_mpu_regions(void)
 {
 	int region, i;
diff --git a/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu.c b/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu.c
index e8882f0..f7ed5e6 100644
--- a/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu.c
+++ b/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu.c
@@ -14,11 +14,33 @@
 {
 	struct emi_region_info_t region_info;
 
+	/* BL31 address */
+	region_info.start = TZRAM_BASE;
+	region_info.end = TZRAM_BASE + TZRAM_SIZE - 1;
+	region_info.region = BL31_EMI_REGION_ID;
+	SET_ACCESS_PERMISSION(region_info.apc, LOCK,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, SEC_RW);
+	emi_mpu_set_protection(&region_info);
+
+	/* BL32 address */
+	region_info.start = BL32_REGION_BASE;
+	region_info.end = BL32_REGION_BASE + BL32_REGION_SIZE - 1;
+	region_info.region = BL32_REGION_ID;
+	SET_ACCESS_PERMISSION(region_info.apc, LOCK,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+			      FORBIDDEN, FORBIDDEN, SEC_RW, SEC_RW);
+	emi_mpu_set_protection(&region_info);
+
 	/* SCP core0 DRAM */
-	region_info.start = 0x50000000ULL;
-	region_info.end = 0x528FFFFFULL;
-	region_info.region = 2;
-	SET_ACCESS_PERMISSION(region_info.apc, 1,
+	region_info.start = SCP_CORE0_REGION_BASE;
+	region_info.end = SCP_CORE0_REGION_BASE + SCP_CORE0_REGION_SIZE - 1;
+	region_info.region = SCP_CORE0_REGION_ID;
+	SET_ACCESS_PERMISSION(region_info.apc, LOCK,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, NO_PROTECTION,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
@@ -26,10 +48,10 @@
 	emi_mpu_set_protection(&region_info);
 
 	/* SCP core1 DRAM */
-	region_info.start = 0x70000000ULL;
-	region_info.end = 0x729FFFFFULL;
-	region_info.region = 3;
-	SET_ACCESS_PERMISSION(region_info.apc, 1,
+	region_info.start = SCP_CORE1_REGION_BASE;
+	region_info.end = SCP_CORE1_REGION_BASE + SCP_CORE1_REGION_SIZE - 1;
+	region_info.region = SCP_CORE1_REGION_ID;
+	SET_ACCESS_PERMISSION(region_info.apc, LOCK,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, NO_PROTECTION,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
@@ -37,10 +59,10 @@
 	emi_mpu_set_protection(&region_info);
 
 	/* DSP protect address */
-	region_info.start = 0x60000000ULL;
-	region_info.end = 0x610FFFFFULL;
-	region_info.region = 4;
-	SET_ACCESS_PERMISSION(region_info.apc, 1,
+	region_info.start = DSP_PROTECT_REGION_BASE;
+	region_info.end = DSP_PROTECT_REGION_BASE + DSP_PROTECT_REGION_SIZE - 1;
+	region_info.region = DSP_PROTECT_REGION_ID;
+	SET_ACCESS_PERMISSION(region_info.apc, LOCK,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
 			      FORBIDDEN, FORBIDDEN, FORBIDDEN, NO_PROTECTION,
@@ -48,10 +70,10 @@
 	emi_mpu_set_protection(&region_info);
 
 	/* All default settings */
-	region_info.start = 0x40000000ULL;
-	region_info.end = 0x1FFFF0000ULL;
-	region_info.region = 31;
-	SET_ACCESS_PERMISSION(region_info.apc, 1,
+	region_info.start = DRAM_START_ADDR;
+	region_info.end = DRAM_START_ADDR + DRAM_MAX_SIZE - 1;
+	region_info.region = ALL_DEFAULT_REGION_ID;
+	SET_ACCESS_PERMISSION(region_info.apc, LOCK,
 			      FORBIDDEN, FORBIDDEN, NO_PROTECTION, NO_PROTECTION,
 			      NO_PROTECTION, FORBIDDEN, NO_PROTECTION, NO_PROTECTION,
 			      NO_PROTECTION, SEC_R_NSEC_RW, NO_PROTECTION, FORBIDDEN,
@@ -65,7 +87,7 @@
 
 	region_info.start = (unsigned long long)APUSYS_SEC_BUF_PA;
 	region_info.end = (unsigned long long)(APUSYS_SEC_BUF_PA + APUSYS_SEC_BUF_SZ) - 1;
-	region_info.region = APUSYS_SEC_BUF_EMI_REGION;
+	region_info.region = APUSYS_SEC_BUF_EMI_REGION_ID;
 
 	SET_ACCESS_PERMISSION(region_info.apc, UNLOCK,
 			      FORBIDDEN,     FORBIDDEN, FORBIDDEN,     FORBIDDEN,
@@ -86,12 +108,18 @@
 	return ((info & 0xFFFF0000) >> MPU_PHYSICAL_ADDR_SHIFT_BITS);
 }
 
+static inline uint32_t get_decoded_set_clear_info(uint32_t info)
+{
+	return (info & 0x0000FFFF);
+}
+
 int emi_mpu_optee_handler(uint64_t encoded_addr, uint64_t zone_size,
 						  uint64_t zone_info)
 {
 	uint64_t phys_addr = get_decoded_phys_addr(encoded_addr);
 	struct emi_region_info_t region_info;
 	enum MPU_REQ_ORIGIN_ZONE_ID zone_id = get_decoded_zone_id(zone_info);
+	uint32_t is_set = get_decoded_set_clear_info(zone_info);
 
 	INFO("encoded_addr = 0x%lx, zone_size = 0x%lx, zone_info = 0x%lx\n",
 	     encoded_addr, zone_size, zone_info);
@@ -101,17 +129,21 @@
 		return MTK_SIP_E_INVALID_PARAM;
 	}
 
-	/* SVP DRAM */
-	region_info.start = phys_addr;
-	region_info.end = phys_addr + zone_size;
-	region_info.region = 4;
-	SET_ACCESS_PERMISSION(region_info.apc, 1,
-			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
-			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
-			      FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
-			      FORBIDDEN, FORBIDDEN, FORBIDDEN, SEC_RW);
+	if (is_set > 0) {
+		/* SVP DRAM */
+		region_info.start = phys_addr;
+		region_info.end = phys_addr + zone_size - 1;
+		region_info.region = SVP_DRAM_REGION_ID;
+		SET_ACCESS_PERMISSION(region_info.apc, UNLOCK,
+					  FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+					  FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN,
+					  FORBIDDEN, SEC_RW, FORBIDDEN, FORBIDDEN,
+					  FORBIDDEN, FORBIDDEN, SEC_RW, SEC_RW);
 
-	emi_mpu_set_protection(&region_info);
+		emi_mpu_set_protection(&region_info);
+	} else { /* clear region protection */
+		emi_mpu_clear_protection(SVP_DRAM_REGION_ID);
+	}
 
 	return 0;
-}
\ No newline at end of file
+}
diff --git a/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu_priv.h b/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu_priv.h
index cc7f7f1..18acb9c 100644
--- a/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu_priv.h
+++ b/plat/mediatek/drivers/emi_mpu/mt8188/emi_mpu_priv.h
@@ -7,7 +7,7 @@
 #ifndef EMI_MPU_PRIV_H
 #define EMI_MPU_PRIV_H
 
-#define ENABLE_EMI_MPU_SW_LOCK		(1)
+#define ENABLE_EMI_MPU_SW_LOCK		(0)
 
 #define EMI_MPU_CTRL			(EMI_MPU_BASE + 0x000)
 #define EMI_MPU_DBG			(EMI_MPU_BASE + 0x004)
@@ -38,13 +38,34 @@
 #define EMI_MPU_DOMAIN_NUM		(16)
 #define EMI_MPU_REGION_NUM		(32)
 #define EMI_MPU_ALIGN_BITS		(16)
-#define DRAM_OFFSET			(0x40000000 >> EMI_MPU_ALIGN_BITS)
+#define DRAM_START_ADDR                 (0x40000000ULL)
+#define DRAM_OFFSET			(DRAM_START_ADDR >> EMI_MPU_ALIGN_BITS)
+#define DRAM_MAX_SIZE			(0x200000000ULL)
+#define BL32_REGION_BASE		(0x43000000ULL)
+#define BL32_REGION_SIZE		(0x4600000ULL)
+#define SCP_CORE0_REGION_BASE		(0x50000000ULL)
+#define SCP_CORE0_REGION_SIZE		(0x800000ULL)
+#define SCP_CORE1_REGION_BASE		(0x70000000ULL)
+#define SCP_CORE1_REGION_SIZE		(0xa000000ULL)
+#define DSP_PROTECT_REGION_BASE		(0x60000000ULL)
+#define DSP_PROTECT_REGION_SIZE		(0x1100000ULL)
 
 #define EMI_MPU_DGROUP_NUM		(EMI_MPU_DOMAIN_NUM / 8)
 
 /* APU EMI MPU Setting */
-#define APUSYS_SEC_BUF_EMI_REGION	(21)
 #define APUSYS_SEC_BUF_PA		(0x55000000)
 #define APUSYS_SEC_BUF_SZ		(0x100000)
 
+enum region_ids {
+	BL31_EMI_REGION_ID = 0,
+	BL32_REGION_ID,
+	SCP_CORE0_REGION_ID,
+	SCP_CORE1_REGION_ID,
+	DSP_PROTECT_REGION_ID,
+	SVP_DRAM_REGION_ID,
+
+	APUSYS_SEC_BUF_EMI_REGION_ID = 21,
+
+	ALL_DEFAULT_REGION_ID = 31,
+};
 #endif
diff --git a/plat/nxp/s32/s32g274ardb2/plat_console.c b/plat/nxp/s32/s32g274ardb2/plat_console.c
index 27cae12..542fa7b 100644
--- a/plat/nxp/s32/s32g274ardb2/plat_console.c
+++ b/plat/nxp/s32/s32g274ardb2/plat_console.c
@@ -11,11 +11,12 @@
 
 void console_s32g2_register(void)
 {
-	static console_t s32g2_console;
+	static console_t s32g2_console = {
+		.next = NULL,
+		.flags = 0u,
+	};
 	int ret;
 
-	(void)memset(&s32g2_console, 0, sizeof(s32g2_console));
-
 	ret = console_linflex_register(UART_BASE, UART_CLOCK_HZ,
 				       UART_BAUDRATE, &s32g2_console);
 	if (ret == 0) {
diff --git a/services/std_svc/errata_abi/cpu_errata_info.h b/services/std_svc/errata_abi/cpu_errata_info.h
index 2d59fc9..61e1076 100644
--- a/services/std_svc/errata_abi/cpu_errata_info.h
+++ b/services/std_svc/errata_abi/cpu_errata_info.h
@@ -45,7 +45,7 @@
 };
 
 struct em_cpu_list{
-	unsigned long cpu_partnumber;	/* cpu specific part number defined in midr reg */
+	unsigned long cpu_midr;	/* cpu specific part number is bit[15:4] of midr value */
 	struct   em_cpu cpu_errata_list[MAX_PLAT_CPU_ERRATA_ENTRIES];
 };
 
diff --git a/services/std_svc/errata_abi/errata_abi_main.c b/services/std_svc/errata_abi/errata_abi_main.c
index 0a1d4f3..0d0ecc3 100644
--- a/services/std_svc/errata_abi/errata_abi_main.c
+++ b/services/std_svc/errata_abi/errata_abi_main.c
@@ -23,7 +23,7 @@
 struct em_cpu_list cpu_list[] = {
 #if CORTEX_A78_H_INC
 {
-	.cpu_partnumber = CORTEX_A78_MIDR,
+	.cpu_midr = CORTEX_A78_MIDR,
 	.cpu_errata_list = {
 		[0] = {2712571, 0x00, 0x12},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -33,7 +33,7 @@
 
 #if CORTEX_A78_AE_H_INC
 {
-	.cpu_partnumber = CORTEX_A78_AE_MIDR,
+	.cpu_midr = CORTEX_A78_AE_MIDR,
 	.cpu_errata_list = {
 		[0] = {2712574, 0x00, 0x02},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -43,7 +43,7 @@
 
 #if CORTEX_A78C_H_INC
 {
-	.cpu_partnumber = CORTEX_A78C_MIDR,
+	.cpu_midr = CORTEX_A78C_MIDR,
 	.cpu_errata_list = {
 		[0] = {2712575, 0x01, 0x02},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -53,7 +53,7 @@
 
 #if NEOVERSE_V1_H_INC
 {
-	.cpu_partnumber = NEOVERSE_V1_MIDR,
+	.cpu_midr = NEOVERSE_V1_MIDR,
 	.cpu_errata_list = {
 		[0] = {2701953, 0x00, 0x11},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -63,7 +63,7 @@
 
 #if CORTEX_A710_H_INC
 {
-	.cpu_partnumber = CORTEX_A710_MIDR,
+	.cpu_midr = CORTEX_A710_MIDR,
 	.cpu_errata_list = {
 		[0] = {2701952, 0x00, 0x21},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -73,7 +73,7 @@
 
 #if NEOVERSE_N2_H_INC
 {
-	.cpu_partnumber = NEOVERSE_N2_MIDR,
+	.cpu_midr = NEOVERSE_N2_MIDR,
 	.cpu_errata_list = {
 		[0] = {2728475, 0x00, 0x02},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -83,7 +83,7 @@
 
 #if CORTEX_X2_H_INC
 {
-	.cpu_partnumber = CORTEX_X2_MIDR,
+	.cpu_midr = CORTEX_X2_MIDR,
 	.cpu_errata_list = {
 		[0] = {2701952, 0x00, 0x21},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -93,7 +93,7 @@
 
 #if NEOVERSE_V2_H_INC
 {
-	.cpu_partnumber = NEOVERSE_V2_MIDR,
+	.cpu_midr = NEOVERSE_V2_MIDR,
 	.cpu_errata_list = {
 		[0] = {2719103, 0x00, 0x01},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -103,7 +103,7 @@
 
 #if CORTEX_X3_H_INC
 {
-	.cpu_partnumber = CORTEX_X3_MIDR,
+	.cpu_midr = CORTEX_X3_MIDR,
 	.cpu_errata_list = {
 		[0] = {2701951, 0x00, 0x11},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -113,7 +113,7 @@
 
 #if CORTEX_X4_H_INC
 {
-	.cpu_partnumber = CORTEX_X4_MIDR,
+	.cpu_midr = CORTEX_X4_MIDR,
 	.cpu_errata_list = {
 		[0] = {2701112, 0x00, 0x00},
 		[1 ... ERRATA_LIST_END] = UNDEF_ERRATA,
@@ -142,7 +142,7 @@
 		 * If the cpu partnumber in the cpu list, matches the midr
 		 * part number, check to see if the errata ID matches
 		 */
-		if (EXTRACT_PARTNUM(midr_val) == EXTRACT_PARTNUM(cpu_ptr->cpu_partnumber)) {
+		if (EXTRACT_PARTNUM(midr_val) == EXTRACT_PARTNUM(cpu_ptr->cpu_midr)) {
 
 			struct em_cpu *ptr = NULL;