Diffstat (limited to '0005-haoc-kernel.patch')
-rw-r--r-- | 0005-haoc-kernel.patch | 13472 |
1 file changed, 13472 insertions, 0 deletions
diff --git a/0005-haoc-kernel.patch b/0005-haoc-kernel.patch
new file mode 100644
index 0000000..2d9484f
--- /dev/null
+++ b/0005-haoc-kernel.patch
@@ -0,0 +1,13472 @@
+From 099672494b5fbd626031f313bbdbc42d1eb3f02d Mon Sep 17 00:00:00 2001
+From: liuzh <liuzhh@zgclab.edu.cn>
+Date: Wed, 22 May 2024 16:36:55 +0800
+Subject: [PATCH] Squashed commit of the following:
+
+commit 909ad06b3bf629d9af4e143347c1d1ef8a3a5808
+Author: liuzh <liuzhh@zgclab.edu.cn>
+Date:   Wed May 22 16:23:13 2024 +0800
+
+    Fix mte_sync_tags() parameters after rebasing to 6.6.0-27.0.0.
+
+commit 1cbf51371b0539a45f816419b2da82cf36162b4a
+Author: ljl <ljl2013@163.com>
+Date:   Mon Mar 25 08:01:32 2024 +0000
+
+    IEE SI: Remove redundant code.
+
+commit 0178bfc79ad1769a36f4165348a671d2182cff55
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Mon Mar 25 11:01:11 2024 +0800
+
+    Fix bugs on QEMU when enabling CONFIG_CREDP.
+
+commit 8e714f6e8f2ace5a6fc900b4bce6b03c83c41870
+Author: ljl <ljl2013@163.com>
+Date:   Thu Mar 21 04:44:26 2024 +0000
+
+    IEE SI: Remove PAN operations, as the BTLB bug is already fixed.
+
+commit 7b5fc74cb99e377d3bc59da81612cd6f3dd8a4d8
+Author: ljl <lvjinglin2013@163.com>
+Date:   Wed Mar 20 18:31:47 2024 +0800
+
+    IEE SI: Migrate the IEE rwx gate.
+
+commit aad2c7e89c9c4ad8ff0fb3ee53cd1b974144a283
+Author: liuzh <liuzhh@zgclab.edu.cn>
+Date:   Mon Mar 18 15:32:43 2024 +0800
+
+    Modify set_track_prepare() in mm/slub.c.
+
+commit 7452bac06ec09bf8321dfdbfb8b6a429d2cd8637
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Thu Mar 21 11:26:19 2024 +0800
+
+    Set the pgd of the linear mapping as privileged.
+
+commit 33934cfc3eed798a3a687bf86c6bd92697e68ba9
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Tue Mar 19 17:14:32 2024 +0800
+
+    Delete some redundant code and put trans_pgd into IEE.
+
+commit 2bfe9008a72f8b8ac237bc7a5f99f9d40e84c247
+Author: zhangshiyang17@mails.ucas.ac.cn <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Mon Mar 18 11:47:50 2024 +0000
+
+    Fix bugs on physical hardware when enabling CONFIG_IEE and CONFIG_PTP.
+
+commit dafa2df600757511ce3e8f178e05e28adabdf39b
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Mon Mar 18 10:40:42 2024 +0800
+
+    Fix bugs on QEMU when enabling CONFIG_IEE and CONFIG_PTP.
+
+commit 9231a9f6b34c62090b5f202c9c64a52bfdac7a73
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Thu Mar 14 16:34:53 2024 +0800
+
+    Fix compile errors with CONFIG_PTP.
+
+commit 6469df3bcce32896c2cb297d3cd7ead82c33f35d
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Thu Mar 14 11:10:00 2024 +0800
+
+    Fix bugs on QEMU when enabling CONFIG_IEE and CONFIG_INTERRUPTABLE.
+
+commit 5f1773dada622a3514c9ed6aa72dd50e918f2664
+Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
+Date:   Wed Mar 13 17:31:39 2024 +0800
+
+    Fix bugs on QEMU when enabling CONFIG_IEE.
+
+commit 73f433a093fa84cffa5e11e86bed6f17c9b30a39
+Author: liuzh <liuzhh@zgclab.edu.cn>
+Date:   Tue Mar 12 15:32:29 2024 +0800
+
+    Fix the mapping of IEE_SI_TEXT.
+
+commit 9b92deb4b2338093d9b04f4b81f162855b31c983
+Author: liuzh <liuzhh@zgclab.edu.cn>
+Date:   Sun Mar 10 16:11:13 2024 +0800
+
+    Modified so that it compiles; the kernel can now boot under QEMU
+    and successfully reach `start_kernel()`.
+
+commit e892ec4790d72e9433b48b0221e7e6dc4c361dd9
+Author: liuzh <liuzhh@zgclab.edu.cn>
+Date:   Thu Mar 7 14:27:45 2024 +0800
+
+    Fix some conflicts.
+
+commit fdec7e39345e81e867e01258487f88801b790b02
+Author: liuzh <liuzhh@zgclab.edu.cn>
+Date:   Wed Mar 6 12:31:11 2024 +0800
+
+    Migrate the openEuler commit's code
+    (needs some fixes before it compiles).
+---
+ Makefile | 3 +-
+ arch/arm64/Kconfig | 18 +
+ arch/arm64/include/asm/assembler.h | 67 +
+ arch/arm64/include/asm/daifflags.h | 16 +
+ arch/arm64/include/asm/efi.h | 4 +
+ arch/arm64/include/asm/fixmap.h | 3 +
+ arch/arm64/include/asm/hw_breakpoint.h | 12 +
+ arch/arm64/include/asm/iee-access.h | 36 +
+ arch/arm64/include/asm/iee-cred.h | 150 ++
+ arch/arm64/include/asm/iee-def.h | 74 +
+ arch/arm64/include/asm/iee-si.h | 64 +
+ arch/arm64/include/asm/iee-slab.h | 23 +
+ arch/arm64/include/asm/iee-token.h | 40 +
+ arch/arm64/include/asm/iee.h | 10 +
+ arch/arm64/include/asm/kernel-pgtable.h | 21 +
+ arch/arm64/include/asm/koi.h | 335 +++++
+ arch/arm64/include/asm/memory.h | 24 +
+ arch/arm64/include/asm/mmu_context.h | 20 +
+ arch/arm64/include/asm/pgalloc.h | 4 +
+ arch/arm64/include/asm/pgtable-hwdef.h | 11 +
+ arch/arm64/include/asm/pgtable.h | 304 +++-
+ arch/arm64/include/asm/pointer_auth.h | 5 +
+ arch/arm64/include/asm/sysreg.h | 58 +
+ arch/arm64/include/asm/tlb.h | 9 +
+ arch/arm64/include/asm/tlbflush.h | 58 +-
+ arch/arm64/kernel/Makefile | 2 +
+ arch/arm64/kernel/armv8_deprecated.c | 16 +
+ arch/arm64/kernel/asm-offsets.c | 11 +
+ arch/arm64/kernel/cpu_errata.c | 12 +
+ arch/arm64/kernel/cpufeature.c | 79 +
+ arch/arm64/kernel/debug-monitors.c | 4 +
+ arch/arm64/kernel/entry-common.c | 4 +
+ arch/arm64/kernel/entry.S | 611 ++++++++
+ arch/arm64/kernel/fpsimd.c | 4 +
+ arch/arm64/kernel/head.S | 56 +
+ arch/arm64/kernel/hibernate.c | 14 +
+ arch/arm64/kernel/hw_breakpoint.c | 99 ++
+ arch/arm64/kernel/iee/Makefile | 1 +
+ arch/arm64/kernel/iee/iee-func.c | 187 +++
+ arch/arm64/kernel/iee/iee-gate.S | 174 +++
+ arch/arm64/kernel/iee/iee.c | 1360 +++++++++++++++++
+ arch/arm64/kernel/koi/Makefile | 1 +
+ arch/arm64/kernel/koi/koi.c | 1327 +++++++++++++++++
+ arch/arm64/kernel/mte.c | 5 +
+ arch/arm64/kernel/process.c | 19 +-
+ arch/arm64/kernel/proton-pack.c | 8 +
+ arch/arm64/kernel/setup.c | 33 +
+ arch/arm64/kernel/traps.c | 26 +
+ arch/arm64/kernel/vmlinux.lds.S | 61 +
+ arch/arm64/mm/context.c | 91 +-
+ arch/arm64/mm/fault.c | 9 +
+ arch/arm64/mm/fixmap.c | 74 +-
+ arch/arm64/mm/init.c | 34 +
+ arch/arm64/mm/mmu.c | 1780 +++++++++++++++++++----
+ arch/arm64/mm/pgd.c | 39 +
+ arch/arm64/mm/proc.S | 28 +
+ arch/arm64/mm/trans_pgd.c | 46 +
+ drivers/firmware/efi/arm-runtime.c | 4 +
+ drivers/firmware/efi/memmap.c | 20 +
+ drivers/tty/serial/earlycon.c | 4 +
+ drivers/usb/early/ehci-dbgp.c | 4 +
+ fs/coredump.c | 8 +
+ fs/exec.c | 20 +
+ fs/nfs/flexfilelayout/flexfilelayout.c | 9 +
+ fs/nfs/nfs4idmap.c | 9 +
+ fs/nfsd/auth.c | 38 +
+ fs/nfsd/nfs4callback.c | 12 +-
+ fs/nfsd/nfs4recover.c | 9 +
+ fs/nfsd/nfsfh.c | 9 +
+ fs/open.c | 26 +
+ fs/overlayfs/dir.c | 9 +
+ fs/overlayfs/super.c | 12 +
+ fs/smb/client/cifs_spnego.c | 9 +
+ fs/smb/client/cifsacl.c | 9 +
+ include/asm-generic/early_ioremap.h | 3 +
+ include/asm-generic/fixmap.h | 18 +
+ include/asm-generic/pgalloc.h | 54 +
+ include/asm-generic/vmlinux.lds.h | 24 +-
+ include/linux/cred.h | 45 +-
+ include/linux/efi.h | 9 +
+ include/linux/iee-func.h | 27 +
+ include/linux/module.h | 1 +
+ include/linux/sched.h | 19 +
+ init/main.c | 28 +-
+ kernel/cred.c | 182 +++
+ kernel/exit.c | 8 +
+ kernel/fork.c | 316 ++--
+ kernel/groups.c | 7 +
+ kernel/kthread.c | 13 +
+ kernel/smpboot.c | 9 +
+ kernel/sys.c | 107 ++
+ kernel/umh.c | 10 +
+ kernel/user_namespace.c | 18 +
+ mm/Kconfig | 12 +
+ mm/damon/ops-common.c | 1 +
+ mm/debug_vm_pgtable.c | 24 +
+ mm/early_ioremap.c | 57 +
+ mm/huge_memory.c | 30 +-
+ mm/init-mm.c | 17 +
+ mm/memory.c | 14 +
+ mm/slub.c | 198 ++-
+ mm/sparse-vmemmap.c | 21 +
+ mm/vmalloc.c | 2 +-
+ net/dns_resolver/dns_key.c | 9 +
+ security/commoncap.c | 169 +++
+ security/keys/keyctl.c | 23 +
+ security/keys/process_keys.c | 53 +
+ security/security.c | 15 +
+ 109 files changed, 8945 insertions(+), 397 deletions(-)
+ create mode 100644 arch/arm64/include/asm/iee-access.h
+ create mode 100644 arch/arm64/include/asm/iee-cred.h
+ create mode 100644 arch/arm64/include/asm/iee-def.h
+ create mode 100644 arch/arm64/include/asm/iee-si.h
+ create mode 100644 arch/arm64/include/asm/iee-slab.h
+ create mode 100644 arch/arm64/include/asm/iee-token.h
+ create mode 100644 arch/arm64/include/asm/iee.h
+ create mode 100644 arch/arm64/include/asm/koi.h
+ create mode 100644 arch/arm64/kernel/iee/Makefile
+ create mode 100644 arch/arm64/kernel/iee/iee-func.c
+ create mode 100644 arch/arm64/kernel/iee/iee-gate.S
+ create mode 100644 arch/arm64/kernel/iee/iee.c
+ create mode 100644 arch/arm64/kernel/koi/Makefile
+ create mode 100644 arch/arm64/kernel/koi/koi.c
+ create mode 100644 include/linux/iee-func.h
+
+diff --git a/Makefile b/Makefile
+index 8e6d9b894b1e..20c367b5957d 100644
+--- a/Makefile
++++ b/Makefile
+@@ -554,7 +554,7 @@ LINUXINCLUDE    := \
+ 		-I$(objtree)/include \
+ 		$(USERINCLUDE)
+ 
+-KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE
++KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE -march=armv8.1-a
+ 
+ KBUILD_CFLAGS :=
+ KBUILD_CFLAGS += -std=gnu11
+@@ -563,6 +563,7 @@ KBUILD_CFLAGS += -funsigned-char
+ KBUILD_CFLAGS += -fno-common
+ KBUILD_CFLAGS += -fno-PIE
+ KBUILD_CFLAGS += -fno-strict-aliasing
++KBUILD_CFLAGS += -march=armv8.1-a
+ 
+ KBUILD_CPPFLAGS := -D__KERNEL__
+ KBUILD_RUSTFLAGS := $(rust_common_flags) \
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index 2a875546bdc7..7448afc90c0a 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -1730,6 +1730,24 @@ config UNMAP_KERNEL_AT_EL0
+ 
+ 	  If unsure, say Y.
+ 
++# Config for iee
++config IEE
++	depends on ARM64
++	depends on ARM64_PAN
++	depends on ARM64_VA_BITS_48
++	depends on ARM64_4K_PAGES
++	def_bool y
++
++# Config for support of interruption of iee
++config IEE_INTERRUPTABLE
++	depends on IEE
++	def_bool n
++
++# Config for credentials isolation
++config CREDP
++	depends on IEE
++	def_bool y
++
+ config MITIGATE_SPECTRE_BRANCH_HISTORY
+ 	bool "Mitigate Spectre style attacks against branch history" if EXPERT
+ 	default y
+diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
+index 38b23786aeb4..6af10d509c2e 100644
+--- a/arch/arm64/include/asm/assembler.h
++++ b/arch/arm64/include/asm/assembler.h
+@@ -26,6 +26,41 @@
+ #include <asm/ptrace.h>
+ #include <asm/thread_info.h>
+ 
++#ifdef CONFIG_IEE
++	.macro iee_si_restore_daif, flags:req
++	msr daifclr, #0xf
++	tbnz \flags, #6, 114221f
++	tbnz \flags, #7, 114210f
++	tbnz \flags, #8, 114100f
++	msr daifset, #0b000
++	b 114514f
++114221:
++	tbnz \flags, #7, 114211f
++	tbnz \flags, #8, 114101f
++	msr daifset, #0b001
++	b 114514f
++114211:
++	tbnz \flags, #8, 114111f
++	msr daifset, #0b011
++	b 114514f
++114210:
++	tbnz \flags, #8, 114110f
++	msr daifset, #0b010
++	b 114514f
++114100:
++	msr daifset, #0b100
++	b 114514f
++114101:
++	msr daifset, #0b101
++	b 114514f
++114110:
++	msr daifset, #0b110
++	b 114514f
++114111:
++	msr daifset, #0b111
++114514:
++	.endm
++#endif
+ /*
+  * Provide a wxN alias for each wN register so what we can paste a xN
+  * reference after a 'w' to obtain the 32-bit version.
+@@ -52,7 +87,11 @@ alternative_else_nop_endif
+ 
+ 	.macro disable_daif
+ 	disable_allint
++// #ifdef CONFIG_IEE
++// 	msr	daifset, #0x7
++// #else
+ 	msr	daifset, #0xf
++// #endif
+ 	.endm
+ 
+ 	.macro enable_daif
+@@ -69,7 +108,11 @@ alternative_else_nop_endif
+ 	.endm
+ 
+ 	.macro restore_irq, flags
++// #ifdef CONFIG_IEE
++// 	iee_si_restore_daif \flags
++// #else
+ 	msr	daif, \flags
++// #endif
+ 	.endm
+ 
+ 	.macro enable_dbg
+@@ -77,20 +120,44 @@ alternative_else_nop_endif
+ 	.endm
+ 
+ 	.macro disable_step_tsk, flgs, tmp
++// #ifdef CONFIG_IEE
++// 1145:
++// 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
++// 	mrs	\tmp, mdscr_el1
++// 	bic	\tmp, \tmp, #DBG_MDSCR_SS
++// 	orr	\tmp, \tmp, #DBG_MDSCR_MDE
++// 	msr	mdscr_el1, \tmp
++// 	isb	// Synchronise with enable_dbg
++// 	mrs	\tmp, mdscr_el1
++// 	tbz	\tmp, #15, 1145b
++// #else
+ 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
+ 	mrs	\tmp, mdscr_el1
+ 	bic	\tmp, \tmp, #DBG_MDSCR_SS
+ 	msr	mdscr_el1, \tmp
+ 	isb	// Synchronise with enable_dbg
++// #endif
+ 9990:
+ 	.endm
+ 
+ 	/* call with daif masked */
+ 	.macro enable_step_tsk, flgs, tmp
++// #ifdef CONFIG_IEE
++// 1146:
++// 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
++// 	mrs	\tmp, mdscr_el1
++// 	orr	\tmp, \tmp, #DBG_MDSCR_SS
++// 	orr	\tmp, \tmp, #DBG_MDSCR_MDE
++// 	msr	mdscr_el1, \tmp
++// 	isb	// Synchronise with enable_dbg
++// 	mrs	\tmp, mdscr_el1
++// 	tbz	\tmp, #15, 1146b
++// #else
+ 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
+ 	mrs	\tmp, mdscr_el1
+ 	orr	\tmp, \tmp, #DBG_MDSCR_SS
+ 	msr	mdscr_el1, \tmp
++// #endif
+ 9990:
+ 	.endm
+ 
+diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
+index 2417cc6b1631..cb5b4c2e03b8 100644
+--- a/arch/arm64/include/asm/daifflags.h
++++ b/arch/arm64/include/asm/daifflags.h
+@@ -26,11 +26,19 @@ static inline void local_daif_mask(void)
+ 		    (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF |
+ 							GIC_PRIO_PSR_I_SET)));
+ 
++// #ifdef CONFIG_IEE
++// 	asm volatile(
++// 		"msr	daifset, #0x7	// local_daif_mask\n"
++// 		:
++// 		:
++// 		: "memory");
++// #else
+ 	asm volatile(
+ 		"msr	daifset, #0xf	// local_daif_mask\n"
+ 		:
+ 		:
+ 		: "memory");
++// #endif
+ 
+ 	/* Don't really care for a dsb here, we don't intend to enable IRQs */
+ 	if (system_uses_irq_prio_masking())
+@@ -118,7 +126,11 @@ static inline void local_daif_restore(unsigned long flags)
+ 		gic_write_pmr(pmr);
+ 	}
+ 
++// #ifdef CONFIG_IEE
++// 	iee_si_write_daif(flags);
++// #else
+ 	write_sysreg(flags, daif);
++// #endif
+ 
+ 	/* If we can take asynchronous errors we can take NMIs */
+ 	if (system_uses_nmi()) {
+@@ -151,7 +163,11 @@ static inline void local_daif_inherit(struct pt_regs *regs)
+ 	 * system_has_prio_mask_debugging() won't restore the I bit if it can
+ 	 * use the pmr instead.
+ 	 */
++// #ifdef CONFIG_IEE
++// 	iee_si_write_daif(flags);
++// #else
+ 	write_sysreg(flags, daif);
++// #endif
+ 
+ 	/* The ALLINT field is at the same position in pstate and ALLINT */
+ 	if (system_uses_nmi()) {
+diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
+index bcd5622aa096..76c4bd6c2b20 100644
+--- a/arch/arm64/include/asm/efi.h
++++ b/arch/arm64/include/asm/efi.h
+@@ -58,7 +58,11 @@ void arch_efi_call_virt_teardown(void);
+ #define arch_efi_save_flags(state_flags) \
+ 	((void)((state_flags) = read_sysreg(daif)))
+ 
++// #ifdef CONFIG_IEE
++// #define arch_efi_restore_flags(state_flags) iee_si_write_daif(state_flags)
++// #else
+ #define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif)
++// #endif
+ 
+ 
+ /* arch specific definitions used by the stub code */
+diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
+index 58c294a96676..095a0731dce3 100644
+--- a/arch/arm64/include/asm/fixmap.h
++++ b/arch/arm64/include/asm/fixmap.h
+@@ -108,6 +108,9 @@ void __init fixmap_copy(pgd_t *pgdir);
+ #define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
+ 
+ extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
++#ifdef CONFIG_PTP
++extern void __iee_set_fixmap_pre_init(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
++#endif
+ 
+ #include <asm-generic/fixmap.h>
+ 
+diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
+index 84055329cd8b..f72d89bb9a32 100644
+--- a/arch/arm64/include/asm/hw_breakpoint.h
++++ b/arch/arm64/include/asm/hw_breakpoint.h
+@@ -104,6 +104,18 @@ static inline void decode_ctrl_reg(u32 reg,
+ 	write_sysreg(VAL, dbg##REG##N##_el1);\
+ } while (0)
+ 
++#ifdef CONFIG_IEE
++#define IEE_SI_AARCH64_DBG_READ(N, REG, VAL) do{\
++	VAL = this_cpu_read(iee_si_user_##REG##N);\
++} while (0)
++
++#define IEE_SI_AARCH64_DBG_WRITE(N, REG, VAL) do{\
++	u64 __val = (u64)(VAL); \
++	this_cpu_write(iee_si_user_##REG##N, __val);\
++	iee_rwx_gate_entry(IEE_WRITE_AFSR0);\
++} while (0)
++#endif
++
+ struct task_struct;
+ struct notifier_block;
+ struct perf_event_attr;
+diff --git a/arch/arm64/include/asm/iee-access.h b/arch/arm64/include/asm/iee-access.h
+new file mode 100644
+index 000000000000..79604c21a510
+--- /dev/null
++++ b/arch/arm64/include/asm/iee-access.h
+@@ -0,0 +1,36 @@
++#ifndef _LINUX_IEE_ACCESS_H
++#define _LINUX_IEE_ACCESS_H
++
++#include <asm/iee-def.h>
++#include <asm/iee-slab.h>
++
++extern unsigned long long iee_rw_gate(int flag, ...);
++
++#ifdef CONFIG_IEE
++void iee_write_in_byte(void *ptr, u64 data, int length)
++{
++	iee_rw_gate(IEE_WRITE_IN_BYTE, ptr, data, length);
++}
++
++void iee_memset(void *ptr, int data, size_t n)
++{
++	iee_rw_gate(IEE_MEMSET, ptr, data, n);
++}
++
++void iee_set_track(struct track *ptr, struct track *data)
++{
++	iee_rw_gate(IEE_OP_SET_TRACK, ptr, data);
++}
++
++void iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr)
++{
++	iee_rw_gate(IEE_OP_SET_FREEPTR, pptr, ptr);
++}
++
++void iee_write_entry_task(struct task_struct *tsk)
++{
++	iee_rw_gate(IEE_WRITE_ENTRY_TASK, tsk);
++}
++#endif
++
++#endif
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/iee-cred.h b/arch/arm64/include/asm/iee-cred.h
+new file mode 100644
+index 000000000000..b8c3bb53f98a
+--- /dev/null
++++ b/arch/arm64/include/asm/iee-cred.h
+@@ -0,0 +1,150 @@
++#ifndef _LINUX_IEE_CRED_H
++#define _LINUX_IEE_CRED_H
++
++#include <linux/cred.h>
++#include <asm/iee-def.h>
++
++extern unsigned long long iee_rw_gate(int flag, ...);
++
++#ifdef CONFIG_CREDP
++static void __maybe_unused iee_copy_cred(const struct cred *old, struct cred *new)
++{
++	iee_rw_gate(IEE_OP_COPY_CRED,old,new);
++}
++
++static void __maybe_unused iee_set_cred_uid(struct cred *cred, kuid_t uid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_UID,cred,uid);
++}
++
++static void __maybe_unused iee_set_cred_gid(struct cred *cred, kgid_t gid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_GID,cred,gid);
++}
++
++static void __maybe_unused iee_set_cred_suid(struct cred *cred, kuid_t suid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_SUID,cred,suid);
++}
++
++static void __maybe_unused iee_set_cred_sgid(struct cred *cred, kgid_t sgid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_SGID,cred,sgid);
++}
++
++static void __maybe_unused iee_set_cred_euid(struct cred *cred, kuid_t euid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_EUID,cred,euid);
++}
++
++static void __maybe_unused iee_set_cred_egid(struct cred *cred, kgid_t egid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_EGID,cred,egid);
++}
++
++static void __maybe_unused iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_FSUID,cred,fsuid);
++}
++
++static void __maybe_unused iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_FSGID,cred,fsgid);
++}
++
++static void __maybe_unused iee_set_cred_user(struct cred *cred, struct user_struct *user)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_USER,cred,user);
++}
++
++static void __maybe_unused iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_USER_NS,cred,user_ns);
++}
++
++static void __maybe_unused iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_UCOUNTS,cred,ucounts);
++}
++
++static void __maybe_unused iee_set_cred_group_info(struct cred *cred, struct group_info *group_info)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_GROUP_INFO,cred,group_info);
++}
++
++static void __maybe_unused iee_set_cred_securebits(struct cred *cred, unsigned securebits)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_SECUREBITS,cred,securebits);
++}
++
++static void __maybe_unused iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_CAP_INHER,cred,cap_inheritable);
++}
++
++static void __maybe_unused iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_CAP_PERM,cred,cap_permitted);
++}
++
++static void __maybe_unused iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_CAP_EFFECT,cred,cap_effective);
++}
++
++static void __maybe_unused iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_CAP_BSET,cred,cap_bset);
++}
++
++static void __maybe_unused iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_CAP_AMBIENT,cred,cap_ambient);
++}
++
++#ifdef CONFIG_KEYS
++static void __maybe_unused iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_JIT_KEYRING,cred,jit_keyring);
++}
++
++static void __maybe_unused iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_SESS_KEYRING,cred,session_keyring);
++}
++
++static void __maybe_unused iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_PROC_KEYRING,cred,process_keyring);
++}
++
++static void __maybe_unused iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_THREAD_KEYRING,cred,thread_keyring);
++}
++
++static void __maybe_unused iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_REQ_KEYRING,cred,request_key_auth);
++}
++#endif
++
++static void __maybe_unused iee_set_cred_atomic_set_usage(struct cred *cred, int i)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_ATSET_USAGE,cred,i);
++}
++
++#ifdef CONFIG_SECURITY
++static void __maybe_unused iee_set_cred_security(struct cred *cred, void *security)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_SECURITY,cred,security);
++}
++#endif
++
++static void __maybe_unused iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu)
++{
++	iee_rw_gate(IEE_OP_SET_CRED_RCU,cred,rcu);
++}
++#endif
++
++#endif
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/iee-def.h b/arch/arm64/include/asm/iee-def.h
+new file mode 100644
+index 000000000000..76e59259e4d1
+--- /dev/null
++++ b/arch/arm64/include/asm/iee-def.h
+@@ -0,0 +1,74 @@
++// Function Identifiers with Parameters Description
++
++#define IEE_WRITE_IN_BYTE 0		// Parameters: void *ptr, __u64 data, int length
++#define IEE_OP_SET_PTE 1		// Parameters: pte_t *ptep, pte_t pte
++#define IEE_OP_SET_PMD 2		// Parameters: pmd_t *pmdp, pmd_t pmd
++#define IEE_OP_SET_PUD 3		// Parameters: pud_t *pudp, pud_t pud
++#define IEE_OP_SET_P4D 4		// Parameters: p4d_t *p4dp, p4d_t p4d
++#define IEE_OP_SET_BM_PTE 5		// Parameters: pte_t *ptep, pte_t pte
++#define IEE_OP_SET_SWAPPER_PGD 6	// Parameters: pgd_t *pgdp, pgd_t pgd
++#define IEE_OP_SET_TRAMP_PGD 7		// Parameters: pgd_t *pgdp, pgd_t pgd
++#define IEE_OP_SET_CMPXCHG 8		// Parameters: pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval
++#define IEE_OP_SET_XCHG 9		// Parameters: pte_t *ptep, pteval_t pteval
++#define IEE_OP_COPY_CRED 10		// Parameters: struct cred *old, struct cred *new
++#define IEE_OP_SET_CRED_UID 11		// Parameters: struct cred *cred, kuid_t uid
++#define IEE_OP_SET_CRED_GID 12		// Parameters: struct cred *cred, kgid_t gid
++#define IEE_OP_SET_CRED_SUID 13		// Parameters: struct cred *cred, kuid_t suid
++#define IEE_OP_SET_CRED_SGID 14		// Parameters: struct cred *cred, kgid_t sgid
++#define IEE_OP_SET_CRED_EUID 15		// Parameters: struct cred *cred, kuid_t euid
++#define IEE_OP_SET_CRED_EGID 16		// Parameters: struct cred *cred, kgid_t egid
++#define IEE_OP_SET_CRED_FSUID 17	// Parameters: struct cred *cred, kuid_t fsuid
++#define IEE_OP_SET_CRED_FSGID 18	// Parameters: struct cred *cred, kgid_t fsgid
++#define IEE_OP_SET_CRED_USER 19		// Parameters: struct cred *cred, struct user_struct *user
++#define IEE_OP_SET_CRED_USER_NS 20	// Parameters: struct cred *cred, struct user_namespace *user_ns
++#define IEE_OP_SET_CRED_GROUP_INFO 21	// Parameters: struct cred *cred, struct group_info *group_info
++#define IEE_OP_SET_CRED_SECUREBITS 22	// Parameters: struct cred *cred, unsigned securebits
++#define IEE_OP_SET_CRED_CAP_INHER 23	// Parameters: struct cred *cred, kernel_cap_t cap_inheritable
++#define IEE_OP_SET_CRED_CAP_PERM 24	// Parameters: struct cred *cred, kernel_cap_t cap_permitted
++#define IEE_OP_SET_CRED_CAP_EFFECT 25	// Parameters: struct cred *cred, kernel_cap_t cap_effective
++#define IEE_OP_SET_CRED_CAP_BSET 26	// Parameters: struct cred *cred, kernel_cap_t cap_bset
++#define IEE_OP_SET_CRED_CAP_AMBIENT 27	// Parameters: struct cred *cred, kernel_cap_t cap_ambient
++#define IEE_OP_SET_CRED_JIT_KEYRING 28	// Parameters: struct cred *cred, unsigned char jit_keyring
++#define IEE_OP_SET_CRED_SESS_KEYRING 29	// Parameters: struct cred *cred, struct key *session_keyring
++#define IEE_OP_SET_CRED_PROC_KEYRING 30	// Parameters: struct cred *cred, struct key *process_keyring
++#define IEE_OP_SET_CRED_THREAD_KEYRING 31	// Parameters: struct cred *cred, struct key *thread_keyring
++#define IEE_OP_SET_CRED_REQ_KEYRING 32	// Parameters: struct cred *cred, struct key *request_key_auth
++#define IEE_OP_SET_CRED_NON_RCU 33	// Parameters: struct cred *cred, int non_rcu
++#define IEE_OP_SET_CRED_ATSET_USAGE 34	// Parameters: struct cred *cred, int i
++#define IEE_OP_SET_CRED_ATOP_USAGE 35	// Parameters: struct cred *cred, int flag
++#define IEE_OP_SET_CRED_SECURITY 36	// Parameters: struct cred *cred, void *security
++#define IEE_OP_SET_CRED_RCU 37		// Parameters: struct cred *cred, struct rcu_head *rcu
++#define IEE_MEMSET 38			// Parameters: void *ptr, int data, size_t n
++#define IEE_OP_SET_TRACK 39		// Parameters: struct track *ptr, struct track *data
++#define IEE_OP_SET_FREEPTR 40		// Parameters: void **pptr, void *ptr
++#define IEE_OP_SET_PTE_U 41		// Parameters: pte_t *ptep, pte_t pte
++#define IEE_OP_SET_PTE_P 42		// Parameters: pte_t *ptep, pte_t pte
++#define IEE_SET_TOKEN_MM 43		// Parameters: struct task_token *token, struct mm_struct *mm
++#define IEE_SET_TOKEN_PGD 44		// Parameters: struct task_token *token, pgd_t *pgd
++#define IEE_INIT_TOKEN 45		// Parameters: struct task_struct *tsk, void *kernel_stack, void *iee_stack
++#define IEE_FREE_TOKEN 46		// Parameters: struct task_struct *tsk
++#define IEE_READ_TOKEN_STACK 47		// Parameters: struct task_struct *tsk
++#define IEE_WRITE_ENTRY_TASK 48		// Parameters: struct task_struct *tsk
++#define IEE_OP_SET_CRED_UCOUNTS 49	// Parameters: struct cred *cred, struct ucounts *ucounts
++#ifdef CONFIG_KOI
++#define IEE_READ_KOI_STACK 50		// Parameters: struct task_struct *tsk
++#define IEE_WRITE_KOI_STACK 51		// Parameters: struct task_struct *tsk, unsigned long koi_stack
++#define IEE_READ_TOKEN_TTBR1 52		// Parameters: struct task_struct *tsk
++#define IEE_WRITE_TOKEN_TTBR1 53	// Parameters: struct task_struct *tsk, unsigned long current_ttbr1
++#define IEE_READ_KOI_KERNEL_STACK 54	// Parameters: struct task_struct *tsk
++#define IEE_WRITE_KOI_KERNEL_STACK 55	// Parameters: struct task_struct *tsk, unsigned long kernel_stack
++#define IEE_READ_KOI_STACK_BASE 56	// Parameters: struct task_struct *tsk
++#define IEE_WRITE_KOI_STACK_BASE 57	// Parameters: struct task_struct *tsk, unsigned long koi_stack_base
++#endif
++
++/* Add new IEE ops here */
++
++#define AT_ADD 1
++#define AT_INC_NOT_ZERO 2
++#define AT_SUB_AND_TEST 3
++/* Atomic ops for atomic_t */
++
++#ifdef CONFIG_KOI
++#define IEE_SWITCH_TO_KERNEL 7
++#define IEE_SWITCH_TO_KOI 8
++#endif
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/iee-si.h b/arch/arm64/include/asm/iee-si.h
+new file mode 100644
+index 000000000000..e67d81db66a5
+--- /dev/null
++++ b/arch/arm64/include/asm/iee-si.h
+@@ -0,0 +1,64 @@
++#ifndef _LINUX_IEE_SI_H
++#define _LINUX_IEE_SI_H
++
++#include <asm/sysreg.h>
++#define __iee_si_code __section(".iee.si_text")
++#define __iee_si_data __section(".iee.si_data")
++
++/* Used for copying globals that iee rwx gate needs. */
++extern unsigned long iee_base_idmap_pg_dir;
++extern unsigned long iee_base_reserved_pg_dir;
++extern unsigned long iee_base__bp_harden_el1_vectors;
++extern bool iee_init_done;
++extern unsigned long iee_si_tcr;
++
++/* The following are __init functions used for iee si initialization. */
++extern void iee_si_prepare_data(void);
++
++extern unsigned long __iee_si_start[];
++// Handler function for sensitive inst
++u64 iee_si_handler(int flag, ...);
++/*
++ * TODO: scan a page to check whether it contains sensitive instructions
++ * return 1 when finding sensitive inst, 0 on safe page.
++ */
++extern int iee_si_scan_page(unsigned long addr);
++
++
++#define DBG_MDSCR_SS (1 << 0)
++#define DBG_MDSCR_MDE (1 << 15)
++
++#define IEE_SI_TEST 0
++#define IEE_WRITE_SCTLR 1
++#define IEE_WRITE_TTBR0 2
++#define IEE_WRITE_VBAR 3
++#define IEE_WRITE_TCR 4
++#define IEE_WRITE_MDSCR 5
++#define IEE_CONTEXT_SWITCH 6
++// #define IEE_WRITE_AFSR0 10
++/* Provide ttbr1 switch gate for KOI */
++#ifdef CONFIG_KOI
++#define IEE_SWITCH_TO_KERNEL 7
++#define IEE_SWITCH_TO_KOI 8
++#endif
++/* MASK modify-permitted bits on IEE protected sys registers */
++#define IEE_SCTLR_MASK (SCTLR_EL1_CP15BEN | SCTLR_EL1_SED | SCTLR_EL1_UCT | SCTLR_EL1_UCI |\
++			SCTLR_EL1_BT0 | SCTLR_EL1_BT1 | SCTLR_EL1_TCF0_MASK | SCTLR_ELx_DSSBS |\
++			SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB|\
++			SCTLR_EL1_SPINTMASK | SCTLR_EL1_NMI | SCTLR_EL1_TIDCP | SCTLR_EL1_MSCEn|\
++			SCTLR_ELx_ENTP2 | SCTLR_EL1_TCF_MASK)
++#define IEE_TTBR0_MASK ~0
++#define IEE_TTBR1_MASK ~0
++#define IEE_TCR_MASK (TCR_HD | TCR_T0SZ_MASK | TCR_E0PD1)
++#define IEE_MDSCR_MASK (DBG_MDSCR_SS | DBG_MDSCR_MDE)
++
++#define IEE_DBGBCR_BT 0b0000 << 20
++#define IEE_DBGBCR_SSC 0b00 << 14
++#define IEE_DBGBCR_HMC 0b1 << 13
++#define IEE_DBGBCR_BAS 0b1111 << 5
++#define IEE_DBGBCR_PMC 0b11 << 1
++#define IEE_DBGBCR_E 0b1
++#define IEE_DBGBCR IEE_DBGBCR_BT | IEE_DBGBCR_SSC | IEE_DBGBCR_HMC | IEE_DBGBCR_BAS \
++		| IEE_DBGBCR_PMC | IEE_DBGBCR_E
++
++#endif
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/iee-slab.h b/arch/arm64/include/asm/iee-slab.h
+new file mode 100644
+index 000000000000..4f3c17c7da00
+--- /dev/null
++++ b/arch/arm64/include/asm/iee-slab.h
+@@ -0,0 +1,23 @@
++#ifndef _LINUX_IEE_SLAB_H
++#define _LINUX_IEE_SLAB_H
++/*
++ * Tracking user of a slab.
++ */
++#include <linux/stackdepot.h>
++
++#define TRACK_ADDRS_COUNT 16
++struct track {
++	unsigned long addr;	/* Called from address */
++#ifdef CONFIG_STACKDEPOT
++	depot_stack_handle_t handle;
++#endif
++	int cpu;		/* Was running on cpu */
++	int pid;		/* Pid context */
++	unsigned long when;	/* When did the operation occur */
++};
++
++enum track_item { TRACK_ALLOC, TRACK_FREE };
++
++typedef struct { unsigned long v; } freeptr_t;
++
++#endif
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/iee-token.h b/arch/arm64/include/asm/iee-token.h
+new file mode 100644
+index 000000000000..152474e1a187
+--- /dev/null
++++ b/arch/arm64/include/asm/iee-token.h
+@@ -0,0 +1,40 @@
++#ifndef _LINUX_IEE_TOKEN_H
++#define _LINUX_IEE_TOKEN_H
++
++#include <asm/iee-def.h>
++
++extern unsigned long long iee_rw_gate(int flag, ...);
++struct task_token;
++struct task_struct;
++struct mm_struct;
++
++#ifdef CONFIG_IEE
++void iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm)
++{
++	iee_rw_gate(IEE_SET_TOKEN_MM, tsk, mm);
++}
++
++void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd)
++{
++	iee_rw_gate(IEE_SET_TOKEN_PGD, tsk, pgd);
++}
++
++void iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack)
++{
++	iee_rw_gate(IEE_INIT_TOKEN, tsk, kernel_stack, iee_stack);
++}
++
++void iee_free_token(struct task_struct *tsk)
++{
++	iee_rw_gate(IEE_FREE_TOKEN, tsk);
++}
++
++unsigned long iee_read_token_stack(struct task_struct *tsk)
++{
++	unsigned long ret;
++	ret = iee_rw_gate(IEE_READ_TOKEN_STACK, tsk);
++	return ret;
++}
++#endif
++
++#endif
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/iee.h b/arch/arm64/include/asm/iee.h
+new file mode 100644
+index 000000000000..598f6d0b2626
+--- /dev/null
++++ b/arch/arm64/include/asm/iee.h
+@@ -0,0 +1,10 @@
++#ifndef _LINUX_IEE_H
++#define _LINUX_IEE_H
++#define __iee_code __section(".iee.text")
++#define __iee_header __section(".iee.text.header")
++
++u64 iee_dispatch(int flag, ...);
++
++#include <asm/iee-def.h>
++
++#endif
+diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
+index 85d26143faa5..e7a3081ce285 100644
+--- a/arch/arm64/include/asm/kernel-pgtable.h
++++ b/arch/arm64/include/asm/kernel-pgtable.h
+@@ -118,4 +118,25 @@
+ #define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
+ #endif
+ 
++#ifdef CONFIG_IEE
++
++#ifdef CONFIG_ARM64_4K_PAGES // zgcXXX: it has been deleted in 6.6.
++#define ARM64_SWAPPER_USES_SECTION_MAPS 1
++#else
++#define ARM64_SWAPPER_USES_SECTION_MAPS 0
++#endif
++
++#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) // zgcXXX: warning: 6.6 delete this macro. should delete this line later.
++
++#define SWAPPER_PTE_FLAGS_IDMAP	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_RDONLY)
++#define SWAPPER_PMD_FLAGS_IDMAP	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_RDONLY)
++
++#if ARM64_SWAPPER_USES_SECTION_MAPS
++#define SWAPPER_MM_MMUFLAGS_IDMAP	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS_IDMAP)
++#else
++#define SWAPPER_MM_MMUFLAGS_IDMAP	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS_IDMAP)
++#endif
++
++#endif
++
+ #endif /* __ASM_KERNEL_PGTABLE_H */
+diff --git a/arch/arm64/include/asm/koi.h b/arch/arm64/include/asm/koi.h
+new file mode 100644
+index 000000000000..48d9a1378a1d
+--- /dev/null
++++ b/arch/arm64/include/asm/koi.h
+@@ -0,0 +1,335 @@
++#include "linux/mm.h"
++#include "asm/current.h"
++#include "asm/pgtable-hwdef.h"
++#include "asm/pgtable-types.h"
++#include "asm/pgtable.h"
++#include "linux/mm_types.h"
++#include "linux/pgtable.h"
++#include "linux/printk.h"
++#include "linux/slab.h"
++#include "linux/string.h"
++#include <linux/sched.h>
++#include "linux/hashtable.h"
++#include "linux/module.h"
++#include "linux/vmalloc.h"
++#include "stacktrace.h"
++#include "asm/mmu.h"
++#ifdef CONFIG_IEE
++#include "asm/iee-si.h"
++#include "asm/iee-def.h"
++#endif
++
++#define HASH_TABLE_BIT 10
++#define HASH_TABLE_LEN (1 << HASH_TABLE_BIT)
++#define HASH_KEY_MASK ((1 << HASH_TABLE_BIT) - 1)
++
++#define MAX_VAR_NAME 64
++#define DRIVER_ISOLATION_VAR_ARRAY_SIZE 32
++#define DRIVER_ISOLATION_MAX_VAL 256
++
++extern struct hlist_head koi_mem_htbl[1024];
++extern spinlock_t koi_mem_htbl_spin_lock;
++extern unsigned long koi_swapper_ttbr1;
++extern s64 koi_offset;
++
++#ifdef CONFIG_IEE
++extern unsigned long long iee_rw_gate(int flag, ...);
++#endif
++
++DECLARE_PER_CPU(unsigned long[PAGE_SIZE / sizeof(unsigned long)],
++		koi_irq_current_ttbr1);
++
++/**
++* struct koi_mem_hash_node -
++*@mod:pointer to driver module
++*@mem_list_head:free memory list head
++*@ko_mm: mm_struct in each driver
++*@pgdp:entry to Page Global Directory :pgd
++*@node:hash linked list node
++*@addr_htbl[1 << (HASH_TABLE_BIT)]:
++*@rcu:
++*/
++struct koi_mem_hash_node {
++	struct module *mod;
++	struct list_head mem_list_head;
++	struct mm_struct *ko_mm;
++	pgd_t *pgdp;
++	unsigned long ko_ttbr1;
++	struct hlist_node node;
++	struct hlist_head addr_htbl[1 << (HASH_TABLE_BIT)];
++	struct rcu_head rcu;
++	// used to protect free mem list
++	spinlock_t spin_lock;
++	// used to protect addr hashtable
++	spinlock_t addr_htbl_spin_lock;
++};
++//describe the global shared var
++struct shared_variable_descriptor {
++	unsigned int id;
++	unsigned int type;
++	char name[MAX_VAR_NAME];
++	unsigned long offset;
++	unsigned int size;
++	unsigned int self_ptr_ids[DRIVER_ISOLATION_VAR_ARRAY_SIZE];
++};
++
++int koi_do_switch_to_kernel_pgtbl(void);
++
++int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir,
++		       unsigned long addr, unsigned long end);
++
++void koi_create_pagetable(struct module *mod);
++
++void koi_map_kostack(struct module *mod);
++unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr,
++			    unsigned long size);
++void koi_mem_free(struct module *mod, unsigned long addr, unsigned long size,
++		  bool is_const, int count, ...);
++void *koi_mem_lookup(struct module *mod, unsigned long addr);
++void koi_mem_free_callback(struct module *mod, unsigned long addr,
++			   unsigned long size, void (*func)(void *));
++void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size);
++void koi_mem_free_to_user(struct module *mod, unsigned long addr,
++			  unsigned long size);
++
++unsigned long koi_ttbr_ctor(struct module *mod);
++extern void koi_do_switch_to_kernel_stack(void);
++extern void koi_do_switch_to_ko_stack(void);
++
++#define switch_pgtable(ttbr1) \
++	do { \
++		write_sysreg(ttbr1, ttbr1_el1); \
++		isb(); \
++		asm volatile(ALTERNATIVE("nop; nop; nop", \
++					 "ic iallu; dsb nsh; isb", \
++					 ARM64_WORKAROUND_CAVIUM_27456)); \
++	} while (0);
++
++#ifndef CONFIG_IEE
++#define koi_switch_to_ko() \
++	do { \
++		unsigned long flags, ko_ttbr1, cur_sp; \
++		unsigned long *ptr; \
++		struct task_token *token; \
++		asm volatile("mrs %0, daif\n" \
++			     "msr daifset, #2\n" \
++			     "isb\n" \
++			     "mov %1, sp\n" \
++			     : "=r"(flags), "=r"(cur_sp) \
++			     :); \
++		if (!on_irq_stack(cur_sp, NULL)) { \
++			koi_do_switch_to_ko_stack(); \
++			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
++			token = (struct task_token *)((unsigned long)current + \
++						      koi_offset); \
++			token->current_ttbr1 = ko_ttbr1 & (~TTBR_ASID_MASK); \
++		} else { \
++			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
++			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
++					       __kern_my_cpu_offset()); \
++			*ptr = ko_ttbr1 & ~(TTBR_ASID_MASK); \
++		} \
++		switch_pgtable(ko_ttbr1); \
++		asm volatile("msr daif, %0\n" \
++			     "isb\n" \
++			     : \
++			     : "r"(flags)); \
++	} while (0);
++
++#define koi_switch_to_kernel() \
++	do { \
++		unsigned long cur_sp, flags, asid; \
++		unsigned long *ptr; \
++		struct task_token *token; \
++		asm volatile("mrs %0, daif\n" \
++			     "msr daifset, #2\n" \
++			     "isb\n" \
++			     "mov %1, sp\n" \
++			     "mov %2, ttbr0_el1\n" \
++			     : "=r"(flags), "=r"(cur_sp), "=r"(asid) \
++			     :); \
++		asid &= ~USER_ASID_FLAG; \
++		asid &= TTBR_ASID_MASK; \
++		switch_pgtable(koi_swapper_ttbr1); \
++		if (!on_irq_stack(cur_sp, NULL)) { \
++			token = (struct task_token *)((unsigned long)current + \
++						      koi_offset); \
++			token->current_ttbr1 = koi_swapper_ttbr1; \
++			koi_do_switch_to_kernel_stack(); \
++		} else { \
++			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
++					       __kern_my_cpu_offset()); \
++			*ptr = koi_swapper_ttbr1; \
++		} \
++		asm volatile("msr daif, %0\n" \
++			     "isb\n" \
++			     : \
++			     : "r"(flags)); \
++	} while (0);
++#else
++#define koi_switch_to_ko() \
++	do { \
++		unsigned long cur_sp, flags, ko_ttbr1; \
++		unsigned long *ptr; \
++		asm volatile("mrs %0, daif\n" \
++			     "msr daifset, #2\n" \
++			     "isb\n" \
++			     "mov %1, sp\n" \
++			     : "=r"(flags), "=r"(cur_sp) \
++			     :); \
++		if (!on_irq_stack(cur_sp, NULL)) { \
++			koi_do_switch_to_ko_stack(); \
++			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
++			iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, \
++				    ko_ttbr1 &(~TTBR_ASID_MASK)); \
++		} else { \
++			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
++			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
++					       __kern_my_cpu_offset()); \
++			*ptr = ko_ttbr1 & (~TTBR_ASID_MASK); \
++		} \
++		iee_rwx_gate_entry(IEE_SWITCH_TO_KOI, ko_ttbr1); \
++		asm volatile("msr daif, %0\n" \
++			     "isb\n" \
++			     : \
++			     : "r"(flags)); \
++	} while (0);
++
++#define koi_switch_to_kernel() \
++	do { \
++		unsigned long flags, cur_sp; \
++		unsigned long *ptr; \
++		asm volatile("mrs %0, daif\n" \
++			     "msr daifset, #2\n" \
++			     "isb\n" \
++			     "mov %1, sp\n" \
++			     : "=r"(flags), "=r"(cur_sp) \
++			     :); \
++		iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); \
++		if (!on_irq_stack(cur_sp, NULL)) { \
++			iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, \
++				    koi_swapper_ttbr1); \
++			koi_do_switch_to_kernel_stack(); \
++		} else { \
++			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
++					       __kern_my_cpu_offset()); \
++			*ptr = koi_swapper_ttbr1; \
++		} \
++		asm volatile("msr daif, %0\n" \
++			     "isb\n" \
++			     : \
++			     : "r"(flags)); \
++	} while (0);
++#endif
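[Editor's note, not part of the patch: every koi_*_wrapper below follows the same bracket discipline around the two world-switch macros defined above — switch to the kernel's page table and stack, do the privileged work, then switch back before returning to the isolated driver. A minimal sketch of that pattern, where demo_service() is a hypothetical stand-in for any core-kernel call:

static int demo_koi_bracket(void)
{
	int ret;

	koi_switch_to_kernel();	/* swapper ttbr1 + kernel stack */
	ret = demo_service();	/* hypothetical: any core-kernel work */
	koi_switch_to_ko();	/* back to the module's isolated ttbr1 */
	return ret;
}

The macros themselves mask IRQs (msr daifset, #2) and handle the irq-stack case, so a caller only has to keep the two calls paired.]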
++//kzalloc function in driver space
++static __maybe_unused noinline void *
++koi_kzalloc_wrapper(struct module *mod, size_t size, gfp_t flags)
++{
++	int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
++	void *addr;
++	struct koi_mem_hash_node *target = NULL;
++	koi_switch_to_kernel();
++	rcu_read_lock();
++	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
++				    (unsigned long)mod) {
++		if (target->mod == mod) {
++			break;
++		}
++	}
++	rcu_read_unlock();
++	if (target == NULL) {
++		printk("mem node for module: %s not found\n", mod->name);
++		return NULL;
++	}
++
++	addr = kzalloc(size, flags);
++	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
++			   (unsigned long)addr + PAGE_SIZE * cnt);
++	koi_switch_to_ko();
++	return addr;
++}
++//kmalloc function in driver space
++static __maybe_unused __always_inline void *
++koi_kmalloc_wrapper(struct module *mod, size_t size, gfp_t flags)
++{
++	int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
++	void *addr;
++	struct koi_mem_hash_node *target = NULL;
++	koi_switch_to_kernel();
++
++	rcu_read_lock();
++	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
++				    (unsigned long)mod) {
++		if (target->mod == mod) {
++			break;
++		}
++	}
++	rcu_read_unlock();
++	if (target == NULL) {
++		printk("mem node for module: %s not found\n", mod->name);
++		return 0;
++	}
++
++	addr = kmalloc(cnt * PAGE_SIZE, flags);
++	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
++			   (unsigned long)addr + PAGE_SIZE * cnt);
++	koi_switch_to_ko();
++	return (void *)addr;
++}
++//vmalloc function in driver space
++static __maybe_unused void *koi_vmalloc_wrapper(struct module *mod,
++						unsigned long size)
++{
++	int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
++	void *addr;
++	struct koi_mem_hash_node *target = NULL;
++	koi_switch_to_kernel();
++	rcu_read_lock();
++	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
++				    (unsigned long)mod) {
++		if (target->mod == mod) {
++			break;
++		}
++	}
++	rcu_read_unlock();
++	if (target == NULL) {
++		printk("mem node for module: %s not found\n", mod->name);
++		koi_switch_to_ko();
++		return 0;
++	}
++	addr = vmalloc(cnt * PAGE_SIZE);
++	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
++			   (unsigned long)addr + PAGE_SIZE * cnt);
++	koi_switch_to_ko();
++	return addr;
++}
++//kmalloc_array function in driver space
++static __maybe_unused void *koi_kmalloc_array_wrapper(struct module *mod,
++						      size_t n, size_t size,
++						      gfp_t flags)
++{
++	int kpage;
++	void *addr;
++	struct koi_mem_hash_node *target = NULL;
++	koi_switch_to_kernel();
++	rcu_read_lock();
++	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
++				    (unsigned long)mod) {
++		if (target->mod == mod) {
++			break;
++		}
++	}
++	rcu_read_unlock();
++	if (target == NULL) {
++		printk("mem node for module: %s not found\n", mod->name);
++		koi_switch_to_ko();
++		return 0;
++	}
++	kpage = (n * size + PAGE_SIZE - 1) / PAGE_SIZE;
++	n = (kpage * PAGE_SIZE) / size;
++	addr = kmalloc_array(n, size, flags);
++	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
++			   (unsigned long)addr + PAGE_SIZE * kpage);
++	koi_switch_to_ko();
++	return addr;
++}
+\ No newline at end of file
+diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
+index fde4186cc387..6309e5514a16 100644
+--- a/arch/arm64/include/asm/memory.h
++++ b/arch/arm64/include/asm/memory.h
+@@ -190,6 +190,13 @@ extern u64 vabits_actual;
+ #endif
+ 
+ extern s64 memstart_addr;
++
++#if defined(CONFIG_IEE) || defined(CONFIG_KOI)
++extern s64 memstart_addr_init;
++extern s64 iee_offset;
++#define LOGICAL_RANDOM (long long int)((long unsigned int)__va(memstart_addr_init) & (~PAGE_OFFSET))
++#endif
++
+ /* PHYS_OFFSET - the physical address of the start of memory. */
+ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
+ 
+@@ -310,6 +317,23 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
+ #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+ #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
+ 
++#ifdef CONFIG_KOI
++#define KOI_OFFSET ((unsigned long)BIT(vabits_actual - 2))
++#endif
++
++#ifdef CONFIG_IEE
++#ifdef CONFIG_IEE_OFFSET
++#define IEE_OFFSET ((CONFIG_IEE_OFFSET) - LOGICAL_RANDOM)
++#else
++#define IEE_OFFSET (((unsigned long)BIT(vabits_actual - 2)) - LOGICAL_RANDOM)
++#endif
++#define __phys_to_iee(x) (__phys_to_virt(x) + IEE_OFFSET)
++#define SET_UPAGE(x) __pgprot(pgprot_val(x) | PTE_USER)
++#define SET_PPAGE(x) __pgprot(pgprot_val(x) & (~PTE_USER))
++#define SET_INVALID(x) __pgprot(pgprot_val(x) & (~PTE_VALID))
++#define SET_NG(x) __pgprot(pgprot_val(x) | PTE_NG)
++#endif
++
+ /*
+  * Convert a page to/from a physical address
+  */
+diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
+index a6fb325424e7..cca5994dabfb 100644
+--- a/arch/arm64/include/asm/mmu_context.h
++++ b/arch/arm64/include/asm/mmu_context.h
+@@ -24,6 +24,9 @@
+ #include <asm/cputype.h>
+ #include <asm/sysreg.h>
+ #include <asm/tlbflush.h>
++#ifdef CONFIG_IEE
++#define INIT_ASID 0x2
++#endif
+ 
+ extern bool rodata_full;
+ 
+@@ -43,7 +46,12 @@ static inline void cpu_set_reserved_ttbr0_nosync(void)
+ {
+ 	unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
+ 
++#ifdef CONFIG_IEE
++	ttbr |= FIELD_PREP(TTBR_ASID_MASK, 1);
++	iee_rwx_gate_entry(IEE_WRITE_ttbr0_el1, ttbr);
++#else
+ 	write_sysreg(ttbr, ttbr0_el1);
++#endif
+ }
+ 
+ static inline void cpu_set_reserved_ttbr0(void)
+@@ -79,7 +87,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
+ 
+ 	tcr &= ~TCR_T0SZ_MASK;
+ 	tcr |= t0sz << TCR_T0SZ_OFFSET;
++#ifdef CONFIG_IEE
++	iee_rwx_gate_entry(IEE_WRITE_tcr_el1, tcr);
++#else
+ 	write_sysreg(tcr, tcr_el1);
++#endif
+ 	isb();
+ }
+ 
+@@ -144,7 +156,11 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
+ 	__cpu_set_tcr_t0sz(t0sz);
+ 
+ 	/* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
++	#ifdef CONFIG_IEE
++	iee_rwx_gate_entry(IEE_WRITE_ttbr0_el1, ttbr0);
++	#else
+ 	write_sysreg(ttbr0, ttbr0_el1);
++	#endif
+ 	isb();
+ }
+ 
+@@ -174,6 +190,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+ 		ttbr1 |= TTBR_CNP_BIT;
+ 	}
+ 
++	#ifdef CONFIG_IEE
++	ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, ASID(current->active_mm));
++	#endif
++
+ 	replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
+ 
+ 	__cpu_install_idmap(idmap);
+diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
+index 237224484d0f..4e3304da8421 100644
+--- a/arch/arm64/include/asm/pgalloc.h
++++ b/arch/arm64/include/asm/pgalloc.h
+@@ -63,6 +63,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
+ extern pgd_t *pgd_alloc(struct mm_struct *mm);
+ extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
+ 
++#ifdef CONFIG_KOI
++pgd_t *koi_pgd_alloc(void);
++#endif
++
+ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
+ 				  pmdval_t prot)
+ {
+diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
+index e4944d517c99..7f60e568c964 100644
+--- a/arch/arm64/include/asm/pgtable-hwdef.h
++++ b/arch/arm64/include/asm/pgtable-hwdef.h
+@@ -84,6 +84,13 @@
+ #define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
+ #define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))
+ 
++#ifdef CONFIG_IEE
++#define PGD_APT_RO (_AT(pudval_t, 1) << 62)
++#endif
++#define PGD_APT (_AT(pudval_t, 1) << 61)
++#define PGD_PXN (_AT(pudval_t, 1) << 59)
++#define PGD_UXN (_AT(pudval_t, 1) << 60)
++
+ /*
+  * Hardware page table definitions.
+  *
+@@ -285,6 +292,10 @@
+ #define TCR_TCMA0		(UL(1) << 57)
+ #define TCR_TCMA1		(UL(1) << 58)
+ 
++#ifdef CONFIG_IEE
++#define TCR_HPD1		(UL(1) << 42)
++#endif
++
+ /*
+  * TTBR.
+  */
+diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
+index 8d68d00de0a4..6f8d5b85bfd7 100644
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -34,6 +34,9 @@
+ #include <linux/mm_types.h>
+ #include <linux/sched.h>
+ #include <linux/page_table_check.h>
++#ifdef CONFIG_PTP
++#include <asm/iee.h>
++#endif
+ 
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+@@ -156,6 +159,30 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+ #define pud_access_permitted(pud, write) \
+ 	(pte_access_permitted(pud_pte(pud), (write)))
+ 
++#ifdef CONFIG_PTP
++static inline bool in_tramp_pgdir(void *addr);
++extern unsigned long long iee_rw_gate(int flag, ...);
++
++static void iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd)
++{
++	iee_rw_gate(IEE_OP_SET_TRAMP_PGD, pgdp, pgd);
++}
++
++static noinline pteval_t iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval)
++{
++	pteval_t ret;
++	ret = iee_rw_gate(IEE_OP_SET_XCHG, ptep, pteval);
++	return (pteval_t)ret;
++}
++
++static noinline pteval_t iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval)
++{
++	pteval_t ret;
++	ret = iee_rw_gate(IEE_OP_SET_CMPXCHG, ptep, old_pteval, new_pteval);
++	return (pteval_t)ret;
++}
++#endif
++
+ static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+ {
+ 	pte_val(pte) &= ~pgprot_val(prot);
+@@ -262,6 +289,64 @@ static inline pte_t pte_mkdevmap(pte_t pte)
+ 
+ static inline void __set_pte(pte_t *ptep, pte_t pte)
+ {
++#ifdef CONFIG_KOI
++	if (!pte_none(pte)) {
++		pte = __pte(pte_val(pte) | PTE_NG);
++	}
++#endif
++#ifdef CONFIG_PTP
++	iee_rw_gate(IEE_OP_SET_PTE, ptep, pte);
++	dsb(ishst);
++	isb();
++#else
++	WRITE_ONCE(*ptep, pte);
++
++	/*
++	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
++	 * or update_mmu_cache() have the necessary barriers.
++	 */
++	if (pte_valid_not_user(pte)) {
++		dsb(ishst);
++		isb();
++	}
++#endif
++}
++
++
++#ifdef CONFIG_PTP
++static inline void iee_set_bm_pte(pte_t *ptep, pte_t pte)
++{
++	// If it is pre init, write once.
++	// Else, write once will cause exception. So it is safe.
++	unsigned long flags;
++	unsigned long res;
++	local_irq_save(flags);
++	asm volatile("at s1e1r, %0"::"r"(__phys_to_iee(__pa_symbol(ptep))));
++	isb();
++	res = read_sysreg(par_el1);
++	local_irq_restore(flags);
++	if(res & 0x1)
++		WRITE_ONCE(*ptep,pte);
++	else
++		iee_rw_gate(IEE_OP_SET_BM_PTE, ptep, pte);
++
++	/*
++	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
++	 * or update_mmu_cache() have the necessary barriers.
++	 */
++	if (pte_valid_not_user(pte)) {
++		dsb(ishst);
++		isb();
++	}
++}
++
++static inline void iee_set_fixmap_pte_pre_init(pte_t *ptep, pte_t pte)
++{
++#ifdef CONFIG_KOI
++	if (!pte_none(pte)) {
++		pte = __pte(pte_val(pte) | PTE_NG);
++	}
++#endif
+ 	WRITE_ONCE(*ptep, pte);
+ 
+ 	/*
+@@ -273,6 +358,7 @@ static inline void __set_pte(pte_t *ptep, pte_t pte)
+ 		isb();
+ 	}
+ }
++#endif
+ 
+ static inline pte_t __ptep_get(pte_t *ptep)
+ {
+@@ -546,6 +632,95 @@ static inline void __set_pte_at(struct mm_struct *mm,
+ 	__set_pte(ptep, pte);
+ }
+ 
++#ifdef CONFIG_IEE
++static inline void iee_set_pte_upage(pte_t *ptep, pte_t pte)
++{
++#ifdef CONFIG_PTP
++	iee_rw_gate(IEE_OP_SET_PTE_U, ptep, pte);
++	dsb(ishst);
++	isb();
++#else
++	WRITE_ONCE(*ptep, pte);
++	if (pte_valid_not_user(pte)) {
++		dsb(ishst);
++		isb();
++	}
++#endif
++}
++
++static inline void iee_set_pte_ppage(pte_t *ptep, pte_t pte)
++{
++#ifdef CONFIG_PTP
++	iee_rw_gate(IEE_OP_SET_PTE_P, ptep, pte);
++#else
++	WRITE_ONCE(*ptep, pte);
++#endif
++	if (pte_valid_not_user(pte)) {
++		dsb(ishst);
++		isb();
++	}
++}
++#endif
++
++#ifdef CONFIG_PTP
++static inline void set_pmd(pmd_t *pmdp, pmd_t pmd);
++static inline void __set_pmd_at(struct mm_struct *mm, unsigned long addr,
++				pmd_t *pmdp, pmd_t pmd)
++{
++	if (pte_present(pmd_pte(pmd)) && pte_user_exec(pmd_pte(pmd)) && !pte_special(pmd_pte(pmd)))
++		__sync_icache_dcache(pmd_pte(pmd));
++
++	/*
++	 * If the PTE would provide user space access to the tags associated
++	 * with it then ensure that the MTE tags are synchronised. Although
++	 * pte_access_permitted() returns false for exec only mappings, they
++	 * don't expose tags (instruction fetches don't check tags).
++	 */
++	if (system_supports_mte() && pte_access_permitted(pmd_pte(pmd), false) &&
++	    !pte_special(pmd_pte(pmd)) && pte_tagged(pmd_pte(pmd)))
++		mte_sync_tags(pmd_pte(pmd), PMD_SIZE >> PAGE_SHIFT);
++
++	__check_safe_pte_update(mm, (pte_t *)pmdp, pmd_pte(pmd));
++
++	set_pmd(pmdp, pmd);
++}
++
++static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
++			      pmd_t *pmdp, pmd_t pmd)
++{
++	page_table_check_pmd_set(mm, pmdp, pmd);
++	return __set_pmd_at(mm, addr, pmdp, pmd);
++}
++
++static inline void set_pud(pud_t *pudp, pud_t pud);
++static inline void __set_pud_at(struct mm_struct *mm, unsigned long addr,
++				pud_t *pudp, pud_t pud)
++{
++	if (pte_present(pud_pte(pud)) && pte_user_exec(pud_pte(pud)) && !pte_special(pud_pte(pud)))
++		__sync_icache_dcache(pud_pte(pud));
++
++	/*
++	 * If the PTE would provide user space access to the tags associated
++	 * with it then ensure that the MTE tags are synchronised. Although
++	 * pte_access_permitted() returns false for exec only mappings, they
++	 * don't expose tags (instruction fetches don't check tags).
++	 */
++	if (system_supports_mte() && pte_access_permitted(pud_pte(pud), false) &&
++	    !pte_special(pud_pte(pud)) && pte_tagged(pud_pte(pud)))
++		mte_sync_tags(pud_pte(pud), PUD_SIZE >> PAGE_SHIFT);
++
++	__check_safe_pte_update(mm, (pte_t *)pudp, pud_pte(pud));
++
++	set_pud(pudp, pud);
++}
++
++static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
++			      pud_t *pudp, pud_t pud)
++{
++	page_table_check_pud_set(mm, pudp, pud);
++	return __set_pud_at(mm, addr, pudp, pud);
++}
++#else
+ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ 			      pmd_t *pmdp, pmd_t pmd)
+ {
+@@ -561,7 +736,7 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ 	return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
+ 				PUD_SIZE >> PAGE_SHIFT);
+ }
+-
++#endif
+ #define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
+ #define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)
+ 
+@@ -640,7 +815,14 @@ static inline bool in_swapper_pgdir(void *addr)
+ 		((unsigned long)swapper_pg_dir & PAGE_MASK);
+ }
+ 
+-static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
++#ifdef CONFIG_PTP
++static inline bool in_tramp_pgdir(void *addr)
++{
++	return ((unsigned long)addr & PAGE_MASK) ==
++		((unsigned long)tramp_pg_dir & PAGE_MASK);
++}
++
++static inline void iee_set_fixmap_pmd_pre_init(pmd_t *pmdp, pmd_t pmd)
+ {
+ #ifdef __PAGETABLE_PMD_FOLDED
+ 	if (in_swapper_pgdir(pmdp)) {
+@@ -648,7 +830,6 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+ 		return;
+ 	}
+ #endif /* __PAGETABLE_PMD_FOLDED */
+-
+ 	WRITE_ONCE(*pmdp, pmd);
+ 
+ 	if (pmd_valid(pmd)) {
+@@ -656,6 +837,32 @@
+ 		isb();
+ 	}
+ }
++#endif
++
++static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
++{
++#ifdef __PAGETABLE_PMD_FOLDED
++	if (in_swapper_pgdir(pmdp)) {
++		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
++		return;
++	}
++#endif /* __PAGETABLE_PMD_FOLDED */
++#ifdef CONFIG_KOI
++	pmdval_t val = pmd_val(pmd);
++	if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) {
++		pmd = __pmd(val | PMD_SECT_NG);
++	}
++#endif
++#ifdef CONFIG_PTP
++	iee_rw_gate(IEE_OP_SET_PMD, pmdp, pmd);
++#else
++	WRITE_ONCE(*pmdp, pmd);
++#endif
++	if (pmd_valid(pmd)) {
++		dsb(ishst);
++		isb();
++	}
++}
+ 
+ static inline void pmd_clear(pmd_t *pmdp)
+ {
+@@ -675,6 +882,12 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+ /* Find an entry in the third-level page table. */
+ #define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
+ 
++#ifdef CONFIG_PTP
++#define pte_set_fixmap_init(addr)	((pte_t *)iee_set_fixmap_offset_pre_init(FIX_PTE, addr))
++#define pte_set_fixmap_offset_init(pmd, addr)	pte_set_fixmap_init(pte_offset_phys(pmd, addr))
++#define pte_clear_fixmap_init()		clear_fixmap_init(FIX_PTE)
++#endif
++
+ #define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+ #define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
+ #define pte_clear_fixmap()		clear_fixmap(FIX_PTE)
+@@ -703,7 +916,9 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+ #define pud_user(pud)		pte_user(pud_pte(pud))
+ #define pud_user_exec(pud)	pte_user_exec(pud_pte(pud))
+ 
+-static inline void set_pud(pud_t *pudp, pud_t pud)
++
++#ifdef CONFIG_PTP
++static inline void iee_set_fixmap_pud_pre_init(pud_t *pudp, pud_t pud)
+ {
+ #ifdef __PAGETABLE_PUD_FOLDED
+ 	if (in_swapper_pgdir(pudp)) {
+@@ -711,7 +926,6 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
+ 		return;
+ 	}
+ #endif /* __PAGETABLE_PUD_FOLDED */
+-
+ 	WRITE_ONCE(*pudp, pud);
+ 
+ 	if (pud_valid(pud)) {
+@@ -719,6 +933,33 @@
+ 		isb();
+ 	}
+ }
++#endif
++
++static inline void set_pud(pud_t *pudp, pud_t pud)
++{
++#ifdef __PAGETABLE_PUD_FOLDED
++	if (in_swapper_pgdir(pudp)) {
++		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
++		return;
++	}
++#endif /* __PAGETABLE_PUD_FOLDED */
++#ifdef CONFIG_KOI
++	pudval_t val = pud_val(pud);
++	if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) {
++		// There is no PUD_SEC_NG, so we use PMD_SECT_NG instead.
++		pud = __pud(val | PMD_SECT_NG);
++	}
++#endif
++#ifdef CONFIG_PTP
++	iee_rw_gate(IEE_OP_SET_PUD, pudp, pud);
++#else
++	WRITE_ONCE(*pudp, pud);
++#endif
++	if (pud_valid(pud)) {
++		dsb(ishst);
++		isb();
++	}
++}
+ 
+ static inline void pud_clear(pud_t *pudp)
+ {
+@@ -738,6 +979,12 @@ static inline pmd_t *pud_pgtable(pud_t pud)
+ /* Find an entry in the second-level page table. */
+ #define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
+ 
++#ifdef CONFIG_PTP
++#define pmd_set_fixmap_init(addr)	((pmd_t *)iee_set_fixmap_offset_pre_init(FIX_PMD, addr))
++#define pmd_set_fixmap_offset_init(pud, addr)	pmd_set_fixmap_init(pmd_offset_phys(pud, addr))
++#define pmd_clear_fixmap_init()		clear_fixmap_init(FIX_PMD)
++#endif
++
+ #define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+ #define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
+ #define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
+@@ -769,15 +1016,26 @@ static inline pmd_t *pud_pgtable(pud_t pud)
+ #define p4d_none(p4d)		(!p4d_val(p4d))
+ #define p4d_bad(p4d)		(!(p4d_val(p4d) & 2))
+ #define p4d_present(p4d)	(p4d_val(p4d))
++#define p4d_valid(p4d)		pte_valid(p4d_pte(p4d))
+ 
+ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+ {
+-	if (in_swapper_pgdir(p4dp)) {
++	if (in_swapper_pgdir(p4dp))
++	{
+ 		set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
+ 		return;
+ 	}
+ 
++#ifdef CONFIG_PTP
++	if(in_tramp_pgdir(p4dp))
++	{
++		iee_set_tramp_pgd_pre_init((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
++		return;
++	}
++	iee_rw_gate(IEE_OP_SET_P4D, p4dp, p4d);
++#else
+ 	WRITE_ONCE(*p4dp, p4d);
++#endif
+ 	dsb(ishst);
+ 	isb();
+ }
+@@ -800,6 +1058,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
+ /* Find an entry in the first-level page table. */
*/ + #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) + ++#ifdef CONFIG_PTP ++#define pud_set_fixmap_init(addr) ((pud_t *)iee_set_fixmap_offset_pre_init(FIX_PUD, addr)) ++#define pud_set_fixmap_offset_init(p4d, addr) pud_set_fixmap_init(pud_offset_phys(p4d, addr)) ++#define pud_clear_fixmap_init() clear_fixmap_init(FIX_PUD) ++#endif ++ + #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) + #define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) + #define pud_clear_fixmap() clear_fixmap(FIX_PUD) +@@ -826,6 +1090,10 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) + #define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) + ++#ifdef CONFIG_PTP ++#define pgd_set_fixmap_init(addr) ((pgd_t *)iee_set_fixmap_offset_pre_init(FIX_PGD, addr)) ++#define pgd_clear_fixmap_init() clear_fixmap_init(FIX_PGD) ++#endif + #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) + #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) + +@@ -912,8 +1180,13 @@ static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, + do { + old_pte = pte; + pte = pte_mkold(pte); ++ #ifdef CONFIG_PTP ++ pte_val(pte) = iee_set_cmpxchg_relaxed(ptep, ++ pte_val(old_pte), pte_val(pte)); ++ #else + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); ++ #endif + } while (pte_val(pte) != pte_val(old_pte)); + + return pte_young(pte); +@@ -952,8 +1225,12 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) + { ++ #ifdef CONFIG_PTP ++ pteval_t pteval= iee_set_xchg_relaxed((pte_t *)&pte_val(*ptep), (pteval_t)0); ++ pte_t pte = __pte(pteval); ++ #else + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); +- ++ #endif + page_table_check_pte_clear(mm, pte); + + return pte; +@@ -995,7 +1272,12 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, + static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) + { ++ #ifdef CONFIG_PTP ++ pteval_t pteval= iee_set_xchg_relaxed((pte_t *)&pmd_val(*pmdp), (pteval_t)0); ++ pmd_t pmd = __pmd(pteval); ++ #else + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0)); ++ #endif + + page_table_check_pmd_clear(mm, pmd); + +@@ -1012,8 +1294,12 @@ static inline void ___ptep_set_wrprotect(struct mm_struct *mm, + do { + old_pte = pte; + pte = pte_wrprotect(pte); ++ #ifdef CONFIG_PTP ++ pte_val(pte) = iee_set_cmpxchg_relaxed(ptep,pte_val(old_pte), pte_val(pte)); ++ #else + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); ++ #endif + } while (pte_val(pte) != pte_val(old_pte)); + } + +@@ -1049,7 +1335,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, pmd_t pmd) + { + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); ++ #ifdef CONFIG_PTP ++ return __pmd((pmdval_t)iee_set_xchg_relaxed((pte_t *)&pmd_val(*pmdp), (pmdval_t)pmd_val(pmd))); ++ #else + return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); ++ #endif + } + #endif + +diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h +index d2e0306e65d3..8352e92d4536 100644 +--- a/arch/arm64/include/asm/pointer_auth.h ++++ b/arch/arm64/include/asm/pointer_auth.h +@@ -108,8 +108,13 @@ static __always_inline void ptrauth_enable(void) + { + if 
(!system_supports_address_auth()) + return; ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | ++ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)); ++ #else + sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)); ++ #endif + isb(); + } + +diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h +index 42358b8d678e..1e2d11e57fe3 100644 +--- a/arch/arm64/include/asm/sysreg.h ++++ b/arch/arm64/include/asm/sysreg.h +@@ -1116,6 +1116,64 @@ + write_sysreg_s(__scs_new, sysreg); \ + } while (0) + ++ ++#ifdef CONFIG_IEE ++ ++#define SYS_TCR_IEE_SI TCR_HPD1 | TCR_A1 ++ ++extern void iee_rwx_gate_entry(int flag, ...); ++#define IEE_SI_TEST 0 ++#define IEE_WRITE_sctlr_el1 1 ++#define IEE_WRITE_ttbr0_el1 2 ++#define IEE_WRITE_vbar_el1 3 ++#define IEE_WRITE_tcr_el1 4 ++#define IEE_WRITE_mdscr_el1 5 ++#define IEE_WRITE_AFSR0 10 ++ ++#define sysreg_clear_set_iee_si(sysreg, clear, set) do { \ ++ u64 __scs_val = read_sysreg(sysreg); \ ++ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ ++ if (__scs_new != __scs_val) \ ++ iee_rwx_gate_entry(IEE_WRITE_##sysreg, __scs_new); \ ++} while (0) ++ ++#define IEE_SI_WRITE_DAIF_SEL "msr daifclr, #0xf\n\t" \ ++ "tbnz %x0, #6, 114221f\n\t" \ ++ "tbnz %x0, #7, 114210f\n\t" \ ++ "tbnz %x0, #8, 114100f\n\t" \ ++ "msr daifset, #0b000\n\t" \ ++ "b 114514f\n\t" \ ++"114221:\n\t" \ ++ "tbnz %x0, #7, 114211f\n\t" \ ++ "tbnz %x0, #8, 114101f\n\t" \ ++ "msr daifset, #0b001\n\t" \ ++ "b 114514f\n\t" \ ++"114211:\n\t" \ ++ "tbnz %x0, #8, 114111f\n\t" \ ++ "msr daifset, #0b011\n\t" \ ++ "b 114514f\n\t" \ ++"114210:\n\t" \ ++ "tbnz %x0, #8, 114110f\n\t" \ ++ "msr daifset, #0b010\n\t" \ ++ "b 114514f\n\t" \ ++"114100:\n\t" \ ++ "msr daifset, #0b100\n\t" \ ++ "b 114514f\n\t" \ ++"114101:\n\t" \ ++ "msr daifset, #0b101\n\t" \ ++ "b 114514f\n\t" \ ++"114110:\n\t" \ ++ "msr daifset, #0b110\n\t" \ ++ "b 114514f\n\t" \ ++"114111:\n\t" \ ++ "msr daifset, #0b111\n\t" \ ++"114514:\n\t" ++ ++#define iee_si_write_daif(v) do { \ ++ u64 __val = (u64)(v); \ ++ asm volatile(IEE_SI_WRITE_DAIF_SEL: : "rZ" (__val));} while (0) ++#endif ++ + #define read_sysreg_par() ({ \ + u64 par; \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ +diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h +index 2c29239d05c3..955f99317790 100644 +--- a/arch/arm64/include/asm/tlb.h ++++ b/arch/arm64/include/asm/tlb.h +@@ -11,8 +11,17 @@ + #include <linux/pagemap.h> + #include <linux/swap.h> + ++#ifdef CONFIG_PTP ++#include <linux/iee-func.h> ++#endif ++ + static inline void __tlb_remove_table(void *_table) + { ++#ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(page_to_phys((struct page *)_table)); ++ set_iee_page_invalid(iee_addr); ++ iee_set_logical_mem_rw((unsigned long)page_address((struct page *)_table)); ++#endif + free_page_and_swap_cache((struct page *)_table); + } + +diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h +index 831c314d75ff..7775628528c6 100644 +--- a/arch/arm64/include/asm/tlbflush.h ++++ b/arch/arm64/include/asm/tlbflush.h +@@ -49,6 +49,7 @@ + + #define __tlbi(op, ...) 
__TLBI_N(op, ##__VA_ARGS__, 1, 0) + ++ + #define __tlbi_user(op, arg) do { \ + if (arm64_kernel_unmapped_at_el0()) \ + __tlbi(op, (arg) | USER_ASID_FLAG); \ +@@ -258,6 +259,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) + asid = __TLBI_VADDR(0, ASID(mm)); + __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); ++ #if defined(CONFIG_IEE) || defined (CONFIG_KOI) ++ if (!arm64_kernel_unmapped_at_el0()) ++ __tlbi(aside1is, asid | USER_ASID_FLAG); ++ #endif + dsb(ish); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); + } +@@ -273,6 +278,10 @@ static inline void __flush_tlb_page_nosync(struct mm_struct *mm, + __tlbi_user(vale1is, addr); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK, + (uaddr & PAGE_MASK) + PAGE_SIZE); ++ #if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ if (!arm64_kernel_unmapped_at_el0()) ++ __tlbi(vale1is, addr | USER_ASID_FLAG); ++ #endif + } + + static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, +@@ -366,6 +375,45 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) + * 2. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. + */ ++#if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++#define __flush_tlb_range_op(op, start, pages, stride, \ ++ asid, tlb_level, tlbi_user) \ ++do { \ ++ int num = 0; \ ++ int scale = 0; \ ++ unsigned long addr; \ ++ \ ++ while (pages > 0) { \ ++ if (!system_supports_tlb_range() || \ ++ pages % 2 == 1) { \ ++ addr = __TLBI_VADDR(start, asid); \ ++ __tlbi_level(op, addr, tlb_level); \ ++ if (!arm64_kernel_unmapped_at_el0()) /* added for IEE */ \ ++ __tlbi_level(op, addr | USER_ASID_FLAG, tlb_level); \ ++ if (tlbi_user) \ ++ __tlbi_user_level(op, addr, tlb_level); \ ++ start += stride; \ ++ pages -= stride >> PAGE_SHIFT; \ ++ continue; \ ++ } \ ++ \ ++ num = __TLBI_RANGE_NUM(pages, scale); \ ++ if (num >= 0) { \ ++ addr = __TLBI_VADDR_RANGE(start, asid, scale, \ ++ num, tlb_level); \ ++ __tlbi(r##op, addr); \ ++ if (!arm64_kernel_unmapped_at_el0()) /* added for IEE */ \ ++ __tlbi(r##op, addr | USER_ASID_FLAG); \ ++ if (tlbi_user) \ ++ __tlbi_user(r##op, addr); \ ++ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ ++ pages -= __TLBI_RANGE_PAGES(num, scale); \ ++ } \ ++ scale++; \ ++ } \ ++} while (0) ++ ++#else + #define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user) \ + do { \ +@@ -399,6 +447,8 @@ do { \ + } \ + } while (0) + ++#endif //if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ + #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + +@@ -467,7 +517,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end + return; + } + +- start = __TLBI_VADDR(start, 0); ++ start = __TLBI_VADDR(start, 0); + end = __TLBI_VADDR(end, 0); + + dsb(ishst); +@@ -483,9 +533,9 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end + */ + static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) + { +- unsigned long addr = __TLBI_VADDR(kaddr, 0); +- +- dsb(ishst); ++ unsigned long addr = __TLBI_VADDR(kaddr, 0); ++ ++ dsb(ishst); + __tlbi(vaae1is, addr); + dsb(ish); + isb(); +diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile +index 21ef9c21a400..44eb76cc54d7 100644 +--- a/arch/arm64/kernel/Makefile ++++ b/arch/arm64/kernel/Makefile +@@ -36,6 +36,8 @@ obj-y := debug-monitors.o entry.o irq.o 
fpsimd.o \
+ syscall.o proton-pack.o idreg-override.o idle.o \
+ patching.o
+
++obj-y += iee/
++obj-$(CONFIG_KOI) += koi/
+ obj-$(CONFIG_AARCH32_EL0) += binfmt_elf32.o sys32.o signal32.o \
+ sys_compat.o
+ obj-$(CONFIG_AARCH32_EL0) += sigreturn32.o
+diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
+index fd0f291e215e..c008e46b5fc0 100644
+--- a/arch/arm64/kernel/armv8_deprecated.c
++++ b/arch/arm64/kernel/armv8_deprecated.c
+@@ -306,11 +306,19 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
+
+ static int cp15_barrier_set_hw_mode(bool enable)
+ {
++#ifdef CONFIG_IEE
++ if (enable)
++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_CP15BEN);
++ else
++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_CP15BEN, 0);
++ return 0;
++#else
+ if (enable)
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_CP15BEN);
+ else
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_CP15BEN, 0);
+ return 0;
++#endif
+ }
+
+ static bool try_emulate_cp15_barrier(struct pt_regs *regs, u32 insn)
+@@ -341,11 +349,19 @@ static int setend_set_hw_mode(bool enable)
+ if (!cpu_supports_mixed_endian_el0())
+ return -EINVAL;
+
++#ifdef CONFIG_IEE
++ if (enable)
++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_SED, 0);
++ else
++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_SED);
++ return 0;
++#else
+ if (enable)
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_SED, 0);
+ else
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SED);
+ return 0;
++#endif
+ }
+
+ static int __a32_setend_handler(struct pt_regs *regs, u32 big_endian)
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index e997ad275afb..e105f633355a 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -97,6 +97,17 @@ int main(void)
+ DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp));
+ #endif
+ DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs));
++#ifdef CONFIG_IEE
++ DEFINE(iee_from_token_offset, offsetof(struct task_token, iee_stack));
++ DEFINE(kernel_from_token_offset, offsetof(struct task_token, kernel_stack));
++ DEFINE(mm_from_task_offset, offsetof(struct task_struct, mm));
++#endif
++#ifdef CONFIG_KOI
++ DEFINE(koi_kernel_from_token_offset, offsetof(struct task_token, koi_kernel_stack));
++ DEFINE(koi_from_token_offset, offsetof(struct task_token, koi_stack));
++ DEFINE(ttbr1_from_token_offset, offsetof(struct task_token, current_ttbr1));
++ DEFINE(koi_stack_base_from_token_offset, offsetof(struct task_token, koi_stack_base));
++#endif
+ BLANK();
+ #endif
+ #ifdef CONFIG_AARCH32_EL0
+diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
+index 46813132a09f..412006aa323c 100644
+--- a/arch/arm64/kernel/cpu_errata.c
++++ b/arch/arm64/kernel/cpu_errata.c
+@@ -80,7 +80,11 @@ hisilicon_1980005_enable(const struct arm64_cpu_capabilities *__unused)
+ __set_bit(ARM64_HAS_CACHE_IDC, system_cpucaps);
+ arm64_ftr_reg_ctrel0.sys_val |= BIT(CTR_EL0_IDC_SHIFT);
+ arm64_ftr_reg_ctrel0.strict_mask &= ~BIT(CTR_EL0_IDC_SHIFT);
++#ifdef CONFIG_IEE
++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0);
++#else
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
++#endif
+ }
+ #endif
+
+@@ -132,7 +136,11 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
+ enable_uct_trap = true;
+
+ if (enable_uct_trap)
++#ifdef CONFIG_IEE
++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0);
++#else
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
++#endif
+ }
+
+ #ifdef CONFIG_ARM64_ERRATUM_1463225
+@@ -147,7 +155,11 @@
has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, + static void __maybe_unused + cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) + { ++#ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCI, 0); ++#else + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); ++#endif + } + + #ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH +diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c +index 74e445251b51..ebc1e776b175 100644 +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -94,6 +94,10 @@ + #include <asm/vectors.h> + #include <asm/virt.h> + ++#ifdef CONFIG_IEE ++#include <asm/iee-si.h> ++#endif ++ + /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ + static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; + +@@ -1612,7 +1616,11 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu + * value. + */ + if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) ++#ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0); ++#else + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); ++#endif + } + + static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, +@@ -1873,7 +1881,11 @@ static inline void __cpu_enable_hw_dbm(void) + { + u64 tcr = read_sysreg(tcr_el1) | TCR_HD; + ++#ifdef CONFIG_IEE ++ iee_rwx_gate_entry(IEE_WRITE_tcr_el1, tcr); ++#else + write_sysreg(tcr, tcr_el1); ++#endif + isb(); + local_flush_tlb_all(); + } +@@ -2056,7 +2068,9 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) + */ + WARN_ON_ONCE(in_interrupt()); + ++ #ifndef CONFIG_IEE + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); ++ #endif + set_pstate_pan(1); + } + #endif /* CONFIG_ARM64_PAN */ +@@ -2121,7 +2135,11 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, + static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) + { + if (this_cpu_has_cap(ARM64_HAS_E0PD)) ++#ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(tcr_el1, 0, TCR_E0PD1); ++#else + sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); ++#endif + } + #endif /* CONFIG_ARM64_E0PD */ + +@@ -2214,7 +2232,11 @@ static void nmi_enable(const struct arm64_cpu_capabilities *__unused) + * avoid leaving things masked. 
+ */ + _allint_clear(); ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); ++ #else + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); ++ #endif + isb(); + } + #endif +@@ -2229,7 +2251,11 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) + * So, be strict and forbid other BRs using other registers to + * jump onto a PACIxSP instruction: + */ ++#ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); ++#else + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); ++#endif + isb(); + } + #endif /* CONFIG_ARM64_BTI */ +@@ -2237,7 +2263,11 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) + #ifdef CONFIG_ARM64_MTE + static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) + { ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); ++ #else + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); ++ #endif + + mte_cpu_setup(); + +@@ -2271,7 +2301,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in + + static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) + { ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_TIDCP); ++ #else + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); ++ #endif + } + + static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) +@@ -2281,7 +2315,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) + + static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) + { ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_MSCEn); ++ #else + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); ++ #endif + } + + /* Internal helper functions to match cpu capability type */ +@@ -3475,6 +3513,43 @@ static void __init setup_system_capabilities(void) + enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); + } + ++#ifdef CONFIG_IEE ++ ++static void iee_si_test_end(void) ++{ ++ pr_info("IEE: testing iee_exec_entry sctlr...\n"); ++ iee_rwx_gate_entry(IEE_WRITE_SCTLR, read_sysreg(sctlr_el1)& ~SCTLR_ELx_M); ++ pr_info("IEE: testing iee_exec_entry ttbr0_el1...\n"); ++ iee_rwx_gate_entry(IEE_WRITE_TTBR0, read_sysreg(ttbr0_el1)); ++ pr_info("IEE: testing iee_exec_entry vbar...\n"); ++ iee_rwx_gate_entry(IEE_WRITE_VBAR, read_sysreg(vbar_el1)); ++ pr_info("IEE: testing iee_exec_entry tcr...\n"); ++ iee_rwx_gate_entry(IEE_WRITE_TCR, read_sysreg(tcr_el1)); ++ // pr_info("IEE: testing iee_exec_entry mdscr...\n"); ++ // iee_rwx_gate_entry(IEE_WRITE_MDSCR, read_sysreg(mdscr_el1)); ++ // pr_info("IEE: testing iee_exec_entry afsr0...\n"); ++ // iee_rwx_gate_entry(IEE_WRITE_AFSR0); ++ #ifdef CONFIG_KOI ++ write_sysreg(read_sysreg(ttbr0_el1)+0x3000000000000, ttbr0_el1); ++ pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); ++ pr_info("IEE: testing iee_exec_entry switch to koi...\n"); ++ iee_rwx_gate_entry(IEE_SWITCH_TO_KOI, phys_to_ttbr(__pa_symbol(swapper_pg_dir))); ++ pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); ++ pr_info("IEE: testing iee_exec_entry switch to kernel...\n"); ++ iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); ++ #endif ++} ++ ++/* Finish iee rwx gate initializations. */ ++static void __init iee_si_init_done(void) ++{ ++ // Prepare data for iee rwx gate ++ iee_si_prepare_data(); ++ // All initialization is done. 
Do some simple tests. ++ iee_si_test_end(); ++} ++#endif ++ + void __init setup_cpu_features(void) + { + u32 cwg; +@@ -3502,6 +3577,10 @@ void __init setup_cpu_features(void) + if (!cwg) + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); ++ ++ #ifdef CONFIG_IEE ++ iee_si_init_done(); ++ #endif + } + + static int enable_mismatched_32bit_el0(unsigned int cpu) +diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c +index 745aefddd9a3..265417e0ad81 100644 +--- a/arch/arm64/kernel/debug-monitors.c ++++ b/arch/arm64/kernel/debug-monitors.c +@@ -36,10 +36,14 @@ u8 debug_monitors_arch(void) + */ + static void mdscr_write(u32 mdscr) + { ++// #ifdef CONFIG_IEE ++// iee_rwx_gate_entry(IEE_WRITE_mdscr_el1, mdscr); ++// #else + unsigned long flags; + flags = local_daif_save(); + write_sysreg(mdscr, mdscr_el1); + local_daif_restore(flags); ++// #endif + } + NOKPROBE_SYMBOL(mdscr_write); + +diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c +index 08274e4317b2..0d259e355c90 100644 +--- a/arch/arm64/kernel/entry-common.c ++++ b/arch/arm64/kernel/entry-common.c +@@ -156,7 +156,11 @@ asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs) + * mode. Before this function is called it is not safe to call regular kernel + * code, instrumentable code, or any code which may trigger an exception. + */ ++#ifdef CONFIG_IEE ++void noinstr arm64_enter_nmi(struct pt_regs *regs) ++#else + static void noinstr arm64_enter_nmi(struct pt_regs *regs) ++#endif + { + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + +diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S +index 7fcbee0f6c0e..dee813ee6aad 100644 +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -29,12 +29,391 @@ + #include <asm/asm-uaccess.h> + #include <asm/unistd.h> + ++#ifdef CONFIG_IEE ++#include <asm/iee-def.h> ++ ++#define BAD_SP_EL0 0 ++#define BAD_ELR_EL1 1 ++#define BAD_TCR_EL1 2 ++#define BAD_IEE_SI 4 ++#endif ++ + .macro clear_gp_regs + .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 + mov x\n, xzr + .endr + .endm + ++#ifdef CONFIG_KOI ++#ifdef CONFIG_IEE ++/* ++ * This function is used to switch to ko stack in glue code ++ */ ++SYM_FUNC_START(koi_do_switch_to_ko_stack) ++ sub sp, sp, #48 ++ stp x29, x30, [sp] ++ str x2, [sp, #16] ++ stp x0, x1, [sp, #32] ++ ++ // iee_rw_gate(IEE_WRITE_KERNEL_STACK, current, sp) ++ mov x0, #IEE_WRITE_KOI_KERNEL_STACK ++ mrs x1, sp_el0 ++ add x2, sp, #48 ++ ++ bl iee_rw_gate ++ ++ // iee_rw_gate(IEE_READ_KOI_STACK, current) ++ mov x0, #IEE_READ_KOI_STACK ++ mrs x1, sp_el0 ++ bl iee_rw_gate ++ ++ ldp x29, x30, [sp] ++ ldr x2, [sp, #16] ++ add x1, sp, #32 ++ mov sp, x0 ++ ldp x0, x1, [x1] ++ ++ isb ++ ret ++SYM_FUNC_END(koi_do_switch_to_ko_stack) ++ ++/* ++ * This fucntion is used to switch to kernel stack in glue code ++ */ ++SYM_FUNC_START(koi_do_switch_to_kernel_stack) ++ sub sp, sp, #48 ++ stp x29, x30, [sp] ++ str x2, [sp, #16] ++ stp x0, x1, [sp, #32] ++ // iee_rw_gate(IEE_WRITE_KOI_STACK, current, sp) ++ mov x0, #IEE_WRITE_KOI_STACK ++ mrs x1, sp_el0 ++ add x2, sp, #48 ++ bl iee_rw_gate ++ ++ // iee_rw_gate(IEE_READ_KOI_KERNEL_STACK, current) ++ mov x0, #IEE_READ_KOI_KERNEL_STACK ++ mrs x1, sp_el0 ++ bl iee_rw_gate ++ ++ ldp x29, x30, [sp] ++ ldr x2, [sp, #16] ++ add x1, sp, #32 ++ mov sp, x0 ++ ldp x0, x1, [x1] ++ isb ++ ret ++SYM_FUNC_END(koi_do_switch_to_kernel_stack) ++ ++/* ++ * Before switch to ko's pgtable, we must switch 
current stack to ko's stack. ++ * We have stored registers to kernel stack, and we need to restore them from ko's stack after switching, ++ * so we need to copy from kernel stack to ko stack ++ * the memory region to copy is [sp, stack_top) ++ * void koi_switch_to_ko_stack(void); ++ */ ++SYM_FUNC_START(koi_switch_to_ko_stack) ++ mrs x17, pan ++ msr pan, 0x0 ++ ++ sub sp, sp, #32 ++ str x17, [sp, #16] ++ stp x30, x29, [sp] ++ ++ // current sp stores in x1 ++ add x1, x1, #176 ++ // current sp_el0 stores in x0 ++ bl _iee_write_koi_kernel_stack ++ ++ mrs x0, sp_el0 ++ bl _iee_read_koi_stack ++ ++ ldr x17, [sp, #16] ++ ldp x30, x29, [sp] ++ add sp, sp, #32 ++ ++ msr pan, x17 ++ ++ sub x0, x0, #176 ++ mov x1, sp ++ mov x2, #176 ++ ++ // memcpy(current->driver_stack, current->kernel_stack, 176) ++ mov x16, lr ++ bl memcpy ++ mov lr, x16 ++ ++ mov sp, x0 ++ isb ++ ret ++SYM_FUNC_END(koi_switch_to_ko_stack) ++ ++SYM_FUNC_START(koi_switch_to_kernel_stack) ++ /* ++ * current sp belongs to driver stack, and the bottom 160 bytes saves registers when exception occurred, ++ * so we should add 160 to current sp, and store it in task_struct ++ * also, fetch kernel sp from task_struct, copy the bottom 160 bytes from driver stack to kernel stack ++ */ ++ mrs x17, pan ++ msr pan, 0x0 ++ ++ sub sp, sp, #32 ++ stp x30, x29, [sp] ++ str x17, [sp, #16] ++ ++ mrs x0, sp_el0 ++ add x1, sp, #192 ++ bl _iee_write_koi_stack ++ ++ mrs x0, sp_el0 ++ bl _iee_read_koi_kernel_stack ++ ++ ldr x17, [sp, #16] ++ ldp x30, x29, [sp] ++ add sp, sp, #32 ++ ++ msr pan, x17 ++ ++ // x0 = kernel_stack ++ sub x0, x0, #160 ++ mov x1, sp ++ // x2 = 160 ++ mov x2, #160 ++ ++ mov x16, lr ++ bl memcpy ++ mov lr, x16 ++ ++ mov sp, x0 ++ isb ++ ret ++SYM_FUNC_END(koi_switch_to_kernel_stack) ++#else ++/* ++ * This function is used to switch to ko stack in glue code ++ */ ++SYM_FUNC_START(koi_do_switch_to_ko_stack) ++ sub sp, sp, #16 ++ stp x16, x17, [sp] ++ mrs x17, sp_el0 ++ adrp x16, koi_offset ++ ldr x16, [x16, #:lo12:koi_offset] ++ add x17, x17, x16 ++ add x16, sp, #16 ++ str x16, [x17, #koi_kernel_from_token_offset] ++ ldr x16, [x17, #koi_from_token_offset] ++ mov x17, sp ++ mov sp, x16 ++ ldp x16, x17, [x17] ++ isb ++ ret ++SYM_FUNC_END(koi_do_switch_to_ko_stack) ++ ++/* ++ * This fucntion is used to switch to kernel stack in glue code ++ */ ++SYM_FUNC_START(koi_do_switch_to_kernel_stack) ++ sub sp, sp, #16 ++ stp x16, x17, [sp] ++ mrs x17, sp_el0 ++ adrp x16, koi_offset ++ ldr x16, [x16, #:lo12:koi_offset] ++ add x17, x17, x16 ++ add x16, sp, #16 ++ str x16, [x17, #koi_from_token_offset] ++ ldr x16, [x17, #koi_kernel_from_token_offset] ++ mov x17, sp ++ mov sp, x16 ++ ldp x16, x17, [x17] ++ isb ++ ret ++SYM_FUNC_END(koi_do_switch_to_kernel_stack) ++ ++/* ++ * Before switch to ko's pgtable, we must switch current stack to ko's stack. 
++ * We have stored registers to kernel stack, and we need to restore them from ko's stack after switching, ++ * so we need to copy from kernel stack to ko stack ++ * the memory region to copy is [sp, stack_top) ++ * void koi_switch_to_ko_stack(unsigned long stack_top); ++ */ ++SYM_FUNC_START(koi_switch_to_ko_stack) ++ // current sp stores in x1 ++ add x3, x1, #176 ++ adrp x4, koi_offset ++ ldr x4, [x4, #:lo12:koi_offset] ++ add x4, x0, x4 ++ // current sp_el0 stores in x0 ++ str x3, [x4, #koi_kernel_from_token_offset] ++ ldr x0, [x4, #koi_from_token_offset] ++ sub x0, x0, #176 ++ mov x2, #176 ++ ++ // memcpy(current->driver_stack, current->kernel_stack, 176) ++ mov x16, lr ++ bl memcpy ++ mov lr, x16 ++ ++ mov sp, x0 ++ isb ++ ret ++SYM_FUNC_END(koi_switch_to_ko_stack) ++ ++SYM_FUNC_START(koi_switch_to_kernel_stack) ++ /* ++ * current sp belongs to driver stack, and the bottom 176 bytes saves registers when exception occurred, ++ * so we should add 176 to current sp, and store it in task_struct ++ * also, fetch kernel sp from task_struct, copy the bottom 176 bytes from driver stack to kernel stack ++ */ ++ mov x1, sp ++ add x3, sp, #160 ++ ++ mrs x16, sp_el0 ++ adrp x2, koi_offset ++ ldr x2, [x2, #:lo12:koi_offset] ++ add x16, x16, x2 ++ str x3, [x16, #koi_from_token_offset] ++ // sp points to kernel_stack ++ ldr x0, [x16, #koi_kernel_from_token_offset] ++ ++ // x0 = kernel_stack ++ sub x0, x0, #160 ++ // x2 = 160 ++ mov x2, #160 ++ mov x16, lr ++ // memcpy(kernel_stack, driver_stack, 160) ++ bl memcpy ++ mov lr, x16 ++ mov sp, x0 ++ isb ++ ret ++SYM_FUNC_END(koi_switch_to_kernel_stack) ++#endif ++ ++SYM_FUNC_START(koi_switch_to_ko_pgtbl) ++ stp x0, x1, [sp, #16 * 1] ++ stp x2, x3, [sp, #16 * 2] ++ stp x4, x5, [sp, #16 * 3] ++ stp x6, x7, [sp, #16 * 4] ++ stp x8, x9, [sp, #16 * 5] ++ stp x10, x11, [sp, #16 * 6] ++ stp x12, x13, [sp, #16 * 7] ++ stp x14, x15, [sp, #16 * 8] ++ stp x16, x17, [sp, #16 * 9] ++ stp x18, x30, [sp, #16 * 10] ++ ++ adrp x0, koi_swapper_ttbr1 ++ ldr x0, [x0, #:lo12:koi_swapper_ttbr1] ++ cbz x0, 0f ++ bl koi_do_switch_to_ko_pgtbl ++ // if x0 == 0, don't need to switch pgtable and stack, jump to 0 ++ cbz x0, 0f ++ mov x19, x0 ++ // if current on task's kernel stack, switch to ko stack ++ mrs x0, sp_el0 ++ mov x1, sp ++ ldr x2, [x0, TSK_STACK] ++ eor x2, x2, x1 ++ and x2, x2, #~(THREAD_SIZE - 1) ++ cbnz x2, 1f ++ ++ bl koi_switch_to_ko_stack ++1: ++#ifndef CONFIG_IEE ++ msr ttbr1_el1, x19 ++ isb ++ nop ++ nop ++ nop ++#else ++ mov x0, #IEE_SWITCH_TO_KOI ++ mov x1, x19 ++ bl iee_rwx_gate_entry ++#endif ++0: ++ ++ ldp x0, x1, [sp, #16 * 1] ++ ldp x2, x3, [sp, #16 * 2] ++ ldp x4, x5, [sp, #16 * 3] ++ ldp x6, x7, [sp, #16 * 4] ++ ldp x8, x9, [sp, #16 * 5] ++ ldp x10, x11, [sp, #16 * 6] ++ ldp x12, x13, [sp, #16 * 7] ++ ldp x14, x15, [sp, #16 * 8] ++ ldp x16, x17, [sp, #16 * 9] ++ ldp x18, x30, [sp, #16 * 10] ++ ret ++SYM_FUNC_END(koi_switch_to_ko_pgtbl) ++ ++.pushsection ".koi.text", "ax" ++SYM_FUNC_START(koi_switch_to_kernel_pgtbl) ++ sub sp, sp, #160 ++ stp x0, x1, [sp, #16 * 0] ++ stp x2, x3, [sp, #16 * 1] ++ stp x4, x5, [sp, #16 * 2] ++ stp x6, x7, [sp, #16 * 3] ++ stp x8, x9, [sp, #16 * 4] ++ stp x10, x11, [sp, #16 * 5] ++ ++ stp x12, x13, [sp, #16 * 6] ++ stp x14, x15, [sp, #16 * 7] ++ stp x16, x17, [sp, #16 * 8] ++ stp x18, x30, [sp, #16 * 9] ++ // check whether paging init finished ++ adrp x0, koi_swapper_ttbr1 ++ ldr x0, [x0, #:lo12:koi_swapper_ttbr1] ++ cbz x0, 0f ++ ++ bl koi_do_switch_to_kernel_pgtbl ++ /* ++ * koi_do_switch_to_kernel_pgtbl return 0 indicates 
++ * that when exception occurred, the isolated ko is executing under koi pgtbl, ++ * so we need to switch stack to kernel stack after switch pgtbl back to koi_swapper_ttbr1. ++ */ ++ cbz x0, 0f ++#ifndef CONFIG_IEE ++ mrs x0, sp_el0 ++ adrp x1, koi_offset ++ ldr x1, [x1, #:lo12:koi_offset] ++ add x0, x0, x1 ++ mov x16, sp ++ ldr x17, [x0, koi_stack_base_from_token_offset] ++ eor x17, x17, x16 ++ and x17, x17, #~(THREAD_SIZE - 1) ++ cbnz x17, 0f ++#else ++ // save current pan ++ mrs x17, pan ++ // disable pan ++ msr pan, 0x0 ++ mrs x0, sp_el0 ++ bl _iee_read_koi_stack_base ++ // restore pan ++ msr pan, x17 ++ ++ mov x16, sp ++ eor x0, x0, x16 ++ and x0, x0, #~(THREAD_SIZE - 1) ++ cbnz x0, 0f ++#endif ++ bl koi_switch_to_kernel_stack ++0: ++ ++ ldp x0, x1, [sp, #16 * 0] ++ ldp x2, x3, [sp, #16 * 1] ++ ldp x4, x5, [sp, #16 * 2] ++ ldp x6, x7, [sp, #16 * 3] ++ ldp x8, x9, [sp, #16 * 4] ++ ldp x10, x11, [sp, #16 * 5] ++ ldp x12, x13, [sp, #16 * 6] ++ ldp x14, x15, [sp, #16 * 7] ++ ldp x16, x17, [sp, #16 * 8] ++ ldp x18, x30, [sp, #16 * 9] ++ add sp, sp, #160 ++ ret ++SYM_FUNC_END(koi_switch_to_kernel_pgtbl) ++.popsection ++#endif ++ + .macro kernel_ventry, el:req, ht:req, regsize:req, label:req + .align 7 + .Lventry_start\@: +@@ -151,6 +530,17 @@ alternative_else_nop_endif + #endif + .endm + ++#ifdef CONFIG_IEE ++// SP_EL0 check failed. ++SYM_FUNC_START_LOCAL(sp_el0_check_failed) ++ mov x0, sp ++ mov x1, #BAD_SP_EL0 ++ mrs x2, esr_el1 ++ bl iee_bad_mode ++ ASM_BUG() ++SYM_FUNC_END(sp_el0_check_failed) ++#endif ++ + /* Clear the MTE asynchronous tag check faults */ + .macro clear_mte_async_tcf thread_sctlr + #ifdef CONFIG_ARM64_MTE +@@ -224,6 +614,14 @@ alternative_cb_end + ldr_this_cpu tsk, __entry_task, x20 + msr sp_el0, tsk + ++#ifdef CONFIG_IEE ++ // tsk check. ++ ldr_this_cpu x19, __entry_task, x20 ++ mrs x20, sp_el0 ++ cmp x19, x20 ++ b.ne sp_el0_check_failed ++#endif ++ + /* + * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions + * when scheduling. +@@ -276,6 +674,13 @@ alternative_else_nop_endif + + scs_load_current + .else ++#ifdef CONFIG_IEE ++ // tsk check. ++ ldr_this_cpu x19, __entry_task, x20 ++ mrs x20, sp_el0 ++ cmp x19, x20 ++ b.ne sp_el0_check_failed ++#endif + add x21, sp, #PT_REGS_SIZE + get_current_task tsk + .endif /* \el == 0 */ +@@ -333,9 +738,11 @@ alternative_else_nop_endif + .endm + + .macro kernel_exit, el ++ #ifndef CONFIG_IEE + .if \el != 0 + disable_daif + .endif ++ #endif + + #ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if_not ARM64_HAS_GIC_PRIO_MASKING +@@ -411,6 +818,41 @@ alternative_else_nop_endif + + msr elr_el1, x21 // set up the return data + msr spsr_el1, x22 ++ ++#ifdef CONFIG_IEE ++ ++ .if \el == 0 ++ ++ #ifndef CONFIG_UNMAP_KERNEL_AT_EL0 ++ // SET hpd1 = 0 start ++ mrs x0, tcr_el1 ++ and x0, x0, #0xFFFFFBFFFFFFFFFF ++ and x0, x0, #0xFFFFFFFFFFBFFFFF ++ msr tcr_el1, x0 ++ // SET hpd1 = 0 end ++ ++ disable_daif ++ ++ // Check ELR_EL1 ++ mrs x0, elr_el1 ++ lsr x0, x0, #48 ++ tst x0, #0xffff ++ b.ne 5f ++ #endif ++ ++ .endif ++ ++#else ++#ifdef CONFIG_KOI ++ .if \el==0 ++ mrs x0, tcr_el1 ++ and x0, x0, #0xFFFFFFFFFFBFFFFF ++ msr tcr_el1,x0 ++ .endif ++#endif ++ ++#endif ++ + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] +@@ -569,12 +1011,167 @@ SYM_CODE_START_LOCAL(__bad_stack) + SYM_CODE_END(__bad_stack) + #endif /* CONFIG_VMAP_STACK */ + ++/* ++ * iee exception entry ++ */ ++ .macro iee_exception_entry, el ++ ++ /* Check whether exception is permmited. 
*/ ++ ldr x1, =__iee_si_no_irq ++ cmp x1, x22 ++ b.hi 1148f ++ ldr x1, =__iee_si_end ++ cmp x1, x22 ++ b.lo 1148f ++ /* ELR check fail */ ++ mov x0, sp ++ mov x1, #BAD_IEE_SI ++ mrs x2, esr_el1 ++ bl iee_bad_mode ++ ASM_BUG() ++1148: ++ ++ /* el0 set hpds */ ++ .if \el == 0 ++ ++ #ifndef CONFIG_UNMAP_KERNEL_AT_EL0 ++ /* SET hpd1 = 1 start */ ++ mrs x0, tcr_el1 ++ orr x0, x0, #0x0000040000000000 ++ orr x0, x0, #0x0000000000400000 ++ msr tcr_el1, x0 ++ /* SET hpd1 = 1 end */ ++ ++ disable_daif ++ ++ /* Check TCR_EL1 */ ++ mrs x0, tcr_el1 ++ tst x0, #0x0000040000000000 ++ b.eq 5f ++ tst x0, #0x0000000000400000 ++ b.ne 6f ++ ++5: ++ /* TCR_EL1 check fail */ ++ mov x0, sp ++ mov x1, #BAD_TCR_EL1 ++ mrs x2, esr_el1 ++ bl iee_bad_mode ++ ASM_BUG() ++ ++6: ++ nop ++ #endif ++ ++ .else ++#ifdef CONFIG_IEE_INTERRUPTABLE ++ /* el1 save elr_el1 and set pan */ ++ /* Check ELR_EL1 */ ++ ldr x1, =__iee_code_start ++ cmp x1, x22 ++ b.hi 7f ++ ldr x1, =__iee_code_end ++ cmp x1, x22 ++ b.lo 7f ++ /* Exception from iee code */ ++ /* Switch to kernel stack */ ++ mrs x0, sp_el0 /* x0 -> task_struct(VA) */ ++ adrp x2, iee_offset ++ ldr x2, [x2, #:lo12:iee_offset] ++ add x1, x0, x2 /* x1 -> task_token(IEE) */ ++ // store iee stack ++ mov x3, sp ++ str x3, [x1, #iee_from_token_offset] ++ // load kernel stack ++ ldr x3, [x1, #kernel_from_token_offset] ++ mov sp, x3 ++ sub sp, sp, #PT_REGS_SIZE ++ /* Enable PAN */ ++ msr pan, #0x1 ++ ++7: ++ /* Exception from kernel code */ ++ mov x0, #0x0 ++ mov x1, #0x0 ++ mov x2, #0x0 ++ mov x3, #0x0 ++#endif ++ .endif ++ .endm ++ ++/* ++ * iee exception exit ++ */ ++ .macro iee_exception_exit, el ++ // Disable daif ++ disable_daif ++ ++ .if \el == 1 ++#ifdef CONFIG_IEE_INTERRUPTABLE ++ /* el1 pop elr_el1 and set pan */ ++ /* Check ELR_EL1 */ ++ ldr x1, =__iee_code_start ++ cmp x1, x22 ++ b.hi 9f ++ ldr x1, =__iee_code_end ++ cmp x1, x22 ++ b.lo 9f ++ /* Eret iee code */ ++ /* Disable PAN */ ++ msr pan, #0x0 ++ /* Switch to iee stack */ ++ add sp, sp, #PT_REGS_SIZE ++ mrs x0, sp_el0 /* x0 -> task_struct */ ++ adrp x2, iee_offset ++ ldr x2, [x2, #:lo12:iee_offset] ++ add x1, x0, x2 /* x1 -> task_token(IEE) */ ++ // store kernel stack ++ mov x3, sp ++ str x3, [x1, #kernel_from_token_offset] ++ // load iee stack ++ ldr x2, [x1, #iee_from_token_offset] ++ mov sp, x2 ++ /* Load ELR_EL1 from iee stack */ ++ ldr x21, [sp, #S_PC] ++ /* Check the modify of ELR_EL1 */ ++ cmp x21, x22 ++ b.ne 8f ++ /* ELR_EL1 not modified */ ++ b 9f ++ ++8: ++ // ELR_EL1 modified ++ mov x0, sp ++ mov x1, #BAD_ELR_EL1 ++ mrs x2, esr_el1 ++ bl iee_bad_mode ++ ASM_BUG() ++ ++9: ++ // Eret kernel code ++ mov x0, #0x0 ++ mov x1, #0x0 ++ mov x2, #0x0 ++ mov x3, #0x0 ++#endif ++ .endif ++ .endm + + .macro entry_handler el:req, ht:req, regsize:req, label:req + SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) + kernel_entry \el, \regsize ++ ++ #ifdef CONFIG_IEE ++ iee_exception_entry \el ++ #endif ++ + mov x0, sp + bl el\el\ht\()_\regsize\()_\label\()_handler ++ ++ #ifdef CONFIG_IEE ++ iee_exception_exit \el ++ #endif ++ + .if \el == 0 + b ret_to_user + .else +@@ -844,6 +1441,13 @@ SYM_FUNC_START(cpu_switch_to) + ldr lr, [x8] + mov sp, x9 + msr sp_el0, x1 ++#ifdef CONFIG_IEE ++ // tsk check. 
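++ // (after the "msr sp_el0, x1" above, sp_el0 holds the next task; the
++ // check below verifies it still matches this CPU's __entry_task and
++ // traps to sp_el0_check_failed on any mismatch.)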
++ ldr_this_cpu x8, __entry_task, x9
++ mrs x9, sp_el0
++ cmp x8, x9
++ b.ne sp_el0_check_failed
+#endif
+ ptrauth_keys_install_kernel x1, x8, x9, x10
+ scs_save x0
+ scs_load_current
+@@ -1033,6 +1637,13 @@ SYM_CODE_START(__sdei_asm_handler)
+ mrs x28, sp_el0
+ ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1
+ msr sp_el0, x0
++#ifdef CONFIG_IEE
++ // tsk check.
++ ldr_this_cpu x0, __entry_task, x1
++ mrs x1, sp_el0
++ cmp x0, x1
++ b.ne sp_el0_check_failed
++#endif
+
+ /* If we interrupted the kernel point to the previous stack/frame. */
+ and x0, x3, #0xc
+diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
+index 5cdfcc9e3e54..c0af965bd92e 100644
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -1309,7 +1309,11 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+ isb();
+
+ /* Allow EL0 to access TPIDR2 */
++ #ifdef CONFIG_IEE
++ iee_rwx_gate_entry(IEE_WRITE_sctlr_el1, read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2);
++ #else
+ write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
++ #endif
+ isb();
+ }
+
+diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
+index 6517bf2644a0..5c75e069d877 100644
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -464,6 +464,42 @@ SYM_FUNC_END(create_kernel_mapping)
+ set_this_cpu_offset \tmp1
+ .endm
+
++#ifdef CONFIG_IEE
++ .macro init_cpu_task_checked tsk, tmp1, tmp2
++ msr sp_el0, \tsk
++ // tsk check.
++ adrp x29, __per_cpu_offset
++ mrs \tmp1, sp_el0
++ ldr \tmp2, [\tmp1, #TSK_TI_CPU] /* cpu number */
++1:
++ cmp \tmp2, #0
++ b.eq 2f
++ add x29, x29, #8
++ sub \tmp2, \tmp2, #1
++ b 1b
++2:
++ ldr \tmp2, [x29, #:lo12:__per_cpu_offset] /* cpu offset */
++ adr_l x29, __entry_task
++ ldr x29, [x29, \tmp2]
++ cmp x29, \tmp1
++ b.ne sp_el0_check_failed
++
++ ldr \tmp1, [\tsk, #TSK_STACK]
++ add sp, \tmp1, #THREAD_SIZE
++ sub sp, sp, #PT_REGS_SIZE
++
++ stp xzr, xzr, [sp, #S_STACKFRAME]
++ add x29, sp, #S_STACKFRAME
++
++ scs_load_current
++
++ adr_l \tmp1, __per_cpu_offset
++ ldr w\tmp2, [\tsk, #TSK_TI_CPU]
++ ldr \tmp1, [\tmp1, \tmp2, lsl #3]
++ set_this_cpu_offset \tmp1
++ .endm
++#endif
++
+ /*
+ * The following fragment of code is executed with the MMU enabled.
+ *
+@@ -661,6 +697,18 @@ SYM_FUNC_START_LOCAL(secondary_startup)
+ SYM_FUNC_END(secondary_startup)
+
+ .text
++#ifdef CONFIG_IEE
++// SP_EL0 check failed.
++SYM_FUNC_START_LOCAL(sp_el0_check_failed)
++ 1:
++ nop
++ nop
++ nop
++ nop
++ b 1b
++SYM_FUNC_END(sp_el0_check_failed)
++#endif
++
+ SYM_FUNC_START_LOCAL(__secondary_switched)
+ mov x0, x20
+ bl set_cpu_boot_mode_flag
+@@ -677,7 +725,11 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
+ ldr x2, [x0, #CPU_BOOT_TASK]
+ cbz x2, __secondary_too_slow
+
++#ifdef CONFIG_IEE
++ init_cpu_task_checked x2, x1, x3
++#else
+ init_cpu_task x2, x1, x3
++#endif
+
+ #ifdef CONFIG_ARM64_PTR_AUTH
+ ptrauth_keys_init_cpu x2, x3, x4, x5
+@@ -746,6 +798,10 @@ SYM_FUNC_START(__enable_mmu)
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX
+ b.gt __no_granule_support
+ phys_to_ttbr x2, x2
++#ifdef CONFIG_IEE
++ mov x3, #1
++ bfi x2, x3, #48, #16 // ASID 1 is used by IEE rwx gate.
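++ // (TTBR0_EL1 carries the ASID in its upper bits [63:48]; the bfi above
++ // therefore tags this TTBR0 value with ASID 1, so translations made
++ // through it are kept apart from those made under ASID 0.)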
++#endif
+ msr ttbr0_el1, x2 // load TTBR0
+ load_ttbr1 x1, x1, x3
+
+diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
+index 02870beb271e..1c14428a3ed4 100644
+--- a/arch/arm64/kernel/hibernate.c
++++ b/arch/arm64/kernel/hibernate.c
+@@ -34,6 +34,10 @@
+ #include <asm/trans_pgd.h>
+ #include <asm/virt.h>
+
++#ifdef CONFIG_PTP
++#include <linux/iee-func.h>
++#endif
++
+ /*
+ * Hibernate core relies on this value being 0 on resume, and marks it
+ * __nosavedata assuming it will keep the resume kernel's '0' value. This
+@@ -197,12 +201,22 @@ static int create_safe_exec_page(void *src_start, size_t length,
+ phys_addr_t trans_ttbr0;
+ unsigned long t0sz;
+ int rc;
++ #ifdef CONFIG_PTP
++ unsigned long iee_addr;
++ #endif
+
+ if (!page)
+ return -ENOMEM;
+
+ memcpy(page, src_start, length);
+ caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
++
++ #ifdef CONFIG_PTP
++ iee_addr = __phys_to_iee(__pa(page_address(page)));
++ set_iee_page_valid(iee_addr);
++ iee_set_logical_mem_ro((unsigned long)page_address(page));
++ #endif
++
+ rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
+ if (rc)
+ return rc;
+diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
+index d39a8787edf2..b5ac4b7670bc 100644
+--- a/arch/arm64/kernel/hw_breakpoint.c
++++ b/arch/arm64/kernel/hw_breakpoint.c
+@@ -26,6 +26,10 @@
+ #include <asm/cputype.h>
+ #include <asm/system_misc.h>
+
++#ifdef CONFIG_IEE
++#include <asm/iee-si.h>
++#endif
++
+ /* Breakpoint currently in use for each BRP. */
+ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
+
+@@ -102,13 +106,68 @@ int hw_breakpoint_slots(int type)
+ WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \
+ WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
+
++#ifdef CONFIG_IEE
++
++#define IEE_SI_READ_WB_REG_CASE(OFF, N, REG, VAL) \
++ case (OFF + N): \
++ IEE_SI_AARCH64_DBG_READ(N, REG, VAL); \
++ break
++
++#define IEE_SI_WRITE_WB_REG_CASE(OFF, N, REG, VAL) \
++ case (OFF + N): \
++ IEE_SI_AARCH64_DBG_WRITE(N, REG, VAL); \
++ break
++
++#define IEE_SI_GEN_READ_REG_CASES(OFF, REG, VAL) \
++ IEE_SI_READ_WB_REG_CASE(OFF, 0, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 1, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 2, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 3, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 4, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 5, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 6, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 7, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 8, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 9, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 10, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 11, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 12, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 13, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 14, REG, VAL); \
++ READ_WB_REG_CASE(OFF, 15, REG, VAL)
++
++#define IEE_SI_GEN_WRITE_REG_CASES(OFF, REG, VAL) \
++ IEE_SI_WRITE_WB_REG_CASE(OFF, 0, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \
++ WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
++
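++/*
++ * For reference, a rough sketch of how the case-generator macros above
++ * expand inside read_wb_reg() below: only slot 0 is routed through the
++ * IEE rwx gate, while the remaining slots keep the plain sysreg
++ * accessors (illustrative, not the literal preprocessor output):
++ *
++ *	switch (reg + n) {
++ *	case AARCH64_DBG_REG_BVR + 0:
++ *		IEE_SI_AARCH64_DBG_READ(0, AARCH64_DBG_REG_NAME_BVR, val);
++ *		break;
++ *	case AARCH64_DBG_REG_BVR + 1:
++ *		AARCH64_DBG_READ(1, AARCH64_DBG_REG_NAME_BVR, val);
++ *		break;
++ *	...
++ *	}
++ */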
++#endif
++
+ static u64 read_wb_reg(int reg, int n)
+ {
+ u64 val = 0;
+
+ switch (reg + n) {
++// #ifdef CONFIG_IEE
++// IEE_SI_GEN_READ_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
++// IEE_SI_GEN_READ_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
++// #else
+ GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
+ GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
++// #endif
+ GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
+ GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
+ default:
+@@ -122,8 +181,13 @@ NOKPROBE_SYMBOL(read_wb_reg);
+ static void write_wb_reg(int reg, int n, u64 val)
+ {
+ switch (reg + n) {
++// #ifdef CONFIG_IEE
++// IEE_SI_GEN_WRITE_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
++// IEE_SI_GEN_WRITE_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
++// #else
+ GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
+ GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
++// #endif
+ GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
+ GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
+ default:
+@@ -171,6 +235,10 @@ static int is_a32_compat_bp(struct perf_event *bp)
+ return tsk && is_a32_compat_thread(task_thread_info(tsk));
+ }
+
++#ifdef CONFIG_IEE
++int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
++#endif
++
+ /**
+ * hw_breakpoint_slot_setup - Find and setup a perf slot according to
+ * operations
+@@ -191,6 +259,37 @@ static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
+ {
+ int i;
+ struct perf_event **slot;
++// reserve hw breakpoint 0 for iee rwx gate in kernel space.
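++// If enabled, the disabled block below would make kernel-space breakpoints
++// start the linear slot search at index 1, leaving slot 0 permanently to
++// the gate while user-space breakpoints still search from slot 0; in
++// outline:
++//	int start = arch_check_bp_in_kernelspace(info) ? 1 : 0;
++//	for (i = start; i < max_slots; ++i) { /* install/uninstall/restore */ }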
++// #ifdef CONFIG_IEE ++// struct arch_hw_breakpoint *info = counter_arch_bp(bp); ++// if (arch_check_bp_in_kernelspace(info)){ ++// for (i = 1; i < max_slots; ++i) { // search from hw breakpoint 1 ++// slot = &slots[i]; ++// switch (ops) { ++// case HW_BREAKPOINT_INSTALL: ++// if (!*slot) { ++// *slot = bp; ++// return i; ++// } ++// break; ++// case HW_BREAKPOINT_UNINSTALL: ++// if (*slot == bp) { ++// *slot = NULL; ++// return i; ++// } ++// break; ++// case HW_BREAKPOINT_RESTORE: ++// if (*slot == bp) ++// return i; ++// break; ++// default: ++// pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); ++// return -EINVAL; ++// } ++// } ++// return -ENOSPC; ++// } ++// #endif + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; +diff --git a/arch/arm64/kernel/iee/Makefile b/arch/arm64/kernel/iee/Makefile +new file mode 100644 +index 000000000000..123c68c5cc4e +--- /dev/null ++++ b/arch/arm64/kernel/iee/Makefile +@@ -0,0 +1 @@ ++obj-$(CONFIG_IEE) += iee.o iee-gate.o iee-func.o +\ No newline at end of file +diff --git a/arch/arm64/kernel/iee/iee-func.c b/arch/arm64/kernel/iee/iee-func.c +new file mode 100644 +index 000000000000..7764dbd41555 +--- /dev/null ++++ b/arch/arm64/kernel/iee/iee-func.c +@@ -0,0 +1,187 @@ ++#include "asm/pgtable.h" ++#include <linux/memory.h> ++#include <linux/mm.h> ++#include <asm/tlb.h> ++#include <asm/tlbflush.h> ++#include <asm/pgalloc.h> ++ ++void set_iee_page_valid(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ pte_t pte = READ_ONCE(*ptep); ++ ++ if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) ++ return; ++ ++ pte = __pte(pte_val(pte) | 0x1); ++ set_pte(ptep, pte); ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ isb(); ++} ++ ++void set_iee_page_invalid(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ pte_t pte = READ_ONCE(*ptep); ++ ++ if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) ++ return; ++ ++ pte = __pte(pte_val(pte) & ~0x1); ++ set_pte(ptep, pte); ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ isb(); ++} ++ ++void iee_set_logical_mem_ro(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ pte_t pte = READ_ONCE(*ptep); ++ ++ if(addr < PAGE_OFFSET) ++ return; ++ ++ pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); ++ set_pte(ptep, pte); ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ isb(); ++} ++ ++void iee_set_logical_mem_rw(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ pte_t pte = READ_ONCE(*ptep); ++ ++ if((addr < PAGE_OFFSET) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 2)))) ++ return; ++ ++ pte 
= __pte(pte_val(pte) | PTE_DBM); ++ set_pte(ptep, pte); ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ isb(); ++} ++ ++void iee_set_token_page_valid(void *token, void *new) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); ++ ++ pud_t *pudp = pud_offset(p4dp, (unsigned long)token); ++ ++ pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); ++ pte_t pte = READ_ONCE(*ptep); ++ pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); ++ set_pte(ptep, pte); ++ flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); ++ isb(); ++} ++ ++void iee_set_token_page_invalid(void *token) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); ++ ++ pud_t *pudp = pud_offset(p4dp, (unsigned long)token); ++ ++ pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); ++ pte_t pte = READ_ONCE(*ptep); ++ pte = __pte(((pte_val(pte) & ~((unsigned long)0x1)) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token - IEE_OFFSET))); ++ set_pte(ptep, pte); ++ flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); ++ isb(); ++} ++ ++void iee_set_kernel_ppage(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ ++ int i; ++ for(i = 0; i < 4; i++) ++ { ++ pte_t pte = READ_ONCE(*ptep); ++ pte = __pte(pte_val(pte) & ~PTE_USER & ~PTE_NG); ++ iee_set_pte_ppage(ptep, pte); ++ ptep++; ++ } ++ flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); ++ isb(); ++} ++ ++void iee_set_kernel_upage(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ p4d_t p4d = READ_ONCE(*p4dp); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ ++ int i; ++ for(i = 0; i < 4; i++) ++ { ++ pte_t pte = READ_ONCE(*ptep); ++ pte = __pte(pte_val(pte) | PTE_USER | PTE_NG); ++ iee_set_pte_upage(ptep, pte); ++ ptep++; ++ } ++ flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); ++ isb(); ++} +\ No newline at end of file +diff --git a/arch/arm64/kernel/iee/iee-gate.S b/arch/arm64/kernel/iee/iee-gate.S +new file mode 100644 +index 000000000000..6de99a018bde +--- /dev/null ++++ b/arch/arm64/kernel/iee/iee-gate.S +@@ -0,0 +1,174 @@ ++#include <asm/asm-offsets.h> ++#include <linux/linkage.h> ++#include <asm/bug.h> ++#include <asm-generic/export.h> ++ ++#ifdef CONFIG_IEE ++ ++SYM_FUNC_START(iee_rw_gate) ++ /* save daif, close irq */ ++ mrs x13, daif ++ msr daifset, #0x2 ++ isb ++ /* save lr */ ++ sub sp, sp, #16 ++ stp x29, x30, [sp] ++ bl iee_protected_rw_gate ++ /* restore lr */ ++ ldp x29, x30, [sp] ++ add sp, sp, #16 ++ /* restore daif */ ++ msr daif, x13 ++ ret ++SYM_FUNC_END(iee_rw_gate) ++#if defined(CONFIG_CREDP) || defined(CONFIG_KOI) ++EXPORT_SYMBOL(iee_rw_gate) ++#endif ++ ++ .pushsection ".iee.text.header", "ax" ++ ++SYM_FUNC_START(iee_protected_rw_gate) ++ mrs x9, pan ++ /* disable PAN */ ++ msr pan, #0x0 ++ /* switch to iee stack 
*/
++ mrs x9, sp_el0 /* x9 -> task_struct */
++ adrp x12, iee_offset
++ ldr x12, [x12, #:lo12:iee_offset]
++ add x11, x9, x12 /* x11 -> task_token(IEE) */
++ // store kernel stack
++ mov x10, sp
++ str x10, [x11, #kernel_from_token_offset]
++ // load iee stack
++ ldr x10, [x11, #iee_from_token_offset]
++ mov sp, x10
++#ifdef CONFIG_IEE_INTERRUPTABLE
++ isb
++ /* restore daif */
++ msr daif, x13
++ sub sp, sp, #16
++ stp x29, x30, [sp]
++#else
++ sub sp, sp, #16
++ stp x13, x30, [sp]
++#endif
++ /* call iee func */
++ bl iee_dispatch
++#ifdef CONFIG_IEE_INTERRUPTABLE
++ ldp x29, x30, [sp]
++ add sp, sp, #16
++ /* store and disable daif */
++ mrs x13, daif
++ msr daifset, #0x2
++ isb
++#else
++ ldp x13, x30, [sp]
++ add sp, sp, #16
++#endif
++ /* switch to kernel stack */
++ mrs x9, sp_el0 /* x9 -> task_struct(VA) */
++ adrp x12, iee_offset
++ ldr x12, [x12, #:lo12:iee_offset]
++ add x11, x9, x12 /* x11 -> task_token(IEE) */
++ // store iee stack
++ mov x10, sp
++ str x10, [x11, #iee_from_token_offset]
++ // load kernel stack
++ ldr x10, [x11, #kernel_from_token_offset]
++ mov sp, x10
++ /* enable PAN */
++ msr pan, #0x1
++ ret
++SYM_FUNC_END(iee_protected_rw_gate)
++
++ .popsection
++
++#include <asm/asm-bug.h>
++#define BAD_IEE 4
++#define BAD_IEE_SI 5
++
++#define SYS_TCR_EL1_HPD1 0x40000000000
++#define SYS_TCR_EL1_A1 0x400000
++
++ .pushsection ".iee.exec_entry", "ax"
++
++SYM_FUNC_START(iee_rwx_gate_entry)
++ /* Disable irq first. */
++ mrs x15, daif // use x15 to restore daif
++ msr DAIFSet, #0xf
++ isb
++
++ /* Set HPD1 = 0 so the following code in the U RWX page can execute */
++ mrs x9, tcr_el1
++ bic x9, x9, #SYS_TCR_EL1_HPD1
++ bic x9, x9, #SYS_TCR_EL1_A1
++ msr tcr_el1, x9
++ isb
++
++ b iee_rwx_gate_tramp
++SYM_FUNC_END(iee_rwx_gate_entry)
++ .popsection
++
++ .pushsection ".iee.si_text", "awx"
++
++SYM_FUNC_START(iee_rwx_gate_tramp)
++ /* Check tcr val. */
++ mrs x10, tcr_el1
++ adrp x12, iee_si_tcr // tcr val shall be const after init
++ ldr x12, [x12, #:lo12:iee_si_tcr]
++ cbz x12, 1f
++ cmp x12, x10
++ b.ne 3f
++1:
++ mov x13, sp
++ /* If iee hasn't been initialized, skip stack switch. */
++ ldr x11, =iee_init_done
++ ldr x10, [x11]
++ cbz x10, 2f
++
++ /* Switch to iee stack */
++ mrs x9, sp_el0 // x9 -> task_struct
++ adrp x12, iee_offset
++ ldr x12, [x12, #:lo12:iee_offset]
++ add x11, x9, x12 // x11 -> task_token(IEE)
++ // load iee stack
++ ldr x10, [x11, #iee_from_token_offset]
++ mov sp, x10
++
++ /* x15 stores daif and x13 stores previous sp */
++2:
++ stp x15, x13, [sp, #-32]!
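++ // frame layout (32 bytes): [sp] saved DAIF (x15), [sp, #8] previous
++ // sp (x13), [sp, #16] x29/x30 (stored next); iee_rwx_gate_exit undoes
++ // this with "ldp x15, x13, [sp], #32" before restoring the old sp.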
++ stp x29, x30, [sp, #16] ++ bl iee_si_handler // enter actual handler ++ ldp x29, x30, [sp, #16] ++ ++ b iee_rwx_gate_exit // jump to iee exit ++3: ++ mov x0, sp ++ mov x1, #BAD_IEE_SI ++ mrs x2, esr_el1 ++ bl iee_bad_mode ++ ASM_BUG() ++SYM_FUNC_END(iee_rwx_gate_tramp) ++ ++ .popsection ++ ++ .pushsection ".iee.exec_exit", "ax" ++ ++SYM_FUNC_START(iee_rwx_gate_exit) ++ ldp x15, x13, [sp], #32 ++ mov sp, x13 // switch to kernel stack ++ mrs x9, tcr_el1 ++ orr x9, x9, #SYS_TCR_EL1_HPD1 ++ orr x9, x9, #SYS_TCR_EL1_A1 ++ msr tcr_el1, x9 ++/* --------Page boundary-------- */ ++ isb ++ msr daif, x15 ++ isb ++ ret ++SYM_FUNC_END(iee_rwx_gate_exit) ++ ++ .popsection ++ ++#endif +diff --git a/arch/arm64/kernel/iee/iee.c b/arch/arm64/kernel/iee/iee.c +new file mode 100644 +index 000000000000..6b9f7d40df67 +--- /dev/null ++++ b/arch/arm64/kernel/iee/iee.c +@@ -0,0 +1,1360 @@ ++#include "linux/sched.h" ++#include <linux/stdarg.h> ++#include <asm/pgtable-types.h> ++#include <asm/iee.h> ++#include <asm/iee-si.h> ++#include <asm/sysreg.h> ++#include <linux/pgtable.h> ++#include <linux/cred.h> ++#include <asm/iee-slab.h> ++#include <asm/percpu.h> ++ ++#ifdef CONFIG_IEE ++extern struct cred init_cred; ++extern s64 memstart_addr; ++ ++void __iee_code _iee_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); ++void __iee_code _iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd); ++void __iee_code _iee_set_pte(pte_t *ptep, pte_t pte); ++void __iee_code _iee_set_pmd(pmd_t *pmdp, pmd_t pmd); ++void __iee_code _iee_set_pud(pud_t *pudp, pud_t pud); ++void __iee_code _iee_set_p4d(p4d_t *p4dp, p4d_t p4d); ++void __iee_code _iee_set_bm_pte(pte_t *ptep, pte_t pte); ++pteval_t __iee_code _iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval); ++pteval_t __iee_code _iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval); ++void __iee_code _iee_write_in_byte(void *ptr, __u64 data, int length); ++void __iee_code _iee_set_cred_uid(struct cred *cred, kuid_t uid); ++void __iee_code _iee_set_cred_gid(struct cred *cred, kgid_t gid); ++void __iee_code _iee_copy_cred(struct cred *old, struct cred *new); ++void __iee_code _iee_set_cred_suid(struct cred *cred, kuid_t suid); ++void __iee_code _iee_set_cred_sgid(struct cred *cred, kgid_t sgid); ++void __iee_code _iee_set_cred_euid(struct cred *cred, kuid_t euid); ++void __iee_code _iee_set_cred_egid(struct cred *cred, kgid_t egid); ++void __iee_code _iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid); ++void __iee_code _iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid); ++void __iee_code _iee_set_cred_user(struct cred *cred, struct user_struct *user); ++void __iee_code _iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns); ++void __iee_code _iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts); ++void __iee_code _iee_set_cred_group_info(struct cred *cred, struct group_info *group_info); ++void __iee_code _iee_set_cred_securebits(struct cred *cred, unsigned securebits); ++void __iee_code _iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable); ++void __iee_code _iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted); ++void __iee_code _iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective); ++void __iee_code _iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset); ++void __iee_code _iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient); ++void __iee_code _iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring); ++void __iee_code 
_iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring); ++void __iee_code _iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring); ++void __iee_code _iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring); ++void __iee_code _iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth); ++void __iee_code _iee_set_cred_non_rcu(struct cred *cred, int non_rcu); ++void __iee_code _iee_set_cred_atomic_set_usage(struct cred *cred, int i); ++bool __iee_code _iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr); ++void __iee_code _iee_set_cred_security(struct cred *cred, void *security); ++void __iee_code _iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu); ++void __iee_code _iee_memset(void *ptr, int data, size_t n); ++void __iee_code _iee_set_track(struct track *ptr, struct track *data); ++void __iee_code _iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr); ++void __iee_code _iee_set_pte_upage(pte_t *ptep, pte_t pte); ++void __iee_code _iee_set_pte_ppage(pte_t *ptep, pte_t pte); ++void __iee_code _iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm); ++void __iee_code _iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd); ++void __iee_code _iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack); ++void __iee_code _iee_free_token(struct task_struct *tsk); ++unsigned long __iee_code _iee_read_token_stack(struct task_struct *tsk); ++void __iee_code _iee_write_entry_task(struct task_struct *tsk); ++#ifdef CONFIG_KOI ++unsigned long __iee_code _iee_read_koi_stack(struct task_struct *tsk); ++void __iee_code _iee_write_koi_stack(struct task_struct *tsk, unsigned long koi_stack); ++unsigned long __iee_code _iee_read_token_ttbr1(struct task_struct *tsk); ++void __iee_code _iee_write_token_ttbr1(struct task_struct *tsk, unsigned long current_ttbr1); ++unsigned long __iee_code _iee_read_koi_kernel_stack(struct task_struct *tsk); ++void __iee_code _iee_write_koi_kernel_stack(struct task_struct *tsk, unsigned long kernel_stack); ++unsigned long __iee_code _iee_read_koi_stack_base(struct task_struct *tsk); ++void __iee_code _iee_write_koi_stack_base(struct task_struct *tsk, unsigned long koi_stack_base); ++#endif ++ ++/* wrapper functions */ ++void __iee_code iee_wrapper_write_in_byte(va_list args) { ++ void *ptr = va_arg(args, void *); ++ __u64 data = va_arg(args, __u64); ++ int length = va_arg(args, int); ++ _iee_write_in_byte(ptr, data, length); ++} ++ ++void __iee_code iee_wrapper_set_pte(va_list args) { ++ pte_t *ptep = va_arg(args, pte_t *); ++ pte_t pte = va_arg(args, pte_t); ++ _iee_set_pte(ptep, pte); ++} ++ ++void __iee_code iee_wrapper_set_pmd(va_list args) { ++ pmd_t *pmdp = va_arg(args, pmd_t *); ++ pmd_t pmd = va_arg(args, pmd_t); ++ _iee_set_pmd(pmdp, pmd); ++} ++ ++void __iee_code iee_wrapper_set_p4d(va_list args) { ++ p4d_t *p4dp = va_arg(args, p4d_t *); ++ p4d_t p4d = va_arg(args, p4d_t); ++ _iee_set_p4d(p4dp, p4d); ++} ++ ++void __iee_code iee_wrapper_set_pud(va_list args) { ++ pud_t *pudp = va_arg(args, pud_t *); ++ pud_t pud = va_arg(args, pud_t); ++ _iee_set_pud(pudp, pud); ++} ++ ++void __iee_code iee_wrapper_set_bm_pte(va_list args) { ++ pte_t *ptep = va_arg(args, pte_t *); ++ pte_t pte = va_arg(args, pte_t); ++ _iee_set_bm_pte(ptep, pte); ++} ++ ++void __iee_code iee_wrapper_set_swapper_pgd(va_list args) { ++ pgd_t *pgdp = va_arg(args, pgd_t *); ++ pgd_t pgd = va_arg(args, pgd_t); ++ _iee_set_swapper_pgd(pgdp, pgd); ++} ++ ++void __iee_code 
iee_wrapper_set_tramp_pgd(va_list args) { ++ pgd_t *pgdp = va_arg(args, pgd_t *); ++ pgd_t pgd = va_arg(args, pgd_t); ++ _iee_set_tramp_pgd_pre_init(pgdp, pgd); ++} ++ ++pteval_t __iee_code iee_wrapper_set_xchg(va_list args) { ++ pteval_t ret; ++ pte_t *ptep = va_arg(args, pte_t *); ++ pteval_t pteval = va_arg(args, pteval_t); ++ ret = _iee_set_xchg_relaxed(ptep, pteval); ++ return (u64)ret; ++} ++ ++pteval_t __iee_code iee_wrapper_set_cmpxchg(va_list args) { ++ pteval_t ret; ++ pte_t *ptep = va_arg(args, pte_t *); ++ pteval_t old_pteval = va_arg(args, pteval_t); ++ pteval_t new_pteval = va_arg(args, pteval_t); ++ ret = _iee_set_cmpxchg_relaxed(ptep, old_pteval, new_pteval); ++ return (u64)ret; ++} ++ ++void __iee_code iee_wrapper_set_cred_uid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kuid_t uid = va_arg(args, kuid_t); ++ _iee_set_cred_uid(cred, uid); ++} ++ ++void __iee_code iee_wrapper_set_cred_gid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kgid_t gid = va_arg(args, kgid_t); ++ _iee_set_cred_gid(cred, gid); ++} ++ ++void __iee_code iee_wrapper_copy_cred(va_list args) { ++ struct cred *old = va_arg(args, struct cred *); ++ struct cred *new = va_arg(args, struct cred *); ++ _iee_copy_cred(old, new); ++} ++ ++void __iee_code iee_wrapper_set_cred_suid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kuid_t suid = va_arg(args, kuid_t); ++ _iee_set_cred_suid(cred, suid); ++} ++ ++void __iee_code iee_wrapper_set_cred_sgid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kgid_t sgid = va_arg(args, kgid_t); ++ _iee_set_cred_sgid(cred, sgid); ++} ++ ++void __iee_code iee_wrapper_set_cred_euid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kuid_t euid = va_arg(args, kuid_t); ++ _iee_set_cred_euid(cred, euid); ++} ++ ++void __iee_code iee_wrapper_set_cred_egid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kgid_t egid = va_arg(args, kgid_t); ++ _iee_set_cred_egid(cred, egid); ++} ++ ++void __iee_code iee_wrapper_set_cred_fsuid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kuid_t fsuid = va_arg(args, kuid_t); ++ _iee_set_cred_fsuid(cred, fsuid); ++} ++ ++void __iee_code iee_wrapper_set_cred_fsgid(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kgid_t fsgid = va_arg(args, kgid_t); ++ _iee_set_cred_fsgid(cred, fsgid); ++} ++ ++void __iee_code iee_wrapper_set_cred_user(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct user_struct *user = va_arg(args, struct user_struct *); ++ _iee_set_cred_user(cred, user); ++} ++ ++void __iee_code iee_wrapper_set_cred_user_ns(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct user_namespace *user_ns = va_arg(args, struct user_namespace *); ++ _iee_set_cred_user_ns(cred, user_ns); ++} ++ ++void __iee_code iee_wrapper_set_cred_ucounts(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct ucounts *ucounts = va_arg(args, struct ucounts *); ++ _iee_set_cred_ucounts(cred, ucounts); ++} ++ ++void __iee_code iee_wrapper_set_cred_group_info(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct group_info *group_info = va_arg(args, struct group_info *); ++ _iee_set_cred_group_info(cred, group_info); ++} ++ ++void __iee_code iee_wrapper_set_cred_securebits(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ unsigned securebits = va_arg(args, unsigned); ++ 
_iee_set_cred_securebits(cred, securebits); ++} ++ ++void __iee_code iee_wrapper_set_cred_cap_inheritable(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kernel_cap_t cap_inheritable = va_arg(args, kernel_cap_t); ++ _iee_set_cred_cap_inheritable(cred, cap_inheritable); ++} ++ ++void __iee_code iee_wrapper_set_cred_cap_permitted(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kernel_cap_t cap_permitted = va_arg(args, kernel_cap_t); ++ _iee_set_cred_cap_permitted(cred, cap_permitted); ++} ++ ++void __iee_code iee_wrapper_set_cred_cap_effective(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kernel_cap_t cap_effective = va_arg(args, kernel_cap_t); ++ _iee_set_cred_cap_effective(cred, cap_effective); ++} ++ ++void __iee_code iee_wrapper_set_cred_cap_bset(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kernel_cap_t cap_bset = va_arg(args, kernel_cap_t); ++ _iee_set_cred_cap_bset(cred, cap_bset); ++} ++ ++void __iee_code iee_wrapper_set_cred_cap_ambient(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ kernel_cap_t cap_ambient = va_arg(args, kernel_cap_t); ++ _iee_set_cred_cap_ambient(cred, cap_ambient); ++} ++ ++void __iee_code iee_wrapper_set_cred_jit_keyring(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ unsigned long jit_keyring = va_arg(args, unsigned long); ++ _iee_set_cred_jit_keyring(cred, (unsigned char)jit_keyring); ++} ++ ++void __iee_code iee_wrapper_set_cred_session_keyring(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct key *session_keyring = va_arg(args, struct key *); ++ _iee_set_cred_session_keyring(cred, session_keyring); ++} ++ ++void __iee_code iee_wrapper_set_cred_process_keyring(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct key *process_keyring = va_arg(args, struct key *); ++ _iee_set_cred_process_keyring(cred, process_keyring); ++} ++ ++void __iee_code iee_wrapper_set_cred_thread_keyring(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct key *thread_keyring = va_arg(args, struct key *); ++ _iee_set_cred_thread_keyring(cred, thread_keyring); ++} ++ ++void __iee_code iee_wrapper_set_cred_request_key_auth(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct key *request_key_auth = va_arg(args, struct key *); ++ _iee_set_cred_request_key_auth(cred, request_key_auth); ++} ++ ++void __iee_code iee_wrapper_set_cred_non_rcu(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ int non_rcu = va_arg(args, int); ++ _iee_set_cred_non_rcu(cred, non_rcu); ++} ++ ++void __iee_code iee_wrapper_set_cred_atomic_set_usage(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ int i = va_arg(args, int); ++ _iee_set_cred_atomic_set_usage(cred, i); ++} ++ ++u64 __iee_code iee_wrapper_set_cred_atomic_op_usage(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ int flag = va_arg(args, int); ++ int nr = va_arg(args, int); ++ return (u64)_iee_set_cred_atomic_op_usage(cred, flag, nr); ++} ++ ++void __iee_code iee_wrapper_set_cred_security(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ void *security = va_arg(args, void *); ++ _iee_set_cred_security(cred, security); ++} ++ ++void __iee_code iee_wrapper_set_cred_rcu(va_list args) { ++ struct cred *cred = va_arg(args, struct cred *); ++ struct rcu_head *rcu = va_arg(args, struct rcu_head *); ++ _iee_set_cred_rcu(cred, rcu); ++} ++ 
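++/*
++ * Illustrative sketch (hypothetical, for orientation only): every
++ * iee_wrapper_* above follows the same marshalling pattern -- re-read
++ * the arguments from the va_list in exactly the order the caller passed
++ * them to iee_dispatch(), using default-promoted types (note how
++ * jit_keyring travels as unsigned long and is narrowed back to unsigned
++ * char inside its wrapper). A wrapper for a hypothetical protected
++ * worker _iee_example(struct cred *cred, int value) would read:
++ *
++ *	void __iee_code iee_wrapper_example(va_list args) {
++ *		struct cred *cred = va_arg(args, struct cred *);
++ *		int value = va_arg(args, int);
++ *		_iee_example(cred, value);
++ *	}
++ */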
++void __iee_code iee_wrapper_memset(va_list args) { ++ void *ptr = va_arg(args, void *); ++ int data = va_arg(args, int); ++ size_t n = va_arg(args, size_t); ++ _iee_memset(ptr, data, n); ++} ++ ++void __iee_code iee_wrapper_set_track(va_list args) { ++ struct track *ptr = va_arg(args, struct track *); ++ struct track *data = va_arg(args, struct track *); ++ _iee_set_track(ptr, data); ++} ++ ++void __iee_code iee_wrapper_set_freeptr(va_list args) { ++ freeptr_t *pptr = va_arg(args, freeptr_t *); ++ freeptr_t ptr = va_arg(args, freeptr_t); ++ _iee_set_freeptr(pptr, ptr); ++} ++ ++void __iee_code iee_wrapper_set_pte_upage(va_list args) { ++ pte_t *ptep = va_arg(args, pte_t *); ++ pte_t pte = va_arg(args, pte_t); ++ _iee_set_pte_upage(ptep, pte); ++} ++ ++void __iee_code iee_wrapper_set_pte_ppage(va_list args) { ++ pte_t *ptep = va_arg(args, pte_t *); ++ pte_t pte = va_arg(args, pte_t); ++ _iee_set_pte_ppage(ptep, pte); ++} ++ ++void __iee_code iee_wrapper_set_token_mm(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ struct mm_struct *mm = va_arg(args, struct mm_struct *); ++ _iee_set_token_mm(tsk, mm); ++} ++ ++void __iee_code iee_wrapper_set_token_pgd(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ pgd_t *pgd = va_arg(args, pgd_t *); ++ _iee_set_token_pgd(tsk, pgd); ++} ++ ++void __iee_code iee_wrapper_init_token(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ void *kernel_stack = va_arg(args, void *); ++ void *iee_stack = va_arg(args, void *); ++ _iee_init_token(tsk, kernel_stack, iee_stack); ++} ++ ++void __iee_code iee_wrapper_free_token(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ _iee_free_token(tsk); ++} ++ ++u64 __iee_code iee_wrapper_read_token_stack(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ return (u64)_iee_read_token_stack(tsk); ++} ++ ++void __iee_code iee_wrapper_write_entry_task(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ _iee_write_entry_task(tsk); ++} ++ ++#ifdef CONFIG_KOI ++u64 __iee_code iee_wrapper_read_koi_stack(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ return (u64)_iee_read_koi_stack(tsk); ++} ++ ++void __iee_code iee_wrapper_write_koi_stack(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ unsigned long koi_stack = va_arg(args, unsigned long); ++ _iee_write_koi_stack(tsk, koi_stack); ++} ++ ++u64 __iee_code iee_wrapper_read_token_ttbr1(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ return (u64)_iee_read_token_ttbr1(tsk); ++} ++ ++void __iee_code iee_wrapper_write_token_ttbr1(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ unsigned long current_ttbr1 = va_arg(args, unsigned long); ++ _iee_write_token_ttbr1(tsk, current_ttbr1); ++} ++ ++u64 __iee_code iee_wrapper_read_koi_kernel_stack(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ return (u64)_iee_read_koi_kernel_stack(tsk); ++} ++ ++void __iee_code iee_wrapper_write_koi_kernel_stack(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ unsigned long kernel_stack = va_arg(args, unsigned long); ++ _iee_write_koi_kernel_stack(tsk, kernel_stack); ++} ++ ++u64 __iee_code iee_wrapper_read_koi_stack_base(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ 
return (u64)_iee_read_koi_stack_base(tsk); ++} ++ ++void __iee_code iee_wrapper_write_koi_stack_base(va_list args) { ++ struct task_struct *tsk = va_arg(args, struct task_struct *); ++ unsigned long koi_stack_base = va_arg(args, unsigned long); ++ _iee_write_koi_stack_base(tsk, koi_stack_base); ++} ++#endif ++// Define the function pointer type for wrapper functions. ++// Each function pointer conforms to a standardized calling convention ++// using a variable argument list (va_list) as its parameter. ++// This allows dynamic invocation of different functions with various arguments. ++typedef void (*iee_wrapper_func)(va_list args); ++iee_wrapper_func iee_wrappers[] = { ++ iee_wrapper_write_in_byte, ++ iee_wrapper_set_pte, ++ iee_wrapper_set_pmd, ++ iee_wrapper_set_pud, ++ iee_wrapper_set_p4d, ++ iee_wrapper_set_bm_pte, ++ iee_wrapper_set_swapper_pgd, ++ iee_wrapper_set_tramp_pgd, ++ (iee_wrapper_func)iee_wrapper_set_cmpxchg, ++ (iee_wrapper_func)iee_wrapper_set_xchg, ++ iee_wrapper_copy_cred, ++ iee_wrapper_set_cred_uid, ++ iee_wrapper_set_cred_gid, ++ iee_wrapper_set_cred_suid, ++ iee_wrapper_set_cred_sgid, ++ iee_wrapper_set_cred_euid, ++ iee_wrapper_set_cred_egid, ++ iee_wrapper_set_cred_fsuid, ++ iee_wrapper_set_cred_fsgid, ++ iee_wrapper_set_cred_user, ++ iee_wrapper_set_cred_user_ns, ++ iee_wrapper_set_cred_group_info, ++ iee_wrapper_set_cred_securebits, ++ iee_wrapper_set_cred_cap_inheritable, ++ iee_wrapper_set_cred_cap_permitted, ++ iee_wrapper_set_cred_cap_effective, ++ iee_wrapper_set_cred_cap_bset, ++ iee_wrapper_set_cred_cap_ambient, ++ iee_wrapper_set_cred_jit_keyring, ++ iee_wrapper_set_cred_session_keyring, ++ iee_wrapper_set_cred_process_keyring, ++ iee_wrapper_set_cred_thread_keyring, ++ iee_wrapper_set_cred_request_key_auth, ++ iee_wrapper_set_cred_non_rcu, ++ iee_wrapper_set_cred_atomic_set_usage, ++ (iee_wrapper_func)iee_wrapper_set_cred_atomic_op_usage, ++ iee_wrapper_set_cred_security, ++ iee_wrapper_set_cred_rcu, ++ iee_wrapper_memset, ++ iee_wrapper_set_track, ++ iee_wrapper_set_freeptr, ++ iee_wrapper_set_pte_upage, ++ iee_wrapper_set_pte_ppage, ++ iee_wrapper_set_token_mm, ++ iee_wrapper_set_token_pgd, ++ iee_wrapper_init_token, ++ iee_wrapper_free_token, ++ (iee_wrapper_func)iee_wrapper_read_token_stack, ++ iee_wrapper_write_entry_task, ++ iee_wrapper_set_cred_ucounts, ++#ifdef CONFIG_KOI ++ (iee_wrapper_func)iee_wrapper_read_koi_stack, ++ iee_wrapper_write_koi_stack, ++ (iee_wrapper_func)iee_wrapper_read_token_ttbr1, ++ iee_wrapper_write_token_ttbr1, ++ (iee_wrapper_func)iee_wrapper_read_koi_kernel_stack, ++ iee_wrapper_write_koi_kernel_stack, ++ (iee_wrapper_func)iee_wrapper_read_koi_stack_base, ++ iee_wrapper_write_koi_stack_base ++#endif ++}; ++ ++u64 __iee_code iee_dispatch(int flag, ...){ ++ va_list pArgs; ++ ++ va_start(pArgs, flag); ++ ++ switch(flag) ++ { ++ case IEE_OP_SET_CMPXCHG: ++ { ++ pteval_t ret = iee_wrapper_set_cmpxchg(pArgs); ++ va_end(pArgs); ++ return (u64)ret; ++ } ++ case IEE_OP_SET_XCHG: ++ { ++ pteval_t ret = iee_wrapper_set_xchg(pArgs); ++ va_end(pArgs); ++ return (u64)ret; ++ } ++ case IEE_OP_SET_CRED_ATOP_USAGE: ++ { ++ u64 ret = iee_wrapper_set_cred_atomic_op_usage(pArgs); ++ va_end(pArgs); ++ return ret; ++ } ++ case IEE_READ_TOKEN_STACK: ++ { ++ u64 ret = iee_wrapper_read_token_stack(pArgs); ++ va_end(pArgs); ++ return ret; ++ } ++#ifdef CONFIG_KOI ++ case IEE_READ_KOI_STACK: ++ { ++ u64 ret = iee_wrapper_read_koi_stack(pArgs); ++ va_end(pArgs); ++ return ret; ++ } ++ case IEE_READ_TOKEN_TTBR1: ++ { ++ u64 ret = 
iee_wrapper_read_token_ttbr1(pArgs); ++ va_end(pArgs); ++ return ret; ++ } ++ case IEE_READ_KOI_KERNEL_STACK: ++ { ++ u64 ret = iee_wrapper_read_koi_kernel_stack(pArgs); ++ va_end(pArgs); ++ return ret; ++ } ++ case IEE_READ_KOI_STACK_BASE: ++ { ++ u64 ret = iee_wrapper_read_koi_stack_base(pArgs); ++ va_end(pArgs); ++ return ret; ++ } ++#endif ++ default: ++ { ++ #ifndef CONFIG_KOI ++ if((flag < IEE_WRITE_IN_BYTE) | (flag > IEE_OP_SET_CRED_UCOUNTS)) ++ panic("Invalid iee flag.\n"); ++ #else ++ if((flag < IEE_WRITE_IN_BYTE) | (flag > IEE_WRITE_KOI_STACK_BASE)) ++ panic("Invalid iee flag.\n"); ++ #endif ++ iee_wrappers[flag](pArgs); ++ break; ++ } ++ } ++ ++ va_end(pArgs); ++ return 0; ++} ++ ++#ifdef CONFIG_KOI ++unsigned long __iee_code _iee_read_koi_stack(struct task_struct *tsk) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ return (unsigned long)token->koi_stack; ++} ++ ++void __iee_code _iee_write_koi_stack(struct task_struct *tsk, unsigned long koi_stack) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->koi_stack = koi_stack; ++} ++ ++unsigned long __iee_code _iee_read_token_ttbr1(struct task_struct *tsk) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ return token->current_ttbr1; ++} ++ ++void __iee_code _iee_write_token_ttbr1(struct task_struct *tsk, unsigned long current_ttbr1) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->current_ttbr1 = current_ttbr1; ++} ++ ++unsigned long __iee_code _iee_read_koi_kernel_stack(struct task_struct *tsk) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ return token->koi_kernel_stack; ++} ++ ++void __iee_code _iee_write_koi_kernel_stack(struct task_struct *tsk, unsigned long kernel_stack) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->koi_kernel_stack = kernel_stack; ++} ++ ++unsigned long __iee_code _iee_read_koi_stack_base(struct task_struct *tsk) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ return (unsigned long)token->koi_stack_base; ++} ++ ++void __iee_code _iee_write_koi_stack_base(struct task_struct *tsk, unsigned long koi_stack_base) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->koi_stack_base = koi_stack_base; ++} ++#endif ++ ++// Protect the __entry_task. ++__attribute__((aligned(PAGE_SIZE))) DECLARE_PER_CPU(struct task_struct *[PAGE_SIZE/sizeof(struct task_struct *)], __entry_task); ++void __iee_code _iee_write_entry_task(struct task_struct *tsk) ++{ ++ // Add check of tsk. ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ ++ unsigned long flags; ++ unsigned long res; ++ struct task_struct **entry_addr; ++ local_irq_save(flags); ++ asm volatile("at s1e1r, %0"::"r"(token)); ++ isb(); ++ res = read_sysreg(par_el1); ++ local_irq_restore(flags); ++ ++ // If it is logical map, that means it is not a token. 
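++ // (The AT S1E1R above asked the MMU for a stage-1 EL1 read translation
++ // of the token address; PAR_EL1 now holds the translation result that
++ // the checks below are based on.)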
++ if(__phys_to_iee(res & PTE_ADDR_MASK) == (((unsigned long)token) & PTE_ADDR_MASK)) ++ panic("Trying to forge a token.\n"); ++ ++ if(!token->valid) ++ panic("Trying to write a wrong task into __entry_task.\n"); ++ entry_addr = (struct task_struct **)__phys_to_iee(__pa(SHIFT_PERCPU_PTR(__entry_task,__kern_my_cpu_offset()))); ++ *entry_addr = tsk; ++} ++ ++unsigned long __iee_code _iee_read_token_stack(struct task_struct *tsk) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ return (unsigned long)token->iee_stack; ++} ++ ++void __iee_code _iee_free_token(struct task_struct *tsk) ++{ ++ _iee_memset(tsk, 0, sizeof(struct task_token)); ++} ++ ++#ifdef CONFIG_KOI ++extern unsigned long koi_swapper_ttbr1; ++#endif ++void __iee_code _iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->kernel_stack = kernel_stack; ++ token->iee_stack = iee_stack; ++ token->valid = true; ++#ifdef CONFIG_KOI ++ token->koi_kernel_stack = NULL; ++ token->koi_stack = NULL; ++ token->koi_stack_base = NULL; ++ token->current_ttbr1 = 0; ++#endif ++} ++ ++void __iee_code _iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->mm = mm; ++} ++ ++void __iee_code _iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd) ++{ ++ struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); ++ token->pgd = pgd; ++} ++ ++void __iee_code _iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr) ++{ ++ pptr = (freeptr_t *)__phys_to_iee(__pa(pptr)); ++ *pptr = ptr; ++} ++ ++#pragma GCC push_options ++#pragma GCC optimize("O0") ++void __iee_code _iee_memset(void *ptr, int data, size_t n) ++{ ++ char *_ptr = (char *)__phys_to_iee(__pa(ptr)); ++ ++ while (n--) ++ *_ptr++ = data; ++} ++ ++void __iee_code _iee_memcpy(void *dst, void *src, size_t n) ++{ ++ char *_dst = (char *)__phys_to_iee(__pa(dst)); ++ char *_src = (char *)src; ++ ++ while(n--) ++ *_dst++ = *_src++; ++} ++#pragma GCC pop_options ++ ++void __iee_code _iee_set_track(struct track *ptr, struct track *data) ++{ ++ _iee_memcpy(ptr, data, sizeof(struct track)); ++} ++ ++void __iee_code _iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu) ++{ ++ if(cred == &init_cred) ++ cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); ++ else ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ #ifdef CONFIG_CREDP ++ *((struct rcu_head **)(&(cred->rcu.func))) = rcu; ++ #endif ++} ++ ++void __iee_code _iee_set_cred_security(struct cred *cred, void *security) ++{ ++ if(cred == &init_cred) ++ cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); ++ else ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->security = security; ++} ++ ++bool __iee_code _iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ switch (flag) ++ { ++ case AT_ADD: { ++ atomic_long_add(nr, &cred->usage); ++ return 0; ++ } ++ case AT_INC_NOT_ZERO: { ++ return atomic_long_inc_not_zero(&cred->usage); ++ } ++ case AT_SUB_AND_TEST: { ++ return atomic_long_sub_and_test(nr, &cred->usage); ++ } ++ } ++ return 0; ++} ++ ++void __iee_code _iee_set_cred_atomic_set_usage(struct cred *cred, int i) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ atomic_long_set(&cred->usage,i); ++} ++ ++void __iee_code _iee_set_cred_non_rcu(struct cred *cred, int non_rcu) ++{ ++ cred = (struct cred 
*)__phys_to_iee(__pa(cred)); ++ cred->non_rcu = non_rcu; ++} ++ ++void __iee_code _iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->session_keyring = session_keyring; ++} ++ ++void __iee_code _iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->process_keyring = process_keyring; ++} ++ ++void __iee_code _iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->thread_keyring = thread_keyring; ++} ++ ++void __iee_code _iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->request_key_auth = request_key_auth; ++} ++ ++void __iee_code _iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->jit_keyring = jit_keyring; ++} ++ ++void __iee_code _iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->cap_inheritable = cap_inheritable; ++} ++ ++void __iee_code _iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->cap_permitted = cap_permitted; ++} ++ ++void __iee_code _iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->cap_effective = cap_effective; ++} ++ ++void __iee_code _iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->cap_bset = cap_bset; ++} ++ ++void __iee_code _iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->cap_ambient = cap_ambient; ++} ++ ++void __iee_code _iee_set_cred_securebits(struct cred *cred, unsigned securebits) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->securebits = securebits; ++} ++ ++void __iee_code _iee_set_cred_group_info(struct cred *cred, struct group_info *group_info) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->group_info = group_info; ++} ++ ++void __iee_code _iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->ucounts = ucounts; ++} ++ ++void __iee_code _iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->user_ns = user_ns; ++} ++ ++void __iee_code _iee_set_cred_user(struct cred *cred, struct user_struct *user) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->user = user; ++} ++ ++void __iee_code _iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->fsgid = fsgid; ++} ++ ++void __iee_code _iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->fsuid = fsuid; ++} ++ ++void __iee_code _iee_set_cred_egid(struct cred *cred, kgid_t egid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->egid = egid; ++} ++ ++void __iee_code _iee_set_cred_euid(struct cred *cred, kuid_t euid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->euid = euid; ++} ++ ++void 
__iee_code _iee_set_cred_sgid(struct cred *cred, kgid_t sgid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->sgid = sgid; ++} ++ ++void __iee_code _iee_set_cred_suid(struct cred *cred, kuid_t suid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->suid = suid; ++} ++ ++void __iee_code _iee_copy_cred(struct cred *old, struct cred *new) ++{ ++ #ifdef CONFIG_CREDP ++ struct rcu_head *rcu = (struct rcu_head *)(new->rcu.func); ++ struct cred *_new = (struct cred *)__phys_to_iee(__pa(new)); ++ _iee_memcpy(new,old,sizeof(struct cred)); ++ *(struct rcu_head **)(&(_new->rcu.func)) = rcu; ++ *(struct rcu_head *)(_new->rcu.func) = *(struct rcu_head *)(old->rcu.func); ++ #endif ++} ++ ++void __iee_code _iee_set_cred_gid(struct cred *cred, kgid_t gid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->gid = gid; ++} ++ ++void __iee_code _iee_set_cred_uid(struct cred *cred, kuid_t uid) ++{ ++ cred = (struct cred *)__phys_to_iee(__pa(cred)); ++ cred->uid = uid; ++} ++ ++void __iee_code _iee_write_in_byte(void *ptr, __u64 data, int length) ++{ ++ ptr = (void *)__phys_to_iee(__pa(ptr)); ++ switch(length) { ++ case 8: { ++ *(__u64 *)ptr = data; ++ break; ++ } ++ case 4: { ++ *(__u32 *)ptr = (__u32)data; ++ break; ++ } ++ case 2: { ++ *(__u16 *)ptr = (__u16)data; ++ break; ++ } ++ case 1: { ++ *(__u8 *)ptr = (__u8)data; ++ break; ++ } ++ } ++} ++ ++pteval_t __iee_code _iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval) ++{ ++ pteval_t ret = xchg_relaxed((pteval_t *)(__phys_to_iee(__pa(ptep))), pteval); ++ return ret; ++} ++ ++pteval_t __iee_code _iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval) ++{ ++ pteval_t pteval = cmpxchg_relaxed((pteval_t *)(__phys_to_iee(__pa(ptep))), old_pteval, new_pteval); ++ return pteval; ++} ++ ++/* Check if addr is allocated in IEE page */ ++static inline bool check_addr_in_iee_valid(unsigned long addr) ++{ ++ unsigned long flags; ++ unsigned long res; ++ local_irq_save(flags); ++ asm volatile("at s1e1r, %0"::"r"(addr)); ++ isb(); ++ res = read_sysreg(par_el1); ++ local_irq_restore(flags); ++ ++ // If it is not logical map, that means it is a token. 
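++ // (PAR_EL1 bit 0 is the fault flag, so the address only counts as a
++ // valid IEE page if it both round-trips through __phys_to_iee() and
++ // translated without fault.)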
++ if(__phys_to_iee(res & PTE_ADDR_MASK) != addr)
++ return false;
++
++ return !(res & 0x1);
++}
++
++void __iee_code _iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd)
++{
++ WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd);
++}
++
++void __iee_code _iee_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ if(!(pgd_val(pgd) & PMD_SECT_VALID))
++ {
++ WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd);
++ return;
++ }
++
++ if ((pgd_val(pgd) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pgd_to_phys(pgd))))
++ panic("You can't use a non-IEE pgtable\n");
++
++ if((pgdp >= pgd_offset_pgd((pgd_t *)swapper_pg_dir, PAGE_OFFSET + BIT(vabits_actual - 2))) && (pgdp < pgd_offset_pgd((pgd_t *)swapper_pg_dir, PAGE_OFFSET + BIT(vabits_actual - 1))) && !(pgd_val(pgd) & PGD_APT))
++ panic("Set IEE pgd U page.\n");
++
++ WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd);
++}
++
++void __iee_code _iee_set_p4d(p4d_t *p4dp, p4d_t p4d)
++{
++ if(!(p4d_val(p4d) & PMD_SECT_VALID))
++ {
++ WRITE_ONCE(*((p4d_t *)(__phys_to_iee(__pa(p4dp)))), p4d);
++ return;
++ }
++
++ if ((p4d_val(p4d) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__p4d_to_phys(p4d))))
++ panic("You can't use a non-IEE pgtable\n");
++
++ WRITE_ONCE(*((p4d_t *)(__phys_to_iee(__pa(p4dp)))), p4d);
++}
++
++void __iee_code _iee_set_pud(pud_t *pudp, pud_t pud)
++{
++ if(!(pud_val(pud) & PMD_SECT_VALID))
++ {
++ WRITE_ONCE(*((pud_t *)(__phys_to_iee(__pa(pudp)))), pud);
++ return;
++ }
++
++ if ((pud_val(pud) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pud_to_phys(pud))))
++ panic("You can't use a non-IEE pgtable\n");
++
++ WRITE_ONCE(*((pud_t *)(__phys_to_iee(__pa(pudp)))), pud);
++}
++
++// Return true if the modification does not break DEP.
++static inline bool check_pmd_dep(char *addr, pmd_t pmd)
++{
++ // DEP for kernel code and readonly data
++ // _stext: kernel text start addr, _etext: kernel text end addr
++ if (addr >= _stext && addr < _etext)
++ {
++ if ((PTE_WRITE & pmd_val(pmd)) || // DBM == 1 --> writable
++ !(PTE_RDONLY & pmd_val(pmd))) // DBM == 0 && AP[2] = 0 --> writable
++ {
++ panic("Can't make kernel's text/readonly page as writable!\n"
++ "addr = 0x%016llx, pmd_val = 0x%016llx",
++ (u64)addr, pmd_val(pmd));
++ }
++ }
++ return true;
++}
++
++// Return true if the pmd table is a part of the kernel page table.
++// TODO : Optimize to get lower overhead.
++static inline bool is_kernel_pmd_table(pmd_t *pmdp, pmd_t pmd)
++{
++ int i = 0, j = 0;
++ for(i = 0; i < PAGE_SIZE/sizeof(pgd_t); i++)
++ {
++ pgd_t *pgdp = (pgd_t *)swapper_pg_dir + i;
++ if((pgd_val(*pgdp) & PMD_SECT_VALID) && (pgd_val(*pgdp) & PMD_TABLE_BIT))
++ {
++ for(j = 0; j < PAGE_SIZE/sizeof(pud_t); j++)
++ {
++ pud_t *pudp = (pud_t *)__va(__pgd_to_phys(*pgdp)) + j;
++ if((pud_val(*pudp) & PMD_SECT_VALID) && (pud_val(*pudp) & PMD_TABLE_BIT))
++ {
++ pmd_t *current_pmdp = __va(__pud_to_phys(*pudp));
++ if((unsigned long)current_pmdp == ((unsigned long)pmdp & PAGE_MASK))
++ return true;
++ }
++ }
++ }
++ }
++ return false;
++}
++
++// Return true if it is mapped to a physical range containing an IEE page.
++// TODO : Optimize to get lower overhead.
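++// (On 4K pages a huge pmd spans PAGE_SIZE/sizeof(pmd_t) == 512 base
++// pages, so each covered page is probed individually below.)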
++static inline bool check_addr_range_in_iee_valid(pmd_t pmd)
++{
++ int i = 0;
++ unsigned long addr = __phys_to_iee(__pmd_to_phys(pmd));
++ for(i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
++ {
++ if(check_addr_in_iee_valid(addr + PAGE_SIZE * i))
++ return true;
++ }
++ return false;
++}
++
++void __iee_code _iee_set_pmd(pmd_t *pmdp, pmd_t pmd)
++{
++ char *addr = (char *)__phys_to_kimg(__pmd_to_phys(pmd));
++
++ if(!(pmd_val(pmd) & PMD_SECT_VALID))
++ {
++ WRITE_ONCE(*((pmd_t *)(__phys_to_iee(__pa(pmdp)))), pmd);
++ return;
++ }
++
++ // Check if the pte table is legally allocated.
++ if ((pmd_val(pmd) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pmd_to_phys(pmd))))
++ panic("You can't use a non-IEE pgtable\n");
++
++ // Avoid mapping a huge pmd as U page.
++ if(!(pmd_val(pmd) & PMD_TABLE_BIT) && (pmd_val(pmd) & PMD_SECT_USER) && is_kernel_pmd_table(pmdp, pmd))
++ panic("Set a block descriptor in kernel space U page.\n");
++
++ // Avoid mapping a huge pmd to IEE physical page.
++ if(!(pmd_val(pmd) & PMD_TABLE_BIT) && check_addr_range_in_iee_valid(pmd))
++ panic("Mapping IEE physical page to a huge pmd.\n");
++
++ if(!check_pmd_dep(addr, pmd))
++ return;
++
++ WRITE_ONCE(*((pmd_t *)(__phys_to_iee(__pa(pmdp)))), pmd);
++}
++
++// Return true if the pte table is a part of the kernel page table.
++// TODO : Optimize to get lower overhead.
++static inline bool is_kernel_pte_table(pte_t *ptep, pte_t pte)
++{
++ return false;
++}
++
++// Return true if it does not change the privilege or add a new U page in the kernel.
++static inline bool check_privilege_safe(pte_t *ptep, pte_t pte)
++{
++ if(!(pte_val(pte) & PTE_VALID))
++ return true;
++
++ if((pte_val(*ptep) & PTE_VALID))
++ {
++ if((pte_val(*ptep) & PTE_USER) != (pte_val(pte) & PTE_USER))
++ panic("Incorrectly changing privilege.\n");
++ }
++ else
++ {
++ if((pte_val(pte) & PTE_USER) && is_kernel_pte_table(ptep, pte))
++ panic("Add new U page in kernel space.\n");
++ }
++ return true;
++}
++
++// TODO : When adding a new executable page, check it for DEP.
++static inline bool safely_adding_new_exec_page(pte_t *ptep, pte_t pte)
++{
++ return true;
++}
++
++// Return true if it is only changing the prot of a pte.
++static inline bool is_changing_pte_prot(pte_t *ptep, pte_t pte)
++{
++ if(((pte_val(*ptep) ^ pte_val(pte)) & PTE_ADDR_MASK) == 0)
++ return true;
++ else
++ return false;
++}
++
++// Return true if the modification does not break DEP.
++static inline bool check_pte_dep(char *addr, pte_t pte)
++{
++ // DEP for kernel code and readonly data
++ // _stext: kernel text start addr, _etext: kernel text end addr
++ if (addr >= _stext && addr < _etext)
++ {
++ if ((PTE_WRITE & pte_val(pte)) // DBM == 1 --> writable
++ || !(PTE_RDONLY & pte_val(pte))) // DBM == 0 && AP[2] = 0 --> writable
++ {
++ panic("Can't make kernel's text/readonly page as writable!\n"
++ "addr = 0x%016llx, pte_val = 0x%016llx",
++ (u64)addr, pte_val(pte));
++ }
++ }
++ return true;
++}
++
++void __iee_code _iee_set_pte(pte_t *ptep, pte_t pte)
++{
++ char *addr = (char *)__phys_to_kimg(__pte_to_phys(pte));
++
++ if(!(pte_val(pte) & PTE_VALID))
++ {
++ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
++ return;
++ }
++
++ // Avoid modifying privilege unsafely.
++ if(!check_privilege_safe(ptep, pte))
++ panic("You are modifying privilege unsafely.\n");
++
++ // Avoid mapping a new executable page.
++ if(!safely_adding_new_exec_page(ptep, pte))
++ panic("You are adding a new executable page unsafely.\n");
++
++ // Avoid mapping a new VA to IEE PA.
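++ // (Pure permission updates keep the old PA and are exempted by
++ // is_changing_pte_prot(); only a PA change is probed against IEE.)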
++ if(!is_changing_pte_prot(ptep, pte) &&
++ check_addr_in_iee_valid(__phys_to_iee(__pte_to_phys(pte))))
++ panic("You are remapping an IEE page to another VA.\n");
++
++ // Avoid mapping a writable VA to kernel code PA.
++ if(!check_pte_dep(addr, pte))
++ return;
++
++ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
++}
++
++// Return true if it only sets the U page and modifies NG.
++static inline bool is_setting_upage(pte_t *ptep, pte_t pte)
++{
++ if(((pte_val(*ptep) ^ pte_val(pte)) & ~(PTE_USER | PTE_NG)) != 0)
++ panic("Incorrectly setting U page.\n");
++ if((pte_val(pte) & PTE_USER) != PTE_USER)
++ panic("Using the wrong interface to set a P page.\n");
++ return true;
++}
++
++void __iee_code _iee_set_pte_upage(pte_t *ptep, pte_t pte)
++{
++ // Check that it only changes the prot.
++ if(!is_setting_upage(ptep, pte))
++ panic("Incorrectly setting U page.\n");
++
++ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
++}
++
++// Return true if it only sets the P page and modifies NG.
++static inline bool is_setting_ppage(pte_t *ptep, pte_t pte)
++{
++ if(((pte_val(*ptep) ^ pte_val(pte)) & ~(PTE_USER | PTE_NG)) != 0)
++ panic("Incorrectly setting P page.\n");
++ if((pte_val(pte) & PTE_USER) != 0)
++ panic("Using the wrong interface to set a U page.\n");
++ return true;
++}
++
++void __iee_code _iee_set_pte_ppage(pte_t *ptep, pte_t pte)
++{
++ // Check that it only changes the prot.
++ if(!is_setting_ppage(ptep, pte))
++ panic("Incorrectly setting P page.\n");
++
++ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
++}
++
++void __iee_code _iee_set_bm_pte(pte_t *ptep, pte_t pte)
++{
++ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa_symbol(ptep)))), pte);
++}
++
++/* Data in iee_si_base is visible to all pgd while iee_si_data is private. */
++unsigned long iee_base_idmap_pg_dir __iee_si_data;
++unsigned long iee_base_reserved_pg_dir __iee_si_data;
++unsigned long iee_base__bp_harden_el1_vectors __iee_si_data;
++bool iee_init_done __iee_si_data;
++unsigned long iee_si_tcr __iee_si_data;
++
++static inline u64 __iee_si_code iee_si_mask(unsigned long mask, unsigned long new_val, unsigned long old_val)
++{
++ return (new_val & mask) | (old_val & ~mask);
++}
++/*
++ * Handler function for requests to execute sensitive instructions.
++ */
++u64 __iee_si_code iee_si_handler(int flag, ...)
++{
++ va_list pArgs;
++ u64 old_val, new_val;
++
++ // BUG_ON(flag > IEE_WRITE_MDSCR);
++ va_start(pArgs, flag);
++ switch (flag) {
++ case IEE_SI_TEST:
++ break;
++ case IEE_WRITE_SCTLR: {
++ old_val = read_sysreg(sctlr_el1);
++ new_val = va_arg(pArgs, u64);
++ new_val = iee_si_mask(IEE_SCTLR_MASK, new_val, old_val);
++ write_sysreg(new_val, sctlr_el1);
++ break;
++ }
++ case IEE_WRITE_TTBR0:
++ case IEE_CONTEXT_SWITCH: {
++ u64 new_asid, new_phys, old_phys, token_phys;
++ struct task_struct *tsk;
++ struct task_token *token;
++ new_val = va_arg(pArgs, u64);
++ new_phys = (new_val & PAGE_MASK) & ~TTBR_ASID_MASK;
++ new_asid = new_val >> 48;
++
++ // Check ASID first
++ if (new_phys == iee_base_reserved_pg_dir){
++ if (new_asid != 1)
++ panic("IEE SI warning: reserved_pg_dir ASID invalid: %llx:%llx", new_asid, new_val);
++ }
++ // Already reserved asid 1 for iee rwx gate.
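++ // (ASID convention enforced here: ASID 0 is rewritten to 1 below,
++ // even ASIDs are otherwise rejected for ordinary TTBR0 values.)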
++ else if (new_asid == 0){
++ new_val |= FIELD_PREP(TTBR_ASID_MASK, 1);
++ printk("IEE SI: Modify ASID of %llx to 1.", new_val);
++ }
++ // TODO: operations to protect idmap_pg_dir
++ else if (new_phys == iee_base_idmap_pg_dir)
++ {
++ // printk("IEE SI: switch to idmap_pg_dir.");
++ }
++ else if (new_asid % 2 == 0)
++ panic("IEE SI warning: TTBR0 ASID invalid: %llx:%llx", new_asid, new_val);
++
++ /* Skip verification if iee hasn't been initialized. */
++ if (iee_init_done){
++ // Verify current sp_el0 with iee token info
++ asm volatile("mrs %x0, sp_el0":"=r"(tsk));
++ token = (struct task_token *)__phys_to_iee(__pa(tsk));
++
++ /*
++ * A non-NULL token->pgd means this is a user task, so check whether the current ttbr0 is correct.
++ */
++ if (token->pgd){
++ old_val = read_sysreg(ttbr0_el1);
++ // When TTBR0 is reserved_pg_dir, no check is possible.
++ if (old_val != iee_base_reserved_pg_dir){
++ old_phys = (old_val & PAGE_MASK) & ~TTBR_ASID_MASK;
++ token_phys = __pa(token->pgd);
++ if (old_phys != token_phys)
++ panic("IEE SI warning: Pgd set error. old ttbr0:%lx, token ttbr0:%lx, token pgd:%lx",
++ (unsigned long)old_phys, (unsigned long)token_phys, (unsigned long)(token->pgd));
++ }
++ }
++ }
++ // All checks are done.
++ write_sysreg(new_val, ttbr0_el1);
++
++ // Set the ASID in TTBR1 on context switch
++ if (flag == IEE_CONTEXT_SWITCH){
++ new_val = (read_sysreg(ttbr1_el1) & ~TTBR_ASID_MASK) | FIELD_PREP(TTBR_ASID_MASK, new_asid - 1);
++ write_sysreg(new_val, ttbr1_el1);
++ }
++ break;
++ }
++ case IEE_WRITE_VBAR: {
++ u64 el1_vector;
++ new_val = va_arg(pArgs, u64);
++ el1_vector = iee_base__bp_harden_el1_vectors;
++ if(new_val == el1_vector || new_val == el1_vector+SZ_2K ||
++ new_val == el1_vector+SZ_2K*2 || new_val == el1_vector+SZ_2K*3)
++ write_sysreg(new_val, vbar_el1);
++ break;
++ }
++ case IEE_WRITE_TCR: {
++ old_val = read_sysreg(tcr_el1);
++ new_val = va_arg(pArgs, u64);
++ new_val = iee_si_mask(IEE_TCR_MASK, new_val, old_val);
++ write_sysreg(new_val, tcr_el1);
++ break;
++ }
++ case IEE_WRITE_MDSCR: {
++ old_val = read_sysreg(mdscr_el1);
++ new_val = va_arg(pArgs, u64);
++ new_val = iee_si_mask(IEE_MDSCR_MASK, new_val, old_val);
++ write_sysreg(new_val, mdscr_el1);
++ break;
++ }
++ }
++ va_end(pArgs);
++ return 0;
++}
++/*
++ * TODO: scan a page to check whether it contains sensitive instructions;
++ * return 1 when a sensitive inst is found, 0 on a safe page.
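++ * (Only the prototype is carried below; no definition appears in this
++ * file.)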
++ */ ++int iee_si_scan_page(unsigned long addr); ++#endif +\ No newline at end of file +diff --git a/arch/arm64/kernel/koi/Makefile b/arch/arm64/kernel/koi/Makefile +new file mode 100644 +index 000000000000..9be8710b714a +--- /dev/null ++++ b/arch/arm64/kernel/koi/Makefile +@@ -0,0 +1 @@ ++obj-y += koi.o +\ No newline at end of file +diff --git a/arch/arm64/kernel/koi/koi.c b/arch/arm64/kernel/koi/koi.c +new file mode 100644 +index 000000000000..716ba16ab358 +--- /dev/null ++++ b/arch/arm64/kernel/koi/koi.c +@@ -0,0 +1,1327 @@ ++#include "asm/koi.h" ++#include "linux/compiler_attributes.h" ++#include "linux/compiler_types.h" ++#include "asm/barrier.h" ++#include "asm-generic/bug.h" ++#include "asm-generic/errno-base.h" ++#include "asm-generic/memory_model.h" ++#include "asm-generic/pgtable-nop4d.h" ++#include "asm-generic/rwonce.h" ++#include "asm/pgalloc.h" ++#include "asm/memory.h" ++#include "linux/bitfield.h" ++#include "linux/compiler.h" ++#include "linux/types.h" ++#include "linux/spinlock.h" ++#include "linux/spinlock_types.h" ++#include "linux/kernel.h" ++#include "linux/rculist.h" ++#include "linux/rcupdate.h" ++#include "linux/list.h" ++#include "asm/current.h" ++#include "linux/compiler_types.h" ++#include "asm-generic/barrier.h" ++#include "asm-generic/rwonce.h" ++#include "asm-generic/pgalloc.h" ++#include "asm/cpufeature.h" ++#include "asm/kvm_hyp.h" ++#include "asm/mmu.h" ++#include "asm/mmu_context.h" ++#include "asm/page-def.h" ++#include "asm/pgalloc.h" ++#include "asm/pgtable-hwdef.h" ++#include "asm/pgtable-types.h" ++#include "asm/pgtable.h" ++#include "asm/string.h" ++#include "asm/sysreg.h" ++#include "linux/bitfield.h" ++#include "linux/compiler.h" ++#include "linux/export.h" ++#include "linux/gfp.h" ++#include "linux/huge_mm.h" ++#include "linux/kallsyms.h" ++#include "linux/kconfig.h" ++#include "linux/kern_levels.h" ++#include "linux/kernel.h" ++#include "linux/list.h" ++#include "linux/lockdep.h" ++#include "linux/mm.h" ++#include "linux/mm_types.h" ++#include "linux/pgtable.h" ++#include "linux/printk.h" ++#include "linux/rculist.h" ++#include "linux/rcupdate.h" ++#include "linux/rmap.h" ++#include "linux/sched.h" ++#include "linux/stddef.h" ++#include "linux/string.h" ++#include "linux/swap.h" ++#include "linux/swapops.h" ++#include "linux/types.h" ++#include "linux/slab.h" ++#include "linux/string.h" ++#include "linux/hashtable.h" ++ ++#define __koi_code __section(".koi.text") ++#define __koi_data __section(".data..koi") ++ ++extern unsigned long __koi_code_start[]; ++extern unsigned long __koi_code_end[]; ++extern unsigned long __koi_data_start[]; ++extern unsigned long __koi_data_end[]; ++#ifdef CONFIG_IEE ++extern unsigned long __iee_si_base_start[]; ++extern unsigned long __iee_exec_entry_start[]; ++extern unsigned long __iee_exec_entry_end[]; ++#endif ++ ++__koi_data unsigned long koi_swapper_ttbr1 = 0; ++EXPORT_SYMBOL(koi_swapper_ttbr1); ++#define KOI_SWAPPER_MASK 0x0000fffffffffff0 ++ ++__attribute__((aligned(PAGE_SIZE))) ++DEFINE_PER_CPU(unsigned long[PAGE_SIZE / sizeof(unsigned long)], ++ koi_irq_current_ttbr1); ++EXPORT_SYMBOL(koi_irq_current_ttbr1); ++ ++extern void koi_switch_to_ko_stack(unsigned long stack_top); ++extern void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp); ++extern void koi_check_and_switch_context(struct mm_struct *mm); ++extern int koi_add_page_mapping(unsigned long dst, unsigned long src); ++extern unsigned long _iee_read_token_ttbr1(struct task_struct *tsk); ++/** ++*struct koi_mem_list - maintain a linked list of free 
memory in the kernel
++*@addr: starting address of this memory
++*@size: the size of the memory
++*@list: list node linking into the koi_mem_list
++*@rcu: rcu head for deferred freeing
++*/
++struct koi_mem_list {
++ unsigned long addr;
++ unsigned long size;
++ struct list_head list;
++ struct rcu_head rcu;
++};
++// maps a copied parameter buffer to its original address
++struct koi_addr_map {
++ unsigned long buffer_addr;
++ unsigned long orig_addr;
++ int offset;
++ struct hlist_node node;
++ struct rcu_head rcu;
++};
++
++DEFINE_HASHTABLE(koi_mem_htbl, HASH_TABLE_BIT);
++EXPORT_SYMBOL(koi_mem_htbl);
++DEFINE_SPINLOCK(koi_mem_htbl_spin_lock);
++EXPORT_SYMBOL(koi_mem_htbl_spin_lock);
++
++EXPORT_SYMBOL(koi_do_switch_to_ko_stack);
++EXPORT_SYMBOL(koi_do_switch_to_kernel_stack);
++
++extern unsigned long long iee_rw_gate(int flag, ...);
++
++/**
++* koi_ttbr_ctor - return ttbr1 for the given driver module
++*/
++unsigned long koi_ttbr_ctor(struct module *mod)
++{
++ struct koi_mem_hash_node *ko;
++ struct mm_struct *ko_mm = NULL;
++ unsigned long ttbr1;
++ unsigned long asid;
++ int bkt;
++ rcu_read_lock();
++ hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) {
++ if (ko->mod == mod) {
++ ko_mm = ko->ko_mm;
++ break;
++ }
++ }
++ rcu_read_unlock();
++ if (!ko_mm) {
++ printk(KERN_ERR "cannot find module %s in koi_mem_htbl",
++ mod->name);
++ return 0;
++ }
++ asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):);
++ asid &= TTBR_ASID_MASK;
++ ttbr1 = ko->ko_ttbr1 | asid;
++ // koi_check_and_switch_context(ko_mm);
++ // asid = ASID(ko_mm);
++ // ttbr1 = ko->ko_ttbr1 | FIELD_PREP(TTBR_ASID_MASK, asid);
++ return ttbr1;
++}
++EXPORT_SYMBOL(koi_ttbr_ctor);
++// release the hash node
++static __maybe_unused void koi_mem_hash_node_free(struct rcu_head *rcu)
++{
++ struct koi_mem_hash_node *node =
++ container_of(rcu, struct koi_mem_hash_node, rcu);
++ kfree(node);
++}
++// release free memory linked list nodes
++static void koi_mem_node_free(struct rcu_head *rcu)
++{
++ struct koi_mem_list *mem_node =
++ container_of(rcu, struct koi_mem_list, rcu);
++ kfree(mem_node);
++}
++// release the node in koi_addr_map
++static void koi_addr_map_node_free(struct rcu_head *rcu)
++{
++ struct koi_addr_map *addr_map_node =
++ container_of(rcu, struct koi_addr_map, rcu);
++ kfree(addr_map_node);
++}
++
++#ifndef CONFIG_IEE
++/*
++ * This function is used to switch to ko's pgtable.
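++ * It matches ELR_EL1 against each registered module's init/core text;
++ * when the interrupted context belongs to a known module (and the saved
++ * per-task or per-cpu ttbr1 agrees), it returns that module's TTBR1
++ * value tagged with the current ASID, otherwise 0.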
++ */ ++__koi_code noinline unsigned long koi_do_switch_to_ko_pgtbl(void) ++{ ++ struct koi_mem_hash_node *ko; ++ // struct mm_struct *ko_mm; ++ unsigned long addr; ++ unsigned long ttbr1, asid; ++ unsigned long *ptr; ++ struct task_token *token_addr = ++ (struct task_token *)((unsigned long)current + ++ (unsigned long)koi_offset); ++ int bkt; ++ asm volatile(" mrs %0, elr_el1\n" : "=r"(addr)); ++ ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, __kern_my_cpu_offset()); ++ rcu_read_lock(); ++ hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { ++ if (ko->mod->init_layout.base != NULL) { ++ if (addr >= (unsigned long)ko->mod->init_layout.base && ++ addr < (unsigned long)(ko->mod->init_layout.base + ++ ko->mod->init_layout.size)) { ++ if (token_addr->current_ttbr1 == ko->ko_ttbr1 || ++ *ptr == ko->ko_ttbr1) { ++ // ko_mm = ko->ko_mm; ++ // koi_check_and_switch_context(ko_mm); ++ // asid = ASID(ko_mm); ++ // ttbr1 = ko->ko_ttbr1; ++ // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); ++ asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):); ++ asid &= TTBR_ASID_MASK; ++ ttbr1 = ko->ko_ttbr1 | asid; ++ rcu_read_unlock(); ++ return ttbr1; ++ } ++ rcu_read_unlock(); ++ return 0; ++ } ++ } ++ if (addr >= (unsigned long)ko->mod->core_layout.base && ++ addr < (unsigned long)ko->mod->core_layout.base + ++ ko->mod->core_layout.size) { ++ if (token_addr->current_ttbr1 == ko->ko_ttbr1 || ++ *ptr == ko->ko_ttbr1) { ++ // ko_mm = ko->ko_mm; ++ // koi_check_and_switch_context(ko_mm); ++ // asid = ASID(ko_mm); ++ // ttbr1 = ko->ko_ttbr1; ++ // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); ++ asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):); ++ asid &= TTBR_ASID_MASK; ++ ttbr1 = ko->ko_ttbr1 | asid; ++ rcu_read_unlock(); ++ return ttbr1; ++ } ++ rcu_read_unlock(); ++ return 0; ++ } ++ } ++ rcu_read_unlock(); ++ return 0; ++} ++/** ++* koi_do_switch_to_kernel_pgtbl - switch to kernel pagetable ++*/ ++__koi_code noinline int koi_do_switch_to_kernel_pgtbl(void) ++{ ++ unsigned long curr_ttbr1, asid; ++ // if (!cpu_online(smp_processor_id())) ++ // return 0; ++ asm volatile(" mrs %0, ttbr1_el1\n" : "=r"(curr_ttbr1)); ++ if ((curr_ttbr1 & KOI_SWAPPER_MASK) == ++ (koi_swapper_ttbr1 & KOI_SWAPPER_MASK)) { ++ return 0; ++ } ++ if (((curr_ttbr1 & TTBR_ASID_MASK) >> 48) <= 1) { ++ return 0; ++ } ++ asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):); ++ asid &= ~USER_ASID_FLAG; ++ asid &= TTBR_ASID_MASK; ++ write_sysreg(koi_swapper_ttbr1 | asid, ttbr1_el1); ++ isb(); ++ asm volatile(ALTERNATIVE("nop; nop; nop", "ic iallu; dsb nsh; isb", ++ ARM64_WORKAROUND_CAVIUM_27456)); ++ return 1; ++} ++#else ++__koi_code noinline unsigned long koi_do_switch_to_ko_pgtbl(void) ++{ ++ struct koi_mem_hash_node *ko; ++ struct mm_struct *ko_mm; ++ unsigned long addr, ttbr1, asid, pan_flag, current_ttbr1; ++ unsigned long *ptr; ++ int bkt; ++ asm volatile("mrs %0, pan\n" ++ "msr pan, 0x0\n" ++ : "=r"(pan_flag) ++ :); ++ current_ttbr1 = _iee_read_token_ttbr1(current); ++ asm volatile("msr pan, %0\n" : : "r"(pan_flag)); ++ ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, __kern_my_cpu_offset()); ++ if (current_ttbr1 == 0 && *ptr == 0) ++ return 0; ++ asm volatile(" mrs %0, elr_el1\n" : "=r"(addr)); ++ rcu_read_lock(); ++ hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { ++ if (ko->mod->init_layout.base != NULL) { ++ if (addr >= (unsigned long)ko->mod->init_layout.base && ++ addr < (unsigned long)(ko->mod->init_layout.base + ++ ko->mod->init_layout.size)) { ++ rcu_read_unlock(); ++ if (current_ttbr1 == ko->ko_ttbr1 || *ptr == ko->ko_ttbr1) { ++ // ko_mm = ko->ko_mm; 
++ // koi_check_and_switch_context(ko_mm);
++ // asid = ASID(ko_mm);
++ // ttbr1 = ko->ko_ttbr1;
++ // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK,
++ // asid);
++ return ko->ko_ttbr1;
++ }
++ return 0;
++ }
++ }
++ if (addr >= (unsigned long)ko->mod->core_layout.base &&
++ addr < (unsigned long)ko->mod->core_layout.base +
++ ko->mod->core_layout.size) {
++ rcu_read_unlock();
++ if (current_ttbr1 == ko->ko_ttbr1 || *ptr == ko->ko_ttbr1) {
++ // ko_mm = ko->ko_mm;
++ // koi_check_and_switch_context(ko_mm);
++ // asid = ASID(ko_mm);
++ // ttbr1 = ko->ko_ttbr1;
++ // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
++ return ko->ko_ttbr1;
++ }
++ return 0;
++ }
++ }
++ rcu_read_unlock();
++ return 0;
++}
++
++__koi_code noinline int koi_do_switch_to_kernel_pgtbl(void)
++{
++ unsigned long curr_ttbr1;
++ // if (!cpu_online(smp_processor_id()))
++ // return 0;
++ asm volatile(" mrs %0, ttbr1_el1\n" : "=r"(curr_ttbr1));
++ if ((curr_ttbr1 & KOI_SWAPPER_MASK) ==
++ (koi_swapper_ttbr1 & KOI_SWAPPER_MASK)) {
++ return 0;
++ }
++ if (((curr_ttbr1 & TTBR_ASID_MASK) >> 48) <= 1) {
++ return 0;
++ }
++ iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL);
++ return 1;
++}
++#endif
++/**
++* koi_save_ttbr - save ttbr of each driver module
++* @mod: driver module
++* @pgdp: pointer to the driver module's top-level page table (pgd)
++*/
++static void koi_save_ttbr(struct module *mod, pgd_t *pgdp,
++ struct koi_mem_hash_node *node)
++{
++ phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
++ if (system_supports_cnp())
++ ttbr1 |= TTBR_CNP_BIT;
++ node->ko_ttbr1 = ttbr1;
++}
++/**
++*koi_normal_page - obtain the struct page pointer for the page mapped
++*by a given page table entry (pte)
++*/
++struct page *koi_normal_page(pte_t pte)
++{
++ unsigned long pfn = pte_pfn(pte);
++
++ if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
++ if (likely(!pte_special(pte)))
++ goto check_pfn;
++ if (is_zero_pfn(pfn)) {
++ printk(KERN_ERR "zero pfn found! pte=0x%016llx\n", pte_val(pte));
++ return NULL;
++ }
++ if (pte_devmap(pte)) {
++ printk(KERN_ERR "pte for dev found! pte=0x%016llx\n",
++ pte_val(pte));
++ return NULL;
++ }
++ return NULL;
++ }
++
++check_pfn:
++ return pfn_to_page(pfn);
++}
++
++/**
++ * Copy one pte. Returns 0 on success, or -EAGAIN if one preallocated page
++ * is required to copy this pte.
++*/
++static inline int koi_copy_present_pte(pte_t *dst_pte, pte_t *src_pte,
++ unsigned long addr,
++ struct page **prealloc)
++{
++ pte_t pte = *src_pte;
++ struct page *page;
++
++ page = koi_normal_page(pte);
++ if (!page) {
++ printk(KERN_ERR "pte_page unavailable; should not happen\n");
++ return -1;
++ }
++
++ set_pte(dst_pte, pte);
++ return 0;
++}
++/**
++* copy huge pmd from kernel space to driver space.
++*/
++static int koi_copy_huge_pmd(struct mm_struct *ko_mm, pmd_t *dst_pmd,
++ pmd_t *src_pmd, unsigned long addr)
++{
++ spinlock_t *src_ptl;
++ pmd_t pmd;
++ int ret = -ENOMEM;
++
++ src_ptl = pmd_lockptr(&init_mm, src_pmd);
++ spin_lock_bh(src_ptl);
++
++ ret = -EAGAIN;
++ pmd = *src_pmd;
++
++ set_pte((pte_t *)dst_pmd, pmd_pte(pmd));
++ ret = 0;
++ spin_unlock_bh(src_ptl);
++ return ret;
++}
++
++int __koi_pte_alloc(struct mm_struct *mm, pmd_t *pmd)
++{
++ spinlock_t *ptl;
++ pgtable_t new = pte_alloc_one(mm);
++ if (!new)
++ return -ENOMEM;
++
++ /*
++ * Ensure all pte setup (eg. pte page lock and page clearing) are
++ * visible before the pte is made visible to other CPUs by being
++ * put into page tables.
++ * ++ * The other side of the story is the pointer chasing in the page ++ * table walking code (when walking the page table without locking; ++ * ie. most of the time). Fortunately, these data accesses consist ++ * of a chain of data-dependent loads, meaning most CPUs (alpha ++ * being the notable exception) will already guarantee loads are ++ * seen in-order. See the alpha page table accessors for the ++ * smp_rmb() barriers in page table walking code. ++ */ ++ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ ++ ++ ptl = pmd_lockptr(mm, pmd); ++ spin_lock_bh(ptl); ++ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ ++ #ifdef CONFIG_PTP ++ pte_t *pte = (pte_t *)page_address(new); ++ unsigned long iee_addr = __phys_to_iee(__pa(pte)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)pte); ++ #endif ++ mm_inc_nr_ptes(mm); ++ pmd_populate(mm, pmd, new); ++ new = NULL; ++ } ++ spin_unlock_bh(ptl); ++ if (new) ++ pte_free(mm, new); ++ return 0; ++} ++ ++#define koi_pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __koi_pte_alloc(mm, pmd)) ++ ++#define koi_pte_offset_map_lock(mm, pmd, address, ptlp) \ ++({ \ ++ spinlock_t *__ptl = pte_lockptr(mm, pmd); \ ++ pte_t *__pte = pte_offset_map(pmd, address); \ ++ *(ptlp) = __ptl; \ ++ spin_lock_bh(__ptl); \ ++ __pte; \ ++}) ++ ++#define koi_pte_alloc_map_lock(mm, pmd, address, ptlp) \ ++ (koi_pte_alloc(mm, pmd) ? \ ++ NULL : koi_pte_offset_map_lock(mm, pmd, address, ptlp)) ++ ++/** ++*koi_copy_pte_range - copy pte from kernel space to driver space ++*/ ++static int koi_copy_pte_range(struct mm_struct *ko_mm, pmd_t *dst_pmd, ++ pmd_t *src_pmd, unsigned long addr, ++ unsigned long end) ++{ ++ pte_t *src_pte, *dst_pte; ++ spinlock_t *src_ptl, *dst_ptl; ++ int ret = 0; ++ struct page *prealloc = NULL; ++again: ++ dst_pte = koi_pte_alloc_map_lock(ko_mm, dst_pmd, addr, &dst_ptl); ++ if (!dst_pte) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ src_pte = pte_offset_map(src_pmd, addr); ++ src_ptl = pte_lockptr(&init_mm, src_pmd); ++ spin_lock_bh(src_ptl); ++ arch_enter_lazy_mmu_mode(); ++ ++ do { ++ if (pte_none(*src_pte)) ++ continue; ++ if (unlikely(!pte_present(*src_pte))) { ++ continue; ++ } ++ /* koi_copy_present_pte() will clear `*prealloc` if consumed */ ++ ret = koi_copy_present_pte(dst_pte, src_pte, addr, &prealloc); ++ if (unlikely(ret == -EAGAIN)) ++ break; ++ if (unlikely(prealloc)) { ++ put_page(prealloc); ++ prealloc = NULL; ++ } ++ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock_bh(src_ptl); ++ spin_unlock_bh(dst_ptl); ++ ++ if (ret) { ++ WARN_ON_ONCE(ret != -EAGAIN); ++ ret = 0; ++ } ++ if (addr != end) ++ goto again; ++out: ++ if (unlikely(prealloc)) ++ put_page(prealloc); ++ return ret; ++} ++ ++int __koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) ++{ ++ spinlock_t *ptl; ++ pmd_t *new = pmd_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ ptl = pud_lockptr(mm, pud); ++ spin_lock_bh(ptl); ++ if (!pud_present(*pud)) { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(__pa(new)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)new); ++ #endif ++ mm_inc_nr_pmds(mm); ++ pud_populate(mm, pud, new); ++ } else /* Another has populated it */ ++ pmd_free(mm, new); ++ spin_unlock_bh(ptl); ++ return 0; ++} ++ ++static inline pmd_t *koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) ++{ ++ return 
(unlikely(pud_none(*pud)) && __koi_pmd_alloc(mm, pud, address)) ?
++ NULL : pmd_offset(pud, address);
++}
++
++/**
++*koi_copy_pmd_range - copy pmd entries from kernel space to driver space
++*/
++static inline int koi_copy_pmd_range(struct mm_struct *ko_mm, pud_t *dst_pud,
++ pud_t *src_pud, unsigned long addr,
++ unsigned long end)
++{
++ pmd_t *src_pmd, *dst_pmd;
++ unsigned long next;
++ int err;
++
++ dst_pmd = koi_pmd_alloc(ko_mm, dst_pud, addr);
++ if (!dst_pmd) {
++ return -ENOMEM;
++ }
++ src_pmd = pmd_offset(src_pud, addr);
++ do {
++ next = pmd_addr_end(addr, end);
++ // CONFIG_TRANSPARENT_HUGEPAGE is enabled, so huge pmds must be copied here as well
++ if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) ||
++ (pmd_devmap(*src_pmd))) {
++ err = koi_copy_huge_pmd(ko_mm, dst_pmd, src_pmd, addr);
++ if (err == -ENOMEM)
++ return -ENOMEM;
++ if (!err)
++ continue;
++ }
++ if (pmd_none_or_clear_bad(src_pmd)) {
++ continue;
++ }
++ if (koi_copy_pte_range(ko_mm, dst_pmd, src_pmd, addr, next))
++ return -ENOMEM;
++ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
++ return 0;
++}
++
++int __koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
++{
++ pud_t *new = pud_alloc_one(mm, address);
++ if (!new)
++ return -ENOMEM;
++
++ smp_wmb(); /* See comment in __pte_alloc */
++
++ spin_lock_bh(&mm->page_table_lock);
++ if (!p4d_present(*p4d)) {
++ #ifdef CONFIG_PTP
++ unsigned long iee_addr = __phys_to_iee(__pa(new));
++ set_iee_page_valid(iee_addr);
++ iee_set_logical_mem_ro((unsigned long)new);
++ #endif
++ mm_inc_nr_puds(mm);
++ p4d_populate(mm, p4d, new);
++ } else /* Another has populated it */
++ pud_free(mm, new);
++ spin_unlock_bh(&mm->page_table_lock);
++ return 0;
++}
++
++static inline pud_t *koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d,
++ unsigned long address)
++{
++ return (unlikely(p4d_none(*p4d)) && __koi_pud_alloc(mm, p4d, address)) ?
++ NULL : pud_offset(p4d, address); ++} ++ ++/** ++*koi_copy_pud_range - copy pud from kernel to driver ++*/ ++static inline int koi_copy_pud_range(struct mm_struct *ko_mm, p4d_t *dst_p4d, ++ p4d_t *src_p4d, unsigned long addr, ++ unsigned long end) ++{ ++ pud_t *src_pud, *dst_pud; ++ unsigned long next; ++ dst_pud = koi_pud_alloc(ko_mm, dst_p4d, addr); ++ if (!dst_pud) ++ return -ENOMEM; ++ src_pud = pud_offset(src_p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { ++ continue; ++ /* fall through */ ++ } ++ if (pud_none_or_clear_bad(src_pud)) ++ continue; ++ if (koi_copy_pmd_range(ko_mm, dst_pud, src_pud, addr, next)) ++ return -ENOMEM; ++ } while (dst_pud++, src_pud++, addr = next, addr != end); ++ return 0; ++} ++ ++/** ++* koi_copy_p4d_range - map the kernel pagetable to the driver space level by level ++* @ko_mm: the mm_struct of driver module ++* @dst_pgd: destination pgd ++* @src_pgd: source pgd ++* @addr: the start of address ++* @end: the end of address ++*/ ++static inline int koi_copy_p4d_range(struct mm_struct *ko_mm, pgd_t *dst_pgd, ++ pgd_t *src_pgd, unsigned long addr, ++ unsigned long end) ++{ ++ p4d_t *src_p4d, *dst_p4d; ++ unsigned long next; ++ dst_p4d = p4d_alloc(ko_mm, dst_pgd, addr); ++ if (!dst_p4d) ++ return -ENOMEM; ++ src_p4d = p4d_offset(src_pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(src_p4d)) ++ continue; ++ if (koi_copy_pud_range(ko_mm, dst_p4d, src_p4d, addr, next)) { ++ return -ENOMEM; ++ } ++ } while (dst_p4d++, src_p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++/** ++*int koi_copy_pagetable - map the address range from "addr" to "end" to the driver pagetable ++*@ko_mm: the mm_struct of the driver module ++*@koi_pg_dir: koi_pg_dir, related to the driver module, the entry for driver pagetable ++*@addr: the starting address of mapping zone ++*@end: the end address of mapping zone ++*/ ++int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir, ++ unsigned long addr, unsigned long end) ++{ ++ int ret = 0; ++ unsigned long next; ++ ++ pgd_t *src_pgd, *dst_pgd; ++ ++ src_pgd = pgd_offset_pgd(swapper_pg_dir, addr); ++ dst_pgd = pgd_offset_pgd(koi_pg_dir, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(src_pgd)) ++ continue; ++ if (unlikely(koi_copy_p4d_range(ko_mm, dst_pgd, src_pgd, addr, ++ next))) { ++ ret = -ENOMEM; ++ break; ++ } ++ } while (dst_pgd++, src_pgd++, addr = next, addr != end); ++ ++ return ret; ++} ++ ++void koi_set_rdonly(unsigned long addr, pgd_t *pgdir) ++{ ++ p4d_t *p4dp; ++ pud_t *pudp; ++ pmd_t *pmdp; ++ pte_t *ptep; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ if (pgd_none(*pgdp) || pgd_bad(*pgdp)) { ++ return; ++ } ++ ++ p4dp = p4d_offset(pgdp, addr); ++ if (p4d_none(*p4dp) || p4d_bad(*p4dp)) { ++ return; ++ } ++ ++ pudp = pud_offset(p4dp, addr); ++ if (pud_none(*pudp) || pud_bad(*pudp)) { ++ return; ++ } ++ pmdp = pmd_offset(pudp, addr); ++ if (pmd_none(*pmdp) || pmd_bad(*pmdp)) { ++ return; ++ } ++ ++ ptep = pte_offset_kernel(pmdp, addr); ++ if (pte_none(*ptep)) { ++ printk(KERN_ERR "ptep 0x%llx not available\n", ptep); ++ return; ++ } ++ set_pte(ptep, __pte(pte_val(*ptep) | PTE_RDONLY)); ++ printk(KERN_ERR "set_readonly successfully\n"); ++ return; ++} ++ ++/** ++* koi_create_pagetable - create pagetable for driver ++* @mod: driver module ++* 1.create a new koi_mem_hash_node new_node ++* 2.create page table return the pgd address, init the new_node->pgdp ++* 3.create and init the 
new_node->ko_mm ++* 4.map swapper_ttbr1 to the newly created pagetable ++* 5.map the interrupt vector table to the newly created pagetable ++* 6.map the init_layout of the module ++* 7.map the core_layout of the module ++* 8.map switch_to_kernel_pgtable into driver view ++* 9.map share memory ++*/ ++void koi_create_pagetable(struct module *mod) ++{ ++ int ret = 0, cpu; ++ unsigned long vbar, addr, ttbr1; ++ pgd_t *pgdp; ++ unsigned long *ptr; ++ struct koi_mem_list *new_mem_node; ++ struct koi_mem_hash_node *new_node = ++ kzalloc(sizeof(struct koi_mem_hash_node), GFP_KERNEL); ++ if (!new_node) { ++ printk(KERN_ERR "NULL new_node\n"); ++ return; ++ }; ++ if (koi_swapper_ttbr1 == 0) { ++ pgdp = lm_alias(swapper_pg_dir); ++ ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); ++ if (system_supports_cnp() && ++ !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) ++ ttbr1 |= TTBR_CNP_BIT; ++#ifdef CONFIG_IEE ++ ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, 1); ++#endif ++ koi_swapper_ttbr1 = ttbr1; ++ // __WRITE_ONCE(koi_swapper_ttbr1, ttbr1); ++ // koi_set_rdonly(&koi_swapper_ttbr1, swapper_pg_dir); ++ } ++ new_node->pgdp = koi_pgd_alloc(); ++ new_node->ko_mm = ++ kzalloc(sizeof(struct mm_struct) + ++ sizeof(unsigned long) * BITS_TO_LONGS(NR_CPUS), ++ GFP_KERNEL); ++ init_ko_mm(new_node->ko_mm, new_node->pgdp); ++ new_node->mod = mod; ++ koi_save_ttbr(mod, new_node->pgdp, new_node); ++ printk(KERN_ERR "copying koi_data, start=0x%16llx, end=0x%16llx\n", ++ (unsigned long)__koi_data_start, (unsigned long)__koi_data_end); ++ // copy koi_swapper_ttbr1, which records page dir base for kernel view ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned long)__koi_data_start, ++ (unsigned long)__koi_data_end); ++ asm volatile("mrs %0, VBAR_EL1\n" : "=r"(vbar) :); ++ ++ // copy interrupt vectors ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, vbar & PAGE_MASK, ++ (vbar + PAGE_SIZE) & PAGE_MASK); ++ ++ // copy module init_layout, which contains init data and text in driver ++ ret = koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned long)mod->init_layout.base, ++ (unsigned long)mod->init_layout.base + ++ mod->init_layout.size); ++ if (ret != 0) ++ printk(KERN_ERR ++ "\033[33mError occur when copying init_layout, Eno:%d\033[0m\n", ++ ret); ++ ++ // copy module core_layout, which contains non-init data and text in driver ++ ret = koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned long)mod->core_layout.base, ++ (unsigned long)mod->core_layout.base + ++ mod->core_layout.size); ++ if (ret != 0) ++ printk(KERN_ERR ++ "\033[33mError occur when copying core_layout, Eno: %d\033[0m\n", ++ ret); ++ ++ // mapping switch_to_kernel_pgtable into driver view, which is used to switch to kernel view when entering INT ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned long)__koi_code_start, ++ (unsigned long)__koi_code_end); ++ ++ for_each_possible_cpu (cpu) { ++ ptr = per_cpu(irq_stack_ptr, cpu); ++ printk(KERN_ERR ++ "\033[33mirq_stack_ptr on cpu %d addr=0x%16llx, end=0x%16llx\033[0m\n", ++ cpu, (unsigned long)ptr, ++ (unsigned long)ptr + IRQ_STACK_SIZE); ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned long)ptr, ++ (unsigned long)ptr + IRQ_STACK_SIZE); ++ } ++ ++ for_each_possible_cpu (cpu) { ++ ptr = per_cpu(koi_irq_current_ttbr1, cpu); ++ printk(KERN_ERR ++ "\033[33mirq_current_ptr on cpu %d addr=0x%16llx, end=0x%16llx\033[0m\n", ++ cpu, (unsigned long)ptr, (unsigned long)ptr + PAGE_SIZE); ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned 
long)ptr, ++ (unsigned long)ptr + PAGE_SIZE); ++ } ++ ++#ifdef CONFIG_IEE ++ // mapping iee_rwx_gate_entry and iee_si_base to ko's pagetable ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, ++ (unsigned long)__iee_si_base_start, ++ (unsigned long)__iee_exec_entry_end); ++#endif ++ ++ // alloc 16KB memory for new ko, and add it into hashtable ++ addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL); ++ if ((void *)addr == NULL) { ++ printk(KERN_ERR "alloc buffer error\n"); ++ } ++ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, ++ addr + THREAD_SIZE); ++ ++ new_mem_node = kmalloc(sizeof(struct koi_mem_list), GFP_KERNEL); ++ if (new_mem_node == NULL) { ++ printk(KERN_ERR "alloc new_mem_node error\n"); ++ } ++ new_mem_node->addr = addr; ++ new_mem_node->size = THREAD_SIZE; ++ ++ new_node->mem_list_head = ++ (struct list_head)LIST_HEAD_INIT(new_node->mem_list_head); ++ hash_init(new_node->addr_htbl); ++ spin_lock_init(&new_node->addr_htbl_spin_lock); ++ spin_lock_init(&new_node->spin_lock); ++ ++ spin_lock(&new_node->spin_lock); ++ list_add_rcu(&new_mem_node->list, &new_node->mem_list_head); ++ spin_unlock(&new_node->spin_lock); ++ ++ spin_lock(&koi_mem_htbl_spin_lock); ++ hash_add_rcu(koi_mem_htbl, &new_node->node, ++ (unsigned long)new_node->mod); ++ spin_unlock(&koi_mem_htbl_spin_lock); ++} ++/** ++* koi_mem_alloc ++*@mod: driver module ++*@orig_addr: the starting address of the parameter in kernel ++*@size: the size of the parameter ++*/ ++unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr, ++ unsigned long size) ++{ ++ struct koi_mem_hash_node *target = NULL; ++ struct koi_mem_list *mem_node; ++ struct koi_addr_map *new_addr_node; ++ unsigned long addr = 0, flags; ++ struct koi_mem_list *new_mem_node; ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (koi_mem_htbl, target, node, ++ (unsigned long)mod) { ++ if (target->mod == mod) { ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (target == NULL) { ++ printk(KERN_ERR "mem node for module: %s not found\n", ++ mod->name); ++ return 0; ++ } ++ spin_lock_irqsave(&target->spin_lock, flags); ++ list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { ++ if (mem_node->size >= size) { ++ addr = mem_node->addr; ++ mem_node->size -= size; ++ if (mem_node->size == 0) { ++ list_del_rcu(&mem_node->list); ++ } else { ++ new_mem_node = ++ kmalloc(sizeof(struct koi_mem_list), ++ GFP_ATOMIC); ++ new_mem_node->addr = addr + size; ++ new_mem_node->size = mem_node->size; ++ list_replace_rcu(&mem_node->list, ++ &new_mem_node->list); ++ } ++ call_rcu(&mem_node->rcu, koi_mem_node_free); ++ } ++ } ++ spin_unlock_irqrestore(&target->spin_lock, flags); ++ if (!addr) { ++ addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL); ++ if ((void *)addr == NULL) { ++ return 0; ++ } ++ koi_copy_pagetable(target->ko_mm, target->pgdp, addr, ++ addr + THREAD_SIZE); ++ mem_node = kmalloc(sizeof(struct koi_mem_list), GFP_KERNEL); ++ if (!mem_node) { ++ printk(KERN_ERR "NULL mem_node\n"); ++ } ++ if (size > THREAD_SIZE) { ++ return 0; ++ } ++ mem_node->addr = addr + size; ++ mem_node->size = THREAD_SIZE - size; ++ spin_lock_irqsave(&target->spin_lock, flags); ++ list_add_tail_rcu(&mem_node->list, &target->mem_list_head); ++ spin_unlock_irqrestore(&target->spin_lock, flags); ++ } ++ ++ new_addr_node = kzalloc(sizeof(struct koi_addr_map), GFP_KERNEL); ++ new_addr_node->buffer_addr = addr; ++ new_addr_node->orig_addr = orig_addr; ++ spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); ++ hash_add_rcu(target->addr_htbl, 
&new_addr_node->node,
++ new_addr_node->buffer_addr);
++ spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags);
++ return addr;
++}
++EXPORT_SYMBOL(koi_mem_alloc);
++// find the parameter pointer corresponding to the copy
++noinline void *koi_mem_lookup(struct module *mod, unsigned long addr)
++{
++ struct koi_mem_hash_node *target = NULL;
++ struct koi_addr_map *addr_map_node;
++ unsigned long orig_addr = addr;
++ rcu_read_lock();
++ hash_for_each_possible_rcu (koi_mem_htbl, target, node,
++ (unsigned long)mod) {
++ if (target->mod == mod) {
++ break;
++ }
++ }
++ rcu_read_unlock();
++ if (target == NULL) {
++ printk(KERN_ERR "mem node for module: %s not found\n",
++ mod->name);
++ return NULL;
++ }
++
++ rcu_read_lock();
++ hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node,
++ orig_addr) {
++ if (addr_map_node->buffer_addr == addr) {
++ break;
++ }
++ }
++ rcu_read_unlock();
++ if (addr_map_node) {
++ return (void *)(addr_map_node->orig_addr);
++ } else {
++ return NULL;
++ }
++}
++EXPORT_SYMBOL(koi_mem_lookup);
++/**
++* koi_mem_free - recycle a copy of the copied parameters and synchronize the parameters
++* @mod: driver module
++* @addr: the starting address of the parameter
++* @size: the size of the parameter
++* @is_const: whether the parameter pointer is const
++* @count: the number of variadic offset arguments that follow
++*/
++noinline void koi_mem_free(struct module *mod, unsigned long addr,
++ unsigned long size, bool is_const, int count, ...)
++{
++ struct koi_mem_hash_node *target = NULL;
++ struct koi_mem_list *mem_node;
++ struct list_head *pos = NULL;
++ struct koi_addr_map *addr_map_node;
++ unsigned long orig_size = size;
++ unsigned long orig_addr = addr;
++ va_list valist;
++ int i;
++ unsigned int offset;
++ unsigned long flags;
++ rcu_read_lock();
++ hash_for_each_possible_rcu (koi_mem_htbl, target, node,
++ (unsigned long)mod) {
++ if (target->mod == mod) {
++ break;
++ }
++ }
++ rcu_read_unlock();
++ if (target == NULL) {
++ printk(KERN_ERR "mem node for module: %s not found\n",
++ mod->name);
++ return;
++ }
++
++ rcu_read_lock();
++ hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node,
++ orig_addr) {
++ if (addr_map_node->buffer_addr == orig_addr) {
++ break;
++ }
++ }
++ rcu_read_unlock();
++ va_start(valist, count);
++ for (i = 0; i < count; i++) {
++ offset = va_arg(valist, int);
++ *(unsigned long *)(addr_map_node->buffer_addr + offset) =
++ *(unsigned long *)(addr_map_node->orig_addr + offset);
++ }
++ va_end(valist);
++ memcpy((void *)addr_map_node->orig_addr,
++ (void *)addr_map_node->buffer_addr, orig_size);
++
++ spin_lock_irqsave(&target->addr_htbl_spin_lock, flags);
++ hlist_del_init_rcu(&addr_map_node->node);
++ call_rcu(&addr_map_node->rcu, koi_addr_map_node_free);
++ spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags);
++
++ spin_lock_irqsave(&target->spin_lock, flags);
++ list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) {
++ if (mem_node->addr + mem_node->size == addr) {
++ pos = mem_node->list.prev;
++ addr = mem_node->addr;
++ size += mem_node->size;
++ list_del_rcu(&mem_node->list);
++ call_rcu(&mem_node->rcu, koi_mem_node_free);
++ } else if (addr + size == mem_node->addr) {
++ if (!pos)
++ pos = mem_node->list.prev;
++ size += mem_node->size;
++ list_del_rcu(&mem_node->list);
++ call_rcu(&mem_node->rcu, koi_mem_node_free);
++ } else if (addr + size < mem_node->addr) {
++ if (!pos)
++ pos = mem_node->list.prev;
++ break;
++ }
++ }
++ mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC);
++ mem_node->addr = addr;
++ 
mem_node->size = size; ++ if (pos) ++ list_add_rcu(&mem_node->list, pos); ++ else ++ list_add_tail_rcu(&mem_node->list, &target->mem_list_head); ++ spin_unlock_irqrestore(&target->spin_lock, flags); ++} ++EXPORT_SYMBOL(koi_mem_free); ++/** ++* koi_mem_free_callback - used to recycle the copy of parameter. ++*@addr: the address of the parameter ++*@(*func)(void*): callback func, used to release the copy of the parameter pointer ++*/ ++noinline void koi_mem_free_callback(struct module *mod, unsigned long addr, ++ unsigned long size, void (*func)(void *)) ++{ ++ struct koi_mem_hash_node *target = NULL; ++ struct koi_mem_list *mem_node; ++ struct list_head *pos = NULL; ++ struct koi_addr_map *addr_map_node; ++ unsigned long flags; ++ unsigned long orig_size = size; ++ unsigned long orig_addr = addr; ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (koi_mem_htbl, target, node, ++ (unsigned long)mod) { ++ if (target->mod == mod) { ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (target == NULL) { ++ printk("mem node for module: %s not found\n", mod->name); ++ return; ++ } ++ ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node, ++ orig_addr) { ++ if (addr_map_node->buffer_addr == orig_addr) { ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (addr_map_node != NULL) { ++ memcpy((void *)addr_map_node->orig_addr, ++ (void *)addr_map_node->buffer_addr, orig_size); ++ func((void *)addr_map_node->orig_addr); ++ } else { ++ printk("Cannot find addr_map_node in addr_htbl, maybe addr is in kernel space!!\n"); ++ func((void *)orig_addr); ++ } ++ ++ spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); ++ if (addr_map_node != NULL) { ++ hlist_del_init_rcu(&addr_map_node->node); ++ call_rcu(&addr_map_node->rcu, koi_addr_map_node_free); ++ } ++ spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags); ++ spin_lock_irqsave(&target->spin_lock, flags); ++ list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { ++ if (mem_node->addr + mem_node->size == addr) { ++ pos = mem_node->list.prev; ++ addr = mem_node->addr; ++ size += mem_node->size; ++ list_del_rcu(&mem_node->list); ++ call_rcu(&mem_node->rcu, koi_mem_node_free); ++ } else if (addr + size == mem_node->addr) { ++ if (!pos) ++ pos = mem_node->list.prev; ++ size += mem_node->size; ++ list_del_rcu(&mem_node->list); ++ call_rcu(&mem_node->rcu, koi_mem_node_free); ++ } else if (addr + size < mem_node->addr) { ++ if (!pos) ++ pos = mem_node->list.prev; ++ break; ++ } ++ } ++ mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC); ++ mem_node->addr = addr; ++ mem_node->size = size; ++ if (pos) ++ list_add_rcu(&mem_node->list, pos); ++ else ++ list_add_tail_rcu(&mem_node->list, &target->mem_list_head); ++ spin_unlock_irqrestore(&target->spin_lock, flags); ++} ++EXPORT_SYMBOL(koi_mem_free_callback); ++ ++void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size) ++{ ++ struct koi_mem_hash_node *target = NULL; ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (koi_mem_htbl, target, node, ++ (unsigned long)mod) { ++ if (target->mod == mod) ++ break; ++ } ++ rcu_read_unlock(); ++ ++ if (target == NULL) { ++ printk(KERN_ERR "mem node for module: %s not found\n", ++ mod->name); ++ return; ++ } ++ koi_copy_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, ++ (addr + size + PAGE_SIZE) & PAGE_MASK); ++} ++EXPORT_SYMBOL(koi_map_mem); ++/** ++* koi_mem_free_to_user - function 'copy_to_user' in driver space ++*/ ++void koi_mem_free_to_user(struct module *mod, unsigned long addr, ++ unsigned 
long size) ++{ ++ struct koi_mem_hash_node *target = NULL; ++ struct koi_mem_list *mem_node; ++ struct list_head *pos = NULL; ++ struct koi_addr_map *addr_map_node; ++ unsigned long flags; ++ unsigned long orig_size = size; ++ unsigned long orig_addr = addr; ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (koi_mem_htbl, target, node, ++ (unsigned long)mod) { ++ if (target->mod == mod) { ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (target == NULL) { ++ printk(KERN_ERR "mem node for module: %s not found\n", ++ mod->name); ++ return; ++ } ++ ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node, ++ orig_addr) { ++ if (addr_map_node->buffer_addr == orig_addr) { ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (copy_to_user((void *)addr_map_node->orig_addr, ++ (void *)addr_map_node->buffer_addr, orig_size)) { ++ return; ++ } ++ ++ spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); ++ hlist_del_init_rcu(&addr_map_node->node); ++ call_rcu(&addr_map_node->rcu, koi_addr_map_node_free); ++ spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags); ++ spin_lock_irqsave(&target->spin_lock, flags); ++ list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { ++ if (mem_node->addr + mem_node->size == addr) { ++ pos = mem_node->list.prev; ++ addr = mem_node->addr; ++ size += mem_node->size; ++ list_del_rcu(&mem_node->list); ++ call_rcu(&mem_node->rcu, koi_mem_node_free); ++ } else if (addr + size == mem_node->addr) { ++ if (!pos) ++ pos = mem_node->list.prev; ++ size += mem_node->size; ++ list_del_rcu(&mem_node->list); ++ call_rcu(&mem_node->rcu, koi_mem_node_free); ++ } else if (addr + size < mem_node->addr) { ++ if (!pos) ++ pos = mem_node->list.prev; ++ break; ++ } ++ } ++ mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC); ++ mem_node->addr = addr; ++ mem_node->size = size; ++ if (pos) ++ list_add_rcu(&mem_node->list, pos); ++ else ++ list_add_tail_rcu(&mem_node->list, &target->mem_list_head); ++ spin_unlock_irqrestore(&target->spin_lock, flags); ++} ++EXPORT_SYMBOL(koi_mem_free_to_user); ++// map the driver stack to kernel ++void koi_map_kostack(struct module *mod) ++{ ++ struct koi_mem_hash_node *target = NULL; ++ void *koi_stack; ++ unsigned long cur_sp; ++ asm volatile("mov %0, sp\n" : "=r"(cur_sp) :); ++ if (on_irq_stack(cur_sp, NULL)) { ++ return; ++ } ++#ifndef CONFIG_IEE ++ unsigned long res, alloc_token; ++ struct task_token *token_addr = ++ (struct task_token *)((unsigned long)current + ++ (unsigned long)koi_offset); ++ if (token_addr->koi_stack_base != NULL) ++ return; ++#else ++ koi_stack = iee_rw_gate(IEE_READ_KOI_STACK, current); ++ if (koi_stack != NULL) ++ return; ++#endif ++ koi_stack = ++ (void *)__get_free_pages(THREADINFO_GFP & ~__GFP_ACCOUNT, 3); ++ free_pages(koi_stack + 4 * PAGE_SIZE, 2); ++ printk(KERN_ERR "alloc dstack start=0x%16llx, end=0x%16llx\n", ++ koi_stack, koi_stack + THREAD_SIZE); ++#ifndef CONFIG_IEE ++ token_addr->koi_stack = ++ (struct pt_regs *)(THREAD_SIZE + (unsigned long)koi_stack) - 1; ++ token_addr->koi_stack_base = koi_stack; ++#else ++ iee_rw_gate( ++ IEE_WRITE_KOI_STACK, current, ++ (unsigned long)((struct pt_regs *)(THREAD_SIZE + ++ (unsigned long)koi_stack) - ++ 1)); ++ iee_rw_gate(IEE_WRITE_KOI_STACK_BASE, current, ++ (unsigned long)koi_stack); ++#endif ++ rcu_read_lock(); ++ hash_for_each_possible_rcu (koi_mem_htbl, target, node, ++ (unsigned long)mod) { ++ if (target->mod == mod) { ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (target == NULL) { ++ printk(KERN_ERR "mem node for 
module: %s not found\n", ++ mod->name); ++ return; ++ } ++ koi_copy_pagetable(target->ko_mm, target->pgdp, ++ (unsigned long)koi_stack, ++ (unsigned long)koi_stack + THREAD_SIZE); ++ printk(KERN_ERR "create ko stack: 0x%16llx\n", ++ (unsigned long)koi_stack); ++} ++EXPORT_SYMBOL(koi_map_kostack); +\ No newline at end of file +diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c +index cea96ee75d22..cbddc8e464e4 100644 +--- a/arch/arm64/kernel/mte.c ++++ b/arch/arm64/kernel/mte.c +@@ -79,8 +79,13 @@ int memcmp_pages(struct page *page1, struct page *page2) + static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) + { + /* Enable MTE Sync Mode for EL1. */ ++#ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK, ++ SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf)); ++#else + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf)); ++#endif + isb(); + + pr_info_once("MTE: enabled in %s mode at EL1\n", mode); +diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c +index 068e5bb2661b..c98bc1a24fc7 100644 +--- a/arch/arm64/kernel/process.c ++++ b/arch/arm64/kernel/process.c +@@ -467,11 +467,24 @@ static void ssbs_thread_switch(struct task_struct *next) + * This is *only* for exception entry from EL0, and is not valid until we + * __switch_to() a user task. + */ ++#ifdef CONFIG_IEE ++// Put __entry_task in a isolated page to protect it. ++__attribute__((aligned(PAGE_SIZE))) DEFINE_PER_CPU(struct task_struct *[PAGE_SIZE/sizeof(struct task_struct *)], __entry_task); ++extern void iee_write_entry_task(struct task_struct *tsk); ++#else + DEFINE_PER_CPU(struct task_struct *, __entry_task); ++#endif + + static void entry_task_switch(struct task_struct *next) + { ++ #ifdef CONFIG_IEE ++ if(next == &init_task) ++ iee_write_entry_task((struct task_struct *)__va(__pa_symbol(next))); ++ else ++ iee_write_entry_task(next); ++ #else + __this_cpu_write(__entry_task, next); ++ #endif + } + + /* +@@ -506,11 +519,15 @@ static void erratum_1418040_new_exec(void) + */ + void update_sctlr_el1(u64 sctlr) + { +- /* ++ /* + * EnIA must not be cleared while in the kernel as this is necessary for + * in-kernel PAC. It will be cleared on kernel exit if needed. + */ ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr); ++ #else + sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr); ++ #endif + + /* ISB required for the kernel uaccess routines when setting TCF0. 
*/
+ isb();
+diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
+index 58a97861bfc5..c7839247327d 100644
+--- a/arch/arm64/kernel/proton-pack.c
++++ b/arch/arm64/kernel/proton-pack.c
+@@ -551,7 +551,11 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
+ return state;
+ 
+ if (spectre_v4_mitigations_off()) {
++#ifdef CONFIG_IEE
++ sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_ELx_DSSBS);
++#else
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
++#endif
+ set_pstate_ssbs(1);
+ return SPECTRE_VULNERABLE;
+ }
+@@ -975,7 +979,11 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
+ if (arm64_kernel_unmapped_at_el0())
+ return;
+ 
++#ifdef CONFIG_IEE
++ iee_rwx_gate_entry(IEE_WRITE_vbar_el1, v);
++#else
+ write_sysreg(v, vbar_el1);
++#endif
+ isb();
+ }
+ 
+diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
+index 95cb22c083c8..df73a583a733 100644
+--- a/arch/arm64/kernel/setup.c
++++ b/arch/arm64/kernel/setup.c
+@@ -33,6 +33,11 @@
+ #include <linux/scs.h>
+ #include <linux/mm.h>
+ 
++#ifdef CONFIG_IEE
++#include <linux/iee-func.h>
++#include <asm/iee-si.h>
++#endif
++
+ #include <asm/acpi.h>
+ #include <asm/fixmap.h>
+ #include <asm/cpu.h>
+@@ -335,10 +340,30 @@ u64 cpu_logical_map(unsigned int cpu)
+ return __cpu_logical_map[cpu];
+ }
+ 
++#ifdef CONFIG_IEE
++/* used for secure modification of vbar */
++extern char __bp_harden_el1_vectors[];
++/* prepare the iee rwx gate for the scenario of ttbr1=init_pg_dir */
++static void __init iee_si_init_early(void)
++{
++ /* prepare data used for iee rwx gate. */
++ iee_base_idmap_pg_dir = phys_to_ttbr(__pa_symbol(idmap_pg_dir));
++ iee_base_reserved_pg_dir = phys_to_ttbr(__pa_symbol(reserved_pg_dir))
++ | FIELD_PREP(TTBR_ASID_MASK, 1);
++ iee_base__bp_harden_el1_vectors = (unsigned long)__bp_harden_el1_vectors;
++ iee_si_tcr = 0;
++}
++#endif
++
+ void __init __no_sanitize_address setup_arch(char **cmdline_p)
+ {
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
+ 
++ #ifdef CONFIG_IEE
++ init_new_context(&init_task, &init_mm);
++ atomic64_set(&init_mm.context.id, (1UL << get_cpu_asid_bits()) | INIT_ASID);
++ #endif
++
+ *cmdline_p = boot_command_line;
+ 
+ kaslr_init();
+@@ -371,6 +396,14 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
+ */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ 
++#ifdef CONFIG_IEE
++ /*
++ * Map iee si codes to init_pg_dir to run the following
++ * cpu_uninstall_idmap() which writes ttbr0.
++ */
++ iee_si_init_early();
++#endif
++
+ /*
+ * TTBR0 is only used for the identity mapping at this stage. Make it
+ * point to zero page to avoid speculatively fetching new entries.
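
The hunks above (mte.c, process.c, proton-pack.c, setup.c) all apply one recurring substitution: under CONFIG_IEE, direct writes to security-critical system registers are replaced by calls that route the update through the IEE gate, so that SCTLR_EL1, VBAR_EL1 and friends can only be changed from inside the isolated environment. A minimal sketch of the pattern, assuming sysreg_clear_set_iee_si() keeps the clear/set contract of the stock sysreg_clear_set(); the helper my_set_tcf() is illustrative and not part of the patch:

	static void my_set_tcf(unsigned long tcf)	/* hypothetical helper */
	{
	#ifdef CONFIG_IEE
		/* perform the SCTLR_EL1.TCF update inside the IEE gate */
		sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK,
					SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
	#else
		sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
				 SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
	#endif
		isb();	/* make the new SCTLR_EL1 value architecturally visible */
	}

The one whole-register case, VBAR_EL1 in this_cpu_set_vectors(), instead goes through iee_rwx_gate_entry(IEE_WRITE_vbar_el1, v), presumably because a clear/set wrapper does not fit a full-register write.
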
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index b7b7afb4a8c7..168a9390d6e9 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -902,6 +902,32 @@ const char *esr_get_class_string(unsigned long esr)
+ return esr_class_str[ESR_ELx_EC(esr)];
+ }
+ 
++#ifdef CONFIG_IEE
++extern void arm64_enter_nmi(struct pt_regs *regs);
++static const char *handler[] = {
++ "SP_EL0",
++ "ELR_EL1",
++ "TCR_EL1",
++ "TTBR0 ASID",
++ "IEE_SI"
++};
++
++asmlinkage void notrace iee_bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
++{
++ arm64_enter_nmi(regs);
++
++ console_verbose();
++
++ pr_crit("IEE : Bad mode in %s check detected on CPU%d, code 0x%08x -- %s\n",
++ handler[reason], smp_processor_id(), esr,
++ esr_get_class_string(esr));
++
++ __show_regs(regs);
++ local_daif_mask();
++ panic("bad mode");
++}
++#endif
++
+ /*
+ * bad_el0_sync handles unexpected, but potentially recoverable synchronous
+ * exceptions taken from EL0.
+diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
+index 3cd7e76cc562..287eaba7a15b 100644
+--- a/arch/arm64/kernel/vmlinux.lds.S
++++ b/arch/arm64/kernel/vmlinux.lds.S
+@@ -134,6 +134,52 @@ jiffies = jiffies_64;
+ #define UNWIND_DATA_SECTIONS
+ #endif
+ 
++#ifdef CONFIG_IEE
++#define IEE_TEXT \
++ . = ALIGN(PAGE_SIZE); \
++ __iee_code_start = .; \
++ *(.iee.text.header) \
++ *(.iee.text) \
++ . = ALIGN(PAGE_SIZE); \
++ __iee_code_end = .;
++#else
++#define IEE_TEXT
++#endif
++
++#ifdef CONFIG_IEE
++#define IEE_SI_TEXT \
++ . = ALIGN(PAGE_SIZE); \
++ __iee_si_data_start = .; \
++ *(.iee.si_data) \
++ . = ALIGN(PAGE_SIZE); \
++ __iee_exec_entry_start = .; \
++ __iee_si_no_irq = . + (16); \
++ *(.iee.exec_entry) \
++ . = ALIGN(PAGE_SIZE); \
++ __iee_si_start = .; \
++ *(.iee.si_text) \
++ . = ALIGN(PAGE_SIZE); \
++ . += PAGE_SIZE - (24); \
++ __iee_si_end = . + (24); \
++ __iee_exec_exit = .; \
++ *(.iee.exec_exit) \
++ . = ALIGN(PAGE_SIZE);
++
++#else
++#define IEE_SI_TEXT
++#endif
++
++#ifdef CONFIG_KOI
++#define KOI_TEXT \
++ . = ALIGN(PAGE_SIZE); \
++ __koi_code_start = .; \
++ *(.koi.text) \
++ . = ALIGN(PAGE_SIZE); \
++ __koi_code_end = .;
++#else
++#define KOI_TEXT
++#endif
++
+ /*
+ * The size of the PE/COFF section that covers the kernel image, which
+ * runs from _stext to _edata, must be a round multiple of the PE/COFF
+@@ -176,10 +222,13 @@ SECTIONS
+ SOFTIRQENTRY_TEXT
+ ENTRY_TEXT
+ TEXT_TEXT
++ IEE_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ HYPERVISOR_TEXT
++ IEE_SI_TEXT
++ KOI_TEXT
+ *(.gnu.warning)
+ }
+ 
+@@ -318,6 +367,18 @@ SECTIONS
+ . += INIT_DIR_SIZE;
+ init_pg_end = .;
+ 
++ #ifdef CONFIG_IEE
++ . = ALIGN(PAGE_SIZE*8);
++ init_iee_stack_begin = .;
++ . += PAGE_SIZE*4;
++ init_iee_stack_end = .;
++
++ . = ALIGN(PAGE_SIZE);
++ init_iee_si_stack_begin = .;
++ . += PAGE_SIZE*4;
++ init_iee_si_stack_end = .;
++ #endif
++
+ . = ALIGN(SEGMENT_ALIGN);
+ __pecoff_data_size = ABSOLUTE(.
- __initdata_begin); + _end = .; +diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c +index 188197590fc9..97e1f86046cf 100644 +--- a/arch/arm64/mm/context.c ++++ b/arch/arm64/mm/context.c +@@ -17,6 +17,10 @@ + #include <asm/smp.h> + #include <asm/tlbflush.h> + ++#ifdef CONFIG_IEE ++#include <asm/iee-si.h> ++#endif ++ + static u32 asid_bits; + static DEFINE_RAW_SPINLOCK(cpu_asid_lock); + +@@ -39,7 +43,11 @@ static unsigned long *pinned_asid_map; + #define asid2ctxid(asid, genid) ((asid) | (genid)) + + /* Get the ASIDBits supported by the current CPU */ ++#ifdef CONFIG_IEE ++u32 get_cpu_asid_bits(void) ++#else + static u32 get_cpu_asid_bits(void) ++#endif + { + u32 asid; + int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1), +@@ -212,6 +220,38 @@ static u64 new_context(struct mm_struct *mm) + return asid2ctxid(asid, generation); + } + ++#ifdef CONFIG_KOI ++/* ++ * This function is used to check and allocate ASID for ko's pgd ++ * The mm MUST point to the isolated kos' mm_struct, other behaviours are undefined. ++ */ ++void koi_check_and_switch_context(struct mm_struct *mm) { ++ u64 asid = atomic64_read(&mm->context.id); ++ u64 old_active_asid; ++ unsigned long flags; ++ unsigned int cpu; ++ ++ old_active_asid = atomic64_read(this_cpu_ptr(&active_asids)); ++ if (old_active_asid && asid_gen_match(asid) && atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids), old_active_asid, asid)) { ++ return; ++ } ++ ++ raw_spin_lock_irqsave(&cpu_asid_lock, flags); ++ asid = atomic64_read(&mm->context.id); ++ if (!asid_gen_match(asid)) { ++ asid = new_context(mm); ++ atomic64_set(&mm->context.id, asid); ++ } ++ ++ cpu = smp_processor_id(); ++ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) ++ local_flush_tlb_all(); ++ ++ atomic64_set(this_cpu_ptr(&active_asids), asid); ++ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); ++} ++#endif ++ + void check_and_switch_context(struct mm_struct *mm) + { + unsigned long flags; +@@ -348,7 +388,9 @@ asmlinkage void post_ttbr_update_workaround(void) + + void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) + { ++ #ifndef CONFIG_IEE + unsigned long ttbr1 = read_sysreg(ttbr1_el1); ++ #endif + unsigned long asid = ASID(mm); + unsigned long ttbr0 = phys_to_ttbr(pgd_phys); + +@@ -360,14 +402,28 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) + if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN)) + ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid); + +- /* Set ASID in TTBR1 since TCR.A1 is set */ ++ #ifdef CONFIG_IEE ++ ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid+1); ++ iee_rwx_gate_entry(IEE_CONTEXT_SWITCH, ttbr0); ++ // TODO : if defined CONFIG_IEE and defined CONFIG_KOI ++ #else ++ /* Set ASID in TTBR0 since TCR.A1 is set 0*/ ++ ++ #ifdef CONFIG_KOI ++ ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid+1); ++ ttbr1 &= ~TTBR_ASID_MASK; ++ ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); ++ #else + ttbr1 &= ~TTBR_ASID_MASK; + ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); +- ++ ++ #endif + cpu_set_reserved_ttbr0_nosync(); + write_sysreg(ttbr1, ttbr1_el1); + write_sysreg(ttbr0, ttbr0_el1); + isb(); ++ #endif ++ + post_ttbr_update_workaround(); + } + +@@ -375,11 +431,21 @@ static int asids_update_limit(void) + { + unsigned long num_available_asids = NUM_USER_ASIDS; + +- if (arm64_kernel_unmapped_at_el0()) { +- num_available_asids /= 2; +- if (pinned_asid_map) +- set_kpti_asid_bits(pinned_asid_map); +- } ++ #if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ num_available_asids /= 2; ++ if (pinned_asid_map) { ++ unsigned int len = 
BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); ++ memset(pinned_asid_map, 0xaa, len); ++ __set_bit(INIT_ASID, pinned_asid_map); ++ } ++ #else ++ if (arm64_kernel_unmapped_at_el0()) { ++ num_available_asids /= 2; ++ if (pinned_asid_map) ++ set_kpti_asid_bits(pinned_asid_map); ++ } ++ #endif ++ + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. +@@ -400,6 +466,10 @@ arch_initcall(asids_update_limit); + + static int asids_init(void) + { ++ #if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ unsigned int len; ++ #endif ++ + asid_bits = get_cpu_asid_bits(); + atomic64_set(&asid_generation, ASID_FIRST_VERSION); + asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); +@@ -410,6 +480,11 @@ static int asids_init(void) + pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); + nr_pinned_asids = 0; + ++ #if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); ++ memset(asid_map, 0xaa, len); ++ __set_bit(INIT_ASID, asid_map); ++ #else + /* + * We cannot call set_reserved_asid_bits() here because CPU + * caps are not finalized yet, so it is safer to assume KPTI +@@ -417,6 +492,8 @@ static int asids_init(void) + */ + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + set_kpti_asid_bits(asid_map); ++ #endif ++ + return 0; + } + early_initcall(asids_init); +diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c +index 4ea07caba71c..de1d57c2b30f 100644 +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -261,7 +261,11 @@ int __ptep_set_access_flags(struct vm_area_struct *vma, + pteval ^= PTE_RDONLY; + pteval |= pte_val(entry); + pteval ^= PTE_RDONLY; ++ #ifdef CONFIG_PTP ++ pteval = iee_set_cmpxchg_relaxed(ptep, old_pteval, pteval); ++ #else + pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); ++ #endif + } while (pteval != old_pteval); + + /* Invalidate a stale read-only entry */ +@@ -376,8 +380,13 @@ static void do_tag_recovery(unsigned long addr, unsigned long esr, + * It will be done lazily on the other CPUs when they will hit a + * tag fault. 
+ */ ++ #ifdef CONFIG_IEE ++ sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK, ++ SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); ++ #else + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); ++ #endif + isb(); + } + +diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c +index bfc02568805a..580ecb596d2d 100644 +--- a/arch/arm64/mm/fixmap.c ++++ b/arch/arm64/mm/fixmap.c +@@ -32,6 +32,22 @@ static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; + static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; + static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; + ++#ifdef CONFIG_IEE ++void *bm_pte_addr = (void *)bm_pte; ++void *bm_pmd_addr = (void *)bm_pmd; ++void *bm_pud_addr = (void *)bm_pud; ++#endif ++ ++#ifdef CONFIG_PTP ++extern void __iee_p4d_populate_pre_init(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot); ++extern void __iee_pud_populate_pre_init(pud_t *pudp, phys_addr_t pmdp, pudval_t prot); ++extern void __iee_pmd_populate_pre_init(pmd_t *pmdp, phys_addr_t ptep, ++ pmdval_t prot); ++ ++extern void iee_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d); ++#define set_pgd_init(pgdptr, pgdval) iee_set_p4d_pre_init((p4d_t *)(pgdptr), (p4d_t) { pgdval }) ++#endif ++ + static inline pte_t *fixmap_pte(unsigned long addr) + { + return &bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)]; +@@ -44,7 +60,11 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) + + if (pmd_none(pmd)) { + ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; ++ #ifdef CONFIG_PTP ++ __iee_pmd_populate_pre_init(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); ++ #else + __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); ++ #endif + } + } + +@@ -55,8 +75,13 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + +- if (pud_none(pud)) ++ if (pud_none(pud)) { ++ #ifdef CONFIG_PTP ++ __iee_pud_populate_pre_init(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); ++ #else + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); ++ #endif ++ } + + pmdp = pmd_offset_kimg(pudp, addr); + do { +@@ -82,8 +107,13 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + } + +- if (p4d_none(p4d)) ++ if (p4d_none(p4d)) { ++ #ifdef CONFIG_PTP ++ __iee_p4d_populate_pre_init(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); ++ #else + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); ++ #endif ++ } + + pudp = pud_offset_kimg(p4dp, addr); + early_fixmap_init_pmd(pudp, addr, end); +@@ -106,6 +136,27 @@ void __init early_fixmap_init(void) + early_fixmap_init_pud(p4dp, addr, end); + } + ++#ifdef CONFIG_PTP ++extern void iee_set_pte_pre_init(pte_t *ptep, pte_t pte); ++void __iee_set_fixmap_pre_init(enum fixed_addresses idx, ++ phys_addr_t phys, pgprot_t flags) ++{ ++ unsigned long addr = __fix_to_virt(idx); ++ pte_t *ptep; ++ ++ BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); ++ ++ ptep = fixmap_pte(addr); ++ ++ if (pgprot_val(flags)) { ++ iee_set_pte_pre_init(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); ++ } else { ++ iee_set_pte_pre_init(ptep, __pte(0)); ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ } ++} ++#endif ++ + /* + * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we + * ever need to use IPIs for TLB broadcasting, then we're in trouble here. 
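
For context on the fixmap.c hunk above: __iee_set_fixmap_pre_init() mirrors the stock __set_fixmap() flow (resolve the fixmap slot to its PTE via fixmap_pte(), then either install a mapping or clear it and flush the TLB), but it goes through iee_set_pte_pre_init() so it is usable before the IEE page-table protections are switched on. A hedged usage sketch; FIX_EARLYCON_MEM_BASE and FIXMAP_PAGE_IO are existing kernel symbols used purely as example arguments, and both helpers are illustrative rather than part of the patch:

	/* Illustrative only: temporarily map a physical page through a fixmap
	 * slot before IEE is initialised, using the pre-init setter above. */
	static void *__init my_map_phys_page(phys_addr_t phys)
	{
		__iee_set_fixmap_pre_init(FIX_EARLYCON_MEM_BASE, phys, FIXMAP_PAGE_IO);
		return (void *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
	}

	static void __init my_unmap_phys_page(void)
	{
		/* a zero pgprot takes the clear path: zero the PTE, flush the TLB */
		__iee_set_fixmap_pre_init(FIX_EARLYCON_MEM_BASE, 0, __pgprot(0));
	}
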
+@@ -121,9 +172,17 @@ void __set_fixmap(enum fixed_addresses idx, + ptep = fixmap_pte(addr); + + if (pgprot_val(flags)) { ++ #ifdef CONFIG_PTP ++ iee_set_bm_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); ++ #else + __set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); ++ #endif + } else { ++ #ifdef CONFIG_PTP ++ iee_set_bm_pte(ptep, __pte(0)); ++ #else + __pte_clear(&init_mm, addr, ptep); ++ #endif + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } + } +@@ -179,8 +238,13 @@ void __init fixmap_copy(pgd_t *pgdir) + * live in the carveout for the swapper_pg_dir. We can simply + * re-use the existing dir for the fixmap. + */ ++ #ifdef CONFIG_PTP ++ set_pgd_init(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), ++ READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); ++ #else + set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), + READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); ++ #endif + } else if (CONFIG_PGTABLE_LEVELS > 3) { + pgd_t *bm_pgdp; + p4d_t *bm_p4dp; +@@ -194,9 +258,15 @@ void __init fixmap_copy(pgd_t *pgdir) + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START); + bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START); ++ #ifdef CONFIG_PTP ++ bm_pudp = pud_set_fixmap_offset_init(bm_p4dp, FIXADDR_TOT_START); ++ __iee_pud_populate_pre_init(bm_pudp, __pa(lm_alias(bm_pmd)), PMD_TYPE_TABLE); ++ pud_clear_fixmap_init(); ++ #else + bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START); + pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); + pud_clear_fixmap(); ++ #endif + } else { + BUG(); + } +diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c +index 8c8d7653ba84..4190f5c10d68 100644 +--- a/arch/arm64/mm/init.c ++++ b/arch/arm64/mm/init.c +@@ -55,8 +55,19 @@ + * that cannot be mistaken for a real physical address. + */ + s64 memstart_addr __ro_after_init = -1; ++#if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++s64 memstart_addr_init __ro_after_init = -1; ++#endif ++#ifdef CONFIG_KOI ++s64 koi_offset __ro_after_init = -1; ++EXPORT_SYMBOL(koi_offset); ++#endif ++#ifdef CONFIG_IEE ++s64 iee_offset __ro_after_init = -1; ++#endif + EXPORT_SYMBOL(memstart_addr); + ++ + /* + * If the corresponding config options are enabled, we create both ZONE_DMA + * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory +@@ -421,7 +432,11 @@ early_param("memmap", parse_memmap_opt); + + void __init arm64_memblock_init(void) + { ++ #if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ s64 linear_region_size = BIT(vabits_actual - 2); ++ #else + s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); ++ #endif + + /* + * Corner case: 52-bit VA capable systems running KVM in nVHE mode may +@@ -438,13 +453,24 @@ void __init arm64_memblock_init(void) + } + + /* Remove memory above our supported physical address size */ ++ #ifdef CONFIG_IEE ++ // If config iee, phys size can not be above 0x400000000000 ++ if(__pa_symbol(_end) > BIT_ULL(vabits_actual - 2)) ++ panic("Image on too high phys mem.\n"); ++ else ++ memblock_remove(BIT_ULL(vabits_actual - 2), ULLONG_MAX); ++ #else + memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); ++ #endif + + /* + * Select a suitable value for the base of physical memory. 
+ */ + memstart_addr = round_down(memblock_start_of_DRAM(), + ARM64_MEMSTART_ALIGN); ++ #if defined(CONFIG_IEE) || defined(CONFIG_KOI) ++ memstart_addr_init = memstart_addr; ++ #endif + + if ((memblock_end_of_DRAM() - memstart_addr) > linear_region_size) + pr_warn("Memory doesn't fit in the linear mapping, VA_BITS too small\n"); +@@ -531,6 +557,14 @@ void __init arm64_memblock_init(void) + ((range * memstart_offset_seed) >> 16); + } + } ++ ++ #ifdef CONFIG_KOI ++ koi_offset = memstart_addr - memstart_addr_init + KOI_OFFSET; ++ #endif ++ #ifdef CONFIG_IEE ++ iee_offset = memstart_addr - memstart_addr_init + ((unsigned long)BIT(vabits_actual - 2)); ++ #endif ++ //printk(KERN_ERR "koi_offset: 0x%16llx\n", koi_offset); + + /* + * Register the kernel text, kernel data, initrd, and initial +diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c +index 4142a75a414e..094f3798441d 100644 +--- a/arch/arm64/mm/mmu.c ++++ b/arch/arm64/mm/mmu.c +@@ -6,6 +6,7 @@ + * Copyright (C) 2012 ARM Ltd. + */ + ++#include "asm/pgtable.h" + #include <linux/cache.h> + #include <linux/export.h> + #include <linux/kernel.h> +@@ -40,6 +41,11 @@ + #include <asm/tlbflush.h> + #include <asm/pgalloc.h> + #include <asm/kfence.h> ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#include <asm/iee.h> ++#include <asm/iee-si.h> ++#endif + + #define NO_BLOCK_MAPPINGS BIT(0) + #define NO_CONT_MAPPINGS BIT(1) +@@ -76,8 +82,282 @@ EXPORT_SYMBOL(empty_zero_page); + static DEFINE_SPINLOCK(swapper_pgdir_lock); + static DEFINE_MUTEX(fixmap_lock); + ++#ifdef CONFIG_IEE ++extern struct cred init_cred; ++ ++extern unsigned long init_iee_stack_begin[]; ++extern unsigned long init_iee_stack_end[]; ++extern unsigned long __iee_si_data_start[]; ++extern unsigned long __iee_exec_entry_start[]; ++extern unsigned long __iee_si_start[]; ++extern unsigned long __iee_si_end[]; ++ ++extern void *bm_pte_addr; ++extern void *bm_pmd_addr; ++extern void *bm_pud_addr; ++ ++#ifdef CONFIG_PTP ++ ++/* Funcs to set pgtable before iee initialized. */ ++static void iee_set_swapper_pgd_pre_init(pgd_t *pgdp, pgd_t pgd) ++{ ++ pgd_t *fixmap_pgdp; ++ ++ spin_lock(&swapper_pgdir_lock); ++ fixmap_pgdp = pgd_set_fixmap_init(__pa_symbol(pgdp)); ++ WRITE_ONCE(*fixmap_pgdp, pgd); ++ /* ++ * We need dsb(ishst) here to ensure the page-table-walker sees ++ * our new entry before set_p?d() returns. The fixmap's ++ * flush_tlb_kernel_range() via clear_fixmap() does this for us. ++ */ ++ pgd_clear_fixmap_init(); ++ spin_unlock(&swapper_pgdir_lock); ++} ++ ++void iee_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d) ++{ ++ if (in_swapper_pgdir(p4dp)) { ++ iee_set_swapper_pgd_pre_init((pgd_t *)p4dp, __pgd(p4d_val(p4d))); ++ return; ++ } ++ ++ WRITE_ONCE(*p4dp, p4d); ++ dsb(ishst); ++ isb(); ++} ++ ++static inline void iee_set_pud_pre_init(pud_t *pudp, pud_t pud) ++{ ++#ifdef __PAGETABLE_PUD_FOLDED ++ if (in_swapper_pgdir(pudp)) { ++ iee_set_swapper_pgd_pre_init((pgd_t *)pudp, __pgd(pud_val(pud))); ++ return; ++ } ++#endif /* __PAGETABLE_PUD_FOLDED */ ++#ifdef CONFIG_KOI ++ pudval_t val = pud_val(pud); ++ if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) { ++ // There is no PUD_SEC_NG, so we use PMD_SECT_NG instead. 
++ pud = __pud(val | PMD_SECT_NG); ++ } ++#endif ++ WRITE_ONCE(*pudp, pud); ++ ++ if (pud_valid(pud)) { ++ dsb(ishst); ++ isb(); ++ } ++} ++ ++static inline void iee_set_pmd_pre_init(pmd_t *pmdp, pmd_t pmd) ++{ ++#ifdef __PAGETABLE_PMD_FOLDED ++ if (in_swapper_pgdir(pmdp)) { ++ iee_set_swapper_pgd_pre_init((pgd_t *)pmdp, __pgd(pmd_val(pmd))); ++ return; ++ } ++#endif /* __PAGETABLE_PMD_FOLDED */ ++#ifdef CONFIG_KOI ++ pmdval_t val = pmd_val(pmd); ++ if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { ++ pmd = __pmd(val | PMD_SECT_NG); ++ } ++#endif ++ WRITE_ONCE(*pmdp, pmd); ++ ++ if (pmd_valid(pmd)) { ++ dsb(ishst); ++ isb(); ++ } ++} ++ ++ ++void __iee_p4d_populate_pre_init(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) ++{ ++ iee_set_p4d_pre_init(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); ++} ++ ++void __iee_pud_populate_pre_init(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) ++{ ++ iee_set_pud_pre_init(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); ++} ++ ++void __iee_pmd_populate_pre_init(pmd_t *pmdp, phys_addr_t ptep, ++ pmdval_t prot) ++{ ++ iee_set_pmd_pre_init(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); ++} ++ ++/* Funcs to set fixmap before iee initialized. */ ++bool pgattr_change_is_safe(u64 old, u64 new); ++static int iee_pud_set_huge_fixmap(pud_t *pudp, phys_addr_t phys, pgprot_t prot) ++{ ++ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); ++ ++ /* Only allow permission changes for now */ ++ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), ++ pud_val(new_pud))) ++ return 0; ++ ++ VM_BUG_ON(phys & ~PUD_MASK); ++ iee_set_fixmap_pud_pre_init(pudp, new_pud); ++ return 1; ++} ++ ++static int iee_pmd_set_huge_fixmap(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) ++{ ++ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); ++ ++ /* Only allow permission changes for now */ ++ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), ++ pmd_val(new_pmd))) ++ return 0; ++ ++ VM_BUG_ON(phys & ~PMD_MASK); ++ iee_set_fixmap_pmd_pre_init(pmdp, new_pmd); ++ return 1; ++} ++ ++static inline void __iee_pmd_populate_fixmap(pmd_t *pmdp, phys_addr_t ptep, ++ pmdval_t prot) ++{ ++ iee_set_fixmap_pmd_pre_init(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); ++} ++ ++static inline void __iee_pud_populate_fixmap(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) ++{ ++ iee_set_fixmap_pud_pre_init(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); ++} ++#endif /* END CONFIG_PTP*/ ++ ++void iee_set_pte_pre_init(pte_t *ptep, pte_t pte) ++{ ++#ifdef CONFIG_KOI ++ if (!pte_none(pte)) { ++ pte = __pte(pte_val(pte) | PTE_NG); ++ } ++#endif ++ WRITE_ONCE(*ptep, pte); ++ ++ /* ++ * Only if the new pte is valid and kernel, otherwise TLB maintenance ++ * or update_mmu_cache() have the necessary barriers. 
++ */ ++ if (pte_valid_not_user(pte)) { ++ dsb(ishst); ++ isb(); ++ } ++} ++ ++static void __init iee_set_token_page_valid_pre_init(void *token, void *new) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); ++ ++ pud_t *pudp = pud_offset(p4dp, (unsigned long)token); ++ ++ pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); ++ pte_t pte = READ_ONCE(*ptep); ++ pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); ++ iee_set_pte_pre_init(ptep, pte); ++ flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); ++ isb(); ++} ++#endif /* END CONFIG_IEE*/ ++ ++#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) ++int koi_add_page_mapping(unsigned long dst, unsigned long src) ++{ ++ pgd_t *src_pgdp, *dst_pgdp; ++ p4d_t *src_p4dp, *dst_p4dp; ++ pud_t *src_pudp, *dst_pudp; ++ pmd_t *src_pmdp, *dst_pmdp; ++ pte_t *src_ptep, *dst_ptep; ++ ++ src_pgdp = pgd_offset_pgd(swapper_pg_dir, src); ++ dst_pgdp = pgd_offset_pgd(swapper_pg_dir, dst); ++ ++ src_p4dp = p4d_offset(src_pgdp, src); ++ dst_p4dp = p4d_alloc(&init_mm, dst_pgdp, dst); ++ if (!dst_p4dp) { ++ return -ENOMEM; ++ } ++ src_pudp = pud_offset(src_p4dp, src); ++ dst_pudp = pud_alloc(&init_mm, dst_p4dp, dst); ++ if (!dst_pudp) { ++ return -ENOMEM; ++ } ++ if (pud_val(*src_pudp) & PMD_TABLE_BIT) { ++ src_pmdp = pmd_offset(src_pudp, src); ++ dst_pmdp = pmd_alloc(&init_mm, dst_pudp, dst); ++ if (!dst_pmdp) { ++ return -ENOMEM; ++ } ++ if (pmd_val(*src_pmdp) & PMD_TABLE_BIT) { ++ src_ptep = pte_offset_kernel(src_pmdp, src); ++ dst_ptep = pte_alloc_map(&init_mm, dst_pmdp, dst); ++ set_pte(dst_ptep, *src_ptep); ++ } else { ++ set_pte((pte_t *)dst_pmdp, pmd_pte(*src_pmdp)); ++ } ++ } else { ++ set_pte((pte_t *)dst_pudp, pud_pte(*src_pudp)); ++ } ++ ++ ++ flush_tlb_kernel_range(dst, dst+PAGE_SIZE); ++ isb(); ++ return 0; ++} ++ ++void koi_remove_page_mapping(unsigned long addr) { ++ pgd_t *src_pgdp; ++ p4d_t *src_p4dp; ++ pud_t *src_pudp; ++ pmd_t *src_pmdp; ++ pte_t *src_ptep; ++ ++ src_pgdp = pgd_offset_pgd(swapper_pg_dir, addr); ++ if (pgd_none(*src_pgdp) || pgd_bad(*src_pgdp)) ++ return; ++ src_p4dp = p4d_offset(src_pgdp, addr); ++ if (p4d_none(*src_p4dp) || p4d_bad(*src_p4dp)) ++ return; ++ src_pudp = pud_offset(src_p4dp, addr); ++ if (pud_none(*src_pudp)) ++ return; ++ if (pud_val(*src_pudp) & PMD_TABLE_BIT) { ++ src_pmdp = pmd_offset(src_pudp, addr); ++ if (pmd_none(*src_pmdp)) ++ return; ++ if (pmd_val(*src_pmdp) & PMD_TABLE_BIT) { ++ src_ptep = pte_offset_kernel(src_pmdp, addr); ++ if(!pte_none(*src_ptep)) ++ pte_clear(&init_mm, addr, src_ptep); ++ } else { ++ pmd_clear(src_pmdp); ++ } ++ } else { ++ pud_clear(src_pudp); ++ } ++ ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ isb(); ++} ++#endif ++ + void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) + { ++ #ifdef CONFIG_PTP ++ spin_lock(&swapper_pgdir_lock); ++ iee_rw_gate(IEE_OP_SET_SWAPPER_PGD, pgdp, pgd); ++ spin_unlock(&swapper_pgdir_lock); ++ #else + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); +@@ -90,6 +370,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) + */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); ++ #endif + } + + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, +@@ -118,7 +399,11 @@ static phys_addr_t __init early_pgtable_alloc(int shift) + * slot will be free, so we can (ab)use the FIX_PTE slot to initialise + * 
any level of table. + */ ++ #ifdef CONFIG_PTP ++ ptr = pte_set_fixmap_init(phys); ++ #else + ptr = pte_set_fixmap(phys); ++ #endif + + memset(ptr, 0, PAGE_SIZE); + +@@ -126,11 +411,16 @@ static phys_addr_t __init early_pgtable_alloc(int shift) + * Implicit barriers also ensure the zeroed page is visible to the page + * table walker + */ ++ #ifdef CONFIG_PTP ++ pte_clear_fixmap_init(); ++ #else + pte_clear_fixmap(); ++ #endif + + return phys; + } + ++ + bool pgattr_change_is_safe(u64 old, u64 new) + { + /* +@@ -178,7 +468,11 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, + do { + pte_t old_pte = __ptep_get(ptep); + ++ #ifdef CONFIG_PTP ++ iee_set_fixmap_pte_pre_init(ptep, pfn_pte(__phys_to_pfn(phys), prot)); ++ #else + __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); ++ #endif + + /* + * After the PTE entry has been populated once, we +@@ -211,7 +505,11 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, + pmdval |= PMD_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pte_phys = pgtable_alloc(PAGE_SHIFT); ++ #ifdef CONFIG_PTP ++ __iee_pmd_populate_fixmap(pmdp, pte_phys, pmdval); ++ #else + __pmd_populate(pmdp, pte_phys, pmdval); ++ #endif + pmd = READ_ONCE(*pmdp); + } + BUG_ON(pmd_bad(pmd)); +@@ -248,7 +546,11 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, + /* try section mapping first */ + if (((addr | next | phys) & ~PMD_MASK) == 0 && + (flags & NO_BLOCK_MAPPINGS) == 0) { ++ #ifdef CONFIG_PTP ++ iee_pmd_set_huge_fixmap(pmdp, phys, prot); ++ #else + pmd_set_huge(pmdp, phys, prot); ++ #endif + + /* + * After the PMD entry has been populated once, we +@@ -289,7 +591,11 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, + pudval |= PUD_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(PMD_SHIFT); +- __pud_populate(pudp, pmd_phys, pudval); ++ #ifdef CONFIG_PTP ++ __iee_pud_populate_fixmap(pudp, pmd_phys, PUD_TYPE_TABLE); ++ #else ++ __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); ++ #endif + pud = READ_ONCE(*pudp); + } + BUG_ON(pud_bad(pud)); +@@ -345,7 +651,11 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, + if (pud_sect_supported() && + ((addr | next | phys) & ~PUD_MASK) == 0 && + (flags & NO_BLOCK_MAPPINGS) == 0) { ++ #ifdef CONFIG_PTP ++ iee_pud_set_huge_fixmap(pudp, phys, prot); ++ #else + pud_set_huge(pudp, phys, prot); ++ #endif + + /* + * After the PUD entry has been populated once, we +@@ -374,6 +684,10 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, + { + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); ++ #ifdef CONFIG_IEE ++ p4d_t *p4dp; ++ p4d_t p4d; ++ #endif + + /* + * If the virtual and physical address don't have the same offset +@@ -390,10 +704,14 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, + next = pgd_addr_end(addr, end); + alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); ++ #ifdef CONFIG_IEE ++ p4dp = p4d_offset(pgdp, addr); ++ p4d = READ_ONCE(*p4dp); ++ __p4d_populate(p4dp, __p4d_to_phys(p4d), (PGD_APT | PUD_TYPE_TABLE)); ++ #endif + phys += next - addr; + } while (pgdp++, addr = next, addr != end); + } +- + static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, +@@ -413,162 +731,844 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t (*pgtable_alloc)(int), int flags); + #endif + +-static phys_addr_t __pgd_pgtable_alloc(int 
shift) ++#ifdef CONFIG_PTP ++static int __init iee_pud_set_huge_pre_init(pud_t *pudp, phys_addr_t phys, pgprot_t prot) + { +- void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); +- BUG_ON(!ptr); ++ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); + +- /* Ensure the zeroed page is visible to the page table walker */ +- dsb(ishst); +- return __pa(ptr); ++ /* Only allow permission changes for now */ ++ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), ++ pud_val(new_pud))) ++ return 0; ++ ++ VM_BUG_ON(phys & ~PUD_MASK); ++ iee_set_pud_pre_init(pudp, new_pud); ++ return 1; + } + +-static phys_addr_t pgd_pgtable_alloc(int shift) ++static int __init iee_pmd_set_huge_pre_init(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) + { +- phys_addr_t pa = __pgd_pgtable_alloc(shift); +- struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); +- +- /* +- * Call proper page table ctor in case later we need to +- * call core mm functions like apply_to_page_range() on +- * this pre-allocated page table. +- * +- * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is +- * folded, and if so pagetable_pte_ctor() becomes nop. +- */ +- if (shift == PAGE_SHIFT) +- BUG_ON(!pagetable_pte_ctor(ptdesc)); +- else if (shift == PMD_SHIFT) +- BUG_ON(!pagetable_pmd_ctor(ptdesc)); ++ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); + +- return pa; +-} ++ /* Only allow permission changes for now */ ++ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), ++ pmd_val(new_pmd))) ++ return 0; + +-/* +- * This function can only be used to modify existing table entries, +- * without allocating new levels of table. Note that this permits the +- * creation of new section or page entries. +- */ +-void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, +- phys_addr_t size, pgprot_t prot) +-{ +- if (virt < PAGE_OFFSET) { +- pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", +- &phys, virt); +- return; +- } +- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, +- NO_CONT_MAPPINGS); ++ VM_BUG_ON(phys & ~PMD_MASK); ++ iee_set_pmd_pre_init(pmdp, new_pmd); ++ return 1; + } + +-void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, +- unsigned long virt, phys_addr_t size, +- pgprot_t prot, bool page_mappings_only) ++static __init void iee_init_pte_pre_init(pmd_t *pmdp, unsigned long addr, unsigned long end, ++ phys_addr_t phys, pgprot_t prot) + { +- int flags = 0; +- +- BUG_ON(mm == &init_mm); ++ pte_t *ptep; + +- if (page_mappings_only) +- flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; ++ ptep = pte_set_fixmap_offset_init(pmdp, addr); ++ do { ++ pte_t old_pte = READ_ONCE(*ptep); + +- __create_pgd_mapping(mm->pgd, phys, virt, size, prot, +- pgd_pgtable_alloc, flags); +-} ++ iee_set_pte_pre_init(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + +-static void update_mapping_prot(phys_addr_t phys, unsigned long virt, +- phys_addr_t size, pgprot_t prot) +-{ +- if (virt < PAGE_OFFSET) { +- pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", +- &phys, virt); +- return; +- } ++ /* ++ * After the PTE entry has been populated once, we ++ * only allow updates to the permission attributes. 
++ */ ++ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), ++ READ_ONCE(pte_val(*ptep)))); + +- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, +- NO_CONT_MAPPINGS); ++ phys += PAGE_SIZE; ++ } while (ptep++, addr += PAGE_SIZE, addr != end); + +- /* flush the TLBs after updating live kernel mappings */ +- flush_tlb_kernel_range(virt, virt + size); ++ pte_clear_fixmap_init(); + } + +-static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, +- phys_addr_t end, pgprot_t prot, int flags) ++static __init void iee_alloc_init_cont_pte_pre_init(pmd_t *pmdp, unsigned long addr, ++ unsigned long end, phys_addr_t phys, ++ pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), ++ int flags) + { +- __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, +- prot, early_pgtable_alloc, flags); +-} ++ unsigned long next; ++ pmd_t pmd = READ_ONCE(*pmdp); + +-void __init mark_linear_text_alias_ro(void) +-{ +- /* +- * Remove the write permissions from the linear alias of .text/.rodata +- */ +- update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext), +- (unsigned long)__init_begin - (unsigned long)_stext, +- PAGE_KERNEL_RO); +-} ++ BUG_ON(pmd_sect(pmd)); ++ if (pmd_none(pmd)) { ++ pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; ++ phys_addr_t pte_phys; + +-#ifdef CONFIG_KFENCE ++ if (flags & NO_EXEC_MAPPINGS) ++ pmdval |= PMD_TABLE_PXN; ++ BUG_ON(!pgtable_alloc); ++ pte_phys = pgtable_alloc(PAGE_SHIFT); ++ __iee_pmd_populate_pre_init(pmdp, pte_phys, pmdval); ++ pmd = READ_ONCE(*pmdp); ++ } ++ BUG_ON(pmd_bad(pmd)); + +-bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; ++ do { ++ pgprot_t __prot = prot; + +-/* early_param() will be parsed before map_mem() below. */ +-static int __init parse_kfence_early_init(char *arg) +-{ +- int val; ++ next = pte_cont_addr_end(addr, end); + +- if (get_option(&arg, &val)) +- kfence_early_init = !!val; ++ /* use a contiguous mapping if the range is suitably aligned */ ++ if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && ++ (flags & NO_CONT_MAPPINGS) == 0) ++ __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + +-#if IS_ENABLED(CONFIG_KFENCE_MUST_EARLY_INIT) +- kfence_must_early_init = (val == -1) ? true : false; +-#endif ++ iee_init_pte_pre_init(pmdp, addr, next, phys, __prot); + +- return 0; ++ phys += next - addr; ++ } while (addr = next, addr != end); + } +-early_param("kfence.sample_interval", parse_kfence_early_init); + +-static phys_addr_t __init arm64_kfence_alloc_pool(void) ++static __init void iee_init_pmd_pre_init(pud_t *pudp, unsigned long addr, unsigned long end, ++ phys_addr_t phys, pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), int flags) + { +- phys_addr_t kfence_pool; ++ unsigned long next; ++ pmd_t *pmdp; + +- if (!kfence_early_init) +- return 0; ++ pmdp = pmd_set_fixmap_offset_init(pudp, addr); ++ do { ++ pmd_t old_pmd = READ_ONCE(*pmdp); + +- kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); +- if (!kfence_pool) { +- pr_err("failed to allocate kfence pool\n"); +- kfence_early_init = false; +- return 0; +- } ++ next = pmd_addr_end(addr, end); + +- /* Temporarily mark as NOMAP. */ +- memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); ++ /* try section mapping first */ ++ if (((addr | next | phys) & ~PMD_MASK) == 0 && ++ (flags & NO_BLOCK_MAPPINGS) == 0) { ++ iee_pmd_set_huge_pre_init(pmdp, phys, prot); ++ ++ /* ++ * After the PMD entry has been populated once, we ++ * only allow updates to the permission attributes. 
++ */ ++ BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), ++ READ_ONCE(pmd_val(*pmdp)))); ++ } else { ++ iee_alloc_init_cont_pte_pre_init(pmdp, addr, next, phys, prot, ++ pgtable_alloc, flags); ++ ++ BUG_ON(pmd_val(old_pmd) != 0 && ++ pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); ++ } ++ phys += next - addr; ++ } while (pmdp++, addr = next, addr != end); ++ ++ pmd_clear_fixmap_init(); ++} ++ ++static __init void iee_alloc_init_cont_pmd_pre_init(pud_t *pudp, unsigned long addr, ++ unsigned long end, phys_addr_t phys, ++ pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), int flags) ++{ ++ unsigned long next; ++ pud_t pud = READ_ONCE(*pudp); ++ ++ /* ++ * Check for initial section mappings in the pgd/pud. ++ */ ++ BUG_ON(pud_sect(pud)); ++ if (pud_none(pud)) { ++ pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; ++ phys_addr_t pmd_phys; ++ ++ if (flags & NO_EXEC_MAPPINGS) ++ pudval |= PUD_TABLE_PXN; ++ BUG_ON(!pgtable_alloc); ++ pmd_phys = pgtable_alloc(PMD_SHIFT); ++ __iee_pud_populate_pre_init(pudp, pmd_phys, pudval); ++ pud = READ_ONCE(*pudp); ++ } ++ BUG_ON(pud_bad(pud)); ++ ++ do { ++ pgprot_t __prot = prot; ++ ++ next = pmd_cont_addr_end(addr, end); ++ ++ /* use a contiguous mapping if the range is suitably aligned */ ++ if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && ++ (flags & NO_CONT_MAPPINGS) == 0) ++ __prot = __pgprot(pgprot_val(prot) | PTE_CONT); ++ ++ iee_init_pmd_pre_init(pudp, addr, next, phys, __prot, pgtable_alloc, flags); ++ ++ phys += next - addr; ++ } while (addr = next, addr != end); ++} ++ ++static __init void iee_alloc_init_pud_pre_init(pgd_t *pgdp, unsigned long addr, unsigned long end, ++ phys_addr_t phys, pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), ++ int flags) ++{ ++ unsigned long next; ++ pud_t *pudp; ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ p4d_t p4d = READ_ONCE(*p4dp); ++ ++ if (p4d_none(p4d)) { ++ p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; ++ phys_addr_t pud_phys; ++ ++ if (flags & NO_EXEC_MAPPINGS) ++ p4dval |= P4D_TABLE_PXN; ++ BUG_ON(!pgtable_alloc); ++ pud_phys = pgtable_alloc(PUD_SHIFT); ++ __iee_p4d_populate_pre_init(p4dp, pud_phys, p4dval); ++ p4d = READ_ONCE(*p4dp); ++ } ++ BUG_ON(p4d_bad(p4d)); ++ ++ pudp = pud_set_fixmap_offset_init(p4dp, addr); ++ do { ++ pud_t old_pud = READ_ONCE(*pudp); ++ ++ next = pud_addr_end(addr, end); ++ ++ /* ++ * For 4K granule only, attempt to put down a 1GB block ++ */ ++ if (pud_sect_supported() && ++ ((addr | next | phys) & ~PUD_MASK) == 0 && ++ (flags & NO_BLOCK_MAPPINGS) == 0) { ++ iee_pud_set_huge_pre_init(pudp, phys, prot); ++ ++ /* ++ * After the PUD entry has been populated once, we ++ * only allow updates to the permission attributes. ++ */ ++ BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), ++ READ_ONCE(pud_val(*pudp)))); ++ } else { ++ iee_alloc_init_cont_pmd_pre_init(pudp, addr, next, phys, prot, ++ pgtable_alloc, flags); ++ ++ BUG_ON(pud_val(old_pud) != 0 && ++ pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); ++ } ++ phys += next - addr; ++ } while (pudp++, addr = next, addr != end); ++ ++ pud_clear_fixmap_init(); ++} ++ ++static __init void __iee_create_pgd_mapping_locked_pre_init(pgd_t *pgdir, phys_addr_t phys, ++ unsigned long virt, phys_addr_t size, ++ pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), ++ int flags) ++{ ++ unsigned long addr, end, next; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); ++ p4d_t *p4dp; ++ p4d_t p4d; ++ ++ /* ++ * If the virtual and physical address don't have the same offset ++ * within a page, we cannot map the region as the caller expects. 
++ */ ++ if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) ++ return; ++ ++ phys &= PAGE_MASK; ++ addr = virt & PAGE_MASK; ++ end = PAGE_ALIGN(virt + size); ++ ++ do { ++ next = pgd_addr_end(addr, end); ++ iee_alloc_init_pud_pre_init(pgdp, addr, next, phys, prot, pgtable_alloc, ++ flags); ++ p4dp = p4d_offset(pgdp, addr); ++ p4d = READ_ONCE(*p4dp); ++ __iee_p4d_populate_pre_init(p4dp, __p4d_to_phys(p4d), (PGD_APT | PUD_TYPE_TABLE)); ++ phys += next - addr; ++ } while (pgdp++, addr = next, addr != end); ++} ++ ++static __init void __iee_create_pgd_mapping_pre_init(pgd_t *pgdir, phys_addr_t phys, ++ unsigned long virt, phys_addr_t size, ++ pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), ++ int flags) ++{ ++ mutex_lock(&fixmap_lock); ++ __iee_create_pgd_mapping_locked_pre_init(pgdir, phys, virt, size, prot, ++ pgtable_alloc, flags); ++ mutex_unlock(&fixmap_lock); ++} ++#endif ++ ++static phys_addr_t __pgd_pgtable_alloc(int shift) ++{ ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif ++ void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); ++ BUG_ON(!ptr); ++ ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(ptr)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)ptr); ++ #endif ++ ++ /* Ensure the zeroed page is visible to the page table walker */ ++ dsb(ishst); ++ return __pa(ptr); ++} ++ ++static phys_addr_t pgd_pgtable_alloc(int shift) ++{ ++ phys_addr_t pa = __pgd_pgtable_alloc(shift); ++ struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); ++ ++ /* ++ * Call proper page table ctor in case later we need to ++ * call core mm functions like apply_to_page_range() on ++ * this pre-allocated page table. ++ * ++ * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is ++ * folded, and if so pagetable_pte_ctor() becomes nop. ++ */ ++ if (shift == PAGE_SHIFT) ++ BUG_ON(!pagetable_pte_ctor(ptdesc)); ++ else if (shift == PMD_SHIFT) ++ BUG_ON(!pagetable_pmd_ctor(ptdesc)); ++ ++ return pa; ++} ++ ++/* ++ * This function can only be used to modify existing table entries, ++ * without allocating new levels of table. Note that this permits the ++ * creation of new section or page entries. 
++ */ ++void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, ++ phys_addr_t size, pgprot_t prot) ++{ ++ if (virt < PAGE_OFFSET) { ++ pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", ++ &phys, virt); ++ return; ++ } ++ ++ #ifdef CONFIG_PTP ++ __iee_create_pgd_mapping_pre_init(init_mm.pgd, phys, virt, size, prot, NULL, ++ NO_CONT_MAPPINGS); ++ #else ++ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, ++ NO_CONT_MAPPINGS); ++ #endif ++} ++ ++void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, ++ unsigned long virt, phys_addr_t size, ++ pgprot_t prot, bool page_mappings_only) ++{ ++ int flags = 0; ++ ++ BUG_ON(mm == &init_mm); ++ ++ if (page_mappings_only) ++ flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; ++ ++ __create_pgd_mapping(mm->pgd, phys, virt, size, prot, ++ pgd_pgtable_alloc, flags); ++} ++ ++static void update_mapping_prot(phys_addr_t phys, unsigned long virt, ++ phys_addr_t size, pgprot_t prot) ++{ ++ if (virt < PAGE_OFFSET) { ++ pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", ++ &phys, virt); ++ return; ++ } ++ ++ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, ++ NO_CONT_MAPPINGS); ++ ++ /* flush the TLBs after updating live kernel mappings */ ++ flush_tlb_kernel_range(virt, virt + size); ++} ++ ++static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, ++ phys_addr_t end, pgprot_t prot, int flags) ++{ ++ #ifdef CONFIG_PTP ++ __iee_create_pgd_mapping_pre_init(pgdp, start, __phys_to_virt(start), end - start, ++ prot, early_pgtable_alloc, flags); ++ #else ++ __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, ++ prot, early_pgtable_alloc, flags); ++ #endif ++} ++ ++void __init mark_linear_text_alias_ro(void) ++{ ++ /* ++ * Remove the write permissions from the linear alias of .text/.rodata ++ */ ++ update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext), ++ (unsigned long)__init_begin - (unsigned long)_stext, ++ PAGE_KERNEL_RO); ++} ++ ++#ifdef CONFIG_KFENCE ++ ++bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; ++ ++/* early_param() will be parsed before map_mem() below. */ ++static int __init parse_kfence_early_init(char *arg) ++{ ++ int val; ++ ++ if (get_option(&arg, &val)) ++ kfence_early_init = !!val; ++ ++#if IS_ENABLED(CONFIG_KFENCE_MUST_EARLY_INIT) ++ kfence_must_early_init = (val == -1) ? true : false; ++#endif ++ ++ return 0; ++} ++early_param("kfence.sample_interval", parse_kfence_early_init); ++ ++static phys_addr_t __init arm64_kfence_alloc_pool(void) ++{ ++ phys_addr_t kfence_pool; ++ ++ if (!kfence_early_init) ++ return 0; ++ ++ kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); ++ if (!kfence_pool) { ++ pr_err("failed to allocate kfence pool\n"); ++ kfence_early_init = false; ++ return 0; ++ } ++ ++ /* Temporarily mark as NOMAP. */ ++ memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); ++ ++ return kfence_pool; ++} ++ ++static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) ++{ ++ if (!kfence_pool) ++ return; ++ ++ /* KFENCE pool needs page-level mapping. 
*/ ++ __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE, ++ pgprot_tagged(PAGE_KERNEL), ++ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); ++ memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); ++ __kfence_pool = phys_to_virt(kfence_pool); ++} ++#else /* CONFIG_KFENCE */ ++ ++static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } ++static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } ++ ++#endif /* CONFIG_KFENCE */ ++ ++static void __init map_mem(pgd_t *pgdp) ++{ ++ static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); ++ phys_addr_t kernel_start = __pa_symbol(_stext); ++ phys_addr_t kernel_end = __pa_symbol(__init_begin); ++ phys_addr_t start, end; ++ phys_addr_t early_kfence_pool; ++ int flags = NO_EXEC_MAPPINGS; ++ u64 i; ++ ++ /* ++ * Setting hierarchical PXNTable attributes on table entries covering ++ * the linear region is only possible if it is guaranteed that no table ++ * entries at any level are being shared between the linear region and ++ * the vmalloc region. Check whether this is true for the PGD level, in ++ * which case it is guaranteed to be true for all other levels as well. ++ */ ++ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); ++ ++ early_kfence_pool = arm64_kfence_alloc_pool(); ++ ++ if (can_set_direct_map()) ++ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; ++ ++ #ifdef CONFIG_IEE ++ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; ++ #endif ++ /* ++ * Take care not to create a writable alias for the ++ * read-only text and rodata sections of the kernel image. ++ * So temporarily mark them as NOMAP to skip mappings in ++ * the following for-loop ++ */ ++ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); ++ ++ /* map all the memory banks */ ++ for_each_mem_range(i, &start, &end) { ++ if (start >= end) ++ break; ++ /* ++ * The linear map must allow allocation tags reading/writing ++ * if MTE is present. Otherwise, it has the same attributes as ++ * PAGE_KERNEL. ++ */ ++ __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), ++ flags); ++ } + +- return kfence_pool; ++ /* ++ * Map the linear alias of the [_stext, __init_begin) interval ++ * as non-executable now, and remove the write permission in ++ * mark_linear_text_alias_ro() below (which will be called after ++ * alternative patching has completed). This makes the contents ++ * of the region accessible to subsystems such as hibernate, ++ * but protects it from inadvertent modification or execution. ++ * Note that contiguous mappings cannot be remapped in this way, ++ * so we should avoid them here. ++ */ ++ #ifdef CONFIG_IEE ++ __map_memblock(pgdp, kernel_start, kernel_end, ++ PAGE_KERNEL, flags); ++ #else ++ __map_memblock(pgdp, kernel_start, kernel_end, ++ PAGE_KERNEL, NO_CONT_MAPPINGS); ++ #endif ++ memblock_clear_nomap(kernel_start, kernel_end - kernel_start); ++ arm64_kfence_map_pool(early_kfence_pool, pgdp); ++} ++ ++void mark_rodata_ro(void) ++{ ++ unsigned long section_size; ++ ++ /* ++ * mark .rodata as read only. Use __init_begin rather than __end_rodata ++ * to cover NOTES and EXCEPTION_TABLE. 
++ */ ++ section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; ++ update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, ++ section_size, PAGE_KERNEL_RO); ++ ++ debug_checkwx(); ++} ++ ++static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, ++ pgprot_t prot, struct vm_struct *vma, ++ int flags, unsigned long vm_flags) ++{ ++ phys_addr_t pa_start = __pa_symbol(va_start); ++ unsigned long size = va_end - va_start; ++ ++ BUG_ON(!PAGE_ALIGNED(pa_start)); ++ BUG_ON(!PAGE_ALIGNED(size)); ++ ++ #ifdef CONFIG_PTP ++ __iee_create_pgd_mapping_pre_init(pgdp, pa_start, (unsigned long)va_start, size, prot, ++ early_pgtable_alloc, flags); ++ #else ++ __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, ++ early_pgtable_alloc, flags); ++ #endif ++ ++ if (!(vm_flags & VM_NO_GUARD)) ++ size += PAGE_SIZE; ++ ++ vma->addr = va_start; ++ vma->phys_addr = pa_start; ++ vma->size = size; ++ vma->flags = VM_MAP | vm_flags; ++ vma->caller = __builtin_return_address(0); ++ ++ vm_area_add_early(vma); ++} ++ ++static pgprot_t kernel_exec_prot(void) ++{ ++ return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; ++} ++ ++#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 ++static int __init map_entry_trampoline(void) ++{ ++ int i; ++ ++ pgprot_t prot = kernel_exec_prot(); ++ phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); ++ ++ /* The trampoline is always mapped and can therefore be global */ ++ pgprot_val(prot) &= ~PTE_NG; ++ ++ /* Map only the text into the trampoline page table */ ++ memset(tramp_pg_dir, 0, PGD_SIZE); ++ #ifdef CONFIG_PTP ++ iee_set_logical_mem_ro((unsigned long)tramp_pg_dir); ++ #endif ++ __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, ++ entry_tramp_text_size(), prot, ++ __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); ++ ++ /* Map both the text and data into the kernel page table */ ++ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) ++ __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, ++ pa_start + i * PAGE_SIZE, prot); ++ ++ if (IS_ENABLED(CONFIG_RELOCATABLE)) ++ __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, ++ pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); ++ ++ return 0; ++} ++core_initcall(map_entry_trampoline); ++#endif ++ ++/* ++ * Open coded check for BTI, only for use to determine configuration ++ * for early mappings for before the cpufeature code has run. ++ */ ++static bool arm64_early_this_cpu_has_bti(void) ++{ ++ u64 pfr1; ++ ++ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) ++ return false; ++ ++ pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); ++ return cpuid_feature_extract_unsigned_field(pfr1, ++ ID_AA64PFR1_EL1_BT_SHIFT); ++} ++ ++#ifdef CONFIG_IEE ++/* Set PMD APTable of iee si codes as (1,1) to revert it to ROX P pages when HPD1=0. 
*/ ++static void __init iee_si_set_pmd_APtable(unsigned long addr, pgd_t *pgdir) ++{ ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); ++ ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ ++ #ifdef CONFIG_PTP ++ pud_t *pudp = pud_set_fixmap_offset_init(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_set_fixmap_offset_init(pudp, addr); ++ ++ pmd_t pmd = READ_ONCE(*pmdp); ++ ++ __iee_pmd_populate_pre_init(pmdp, __pmd_to_phys(pmd), PGD_APT_RO | PGD_APT | PMD_TYPE_TABLE); ++ ++ pud_clear_fixmap_init(); ++ pmd_clear_fixmap_init(); ++ #else ++ pud_t *pudp = pud_set_fixmap_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_set_fixmap_offset(pudp, addr); ++ ++ pmd_t pmd = READ_ONCE(*pmdp); ++ ++ __pmd_populate(pmdp, __pmd_to_phys(pmd), PGD_APT_RO | PGD_APT | PMD_TYPE_TABLE); ++ ++ pud_clear_fixmap(); ++ pmd_clear_fixmap(); ++ #endif ++} ++/* Set PMD APTable of iee si codes as (1,1) to revert it to ROX P pages when HPD1=0. */ ++static void __init mark_iee_si_pmd_APtable(pgd_t *pgdir) ++{ ++ unsigned long addr = (unsigned long)__iee_si_start; ++ iee_si_set_pmd_APtable(addr, pgdir); ++ // iee rwx gate exit may be mapped by another pmd. ++ iee_si_set_pmd_APtable(addr + PAGE_SIZE, pgdir); ++} ++#endif ++ ++/* ++ * Create fine-grained mappings for the kernel. ++ */ ++static void __init map_kernel(pgd_t *pgdp) ++{ ++ static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, ++ vmlinux_initdata, vmlinux_data; ++ ++ #ifdef CONFIG_IEE ++ static struct vm_struct vmlinux_iee_code, vmlinux_iee_data, vmlinux_iee_gate, vmlinux_text_end; ++ #endif ++ ++ /* ++ * External debuggers may need to write directly to the text ++ * mapping to install SW breakpoints. Allow this (only) when ++ * explicitly requested with rodata=off. ++ */ ++ pgprot_t text_prot = kernel_exec_prot(); ++ ++ /* ++ * If we have a CPU that supports BTI and a kernel built for ++ * BTI then mark the kernel executable text as guarded pages ++ * now so we don't have to rewrite the page tables later. ++ */ ++ if (arm64_early_this_cpu_has_bti()) ++ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); ++ ++ /* ++ * Only rodata will be remapped with different permissions later on, ++ * all other segments are allowed to use contiguous mappings. ++ */ ++ #ifdef CONFIG_IEE ++ map_kernel_segment(pgdp, _stext, __iee_si_data_start, text_prot, &vmlinux_text, ++ 0, VM_NO_GUARD); ++ /* Set iee si data RW. */ ++ map_kernel_segment(pgdp, __iee_si_data_start, __iee_exec_entry_start, SET_NG(PAGE_KERNEL), ++ &vmlinux_iee_data, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); ++ /* Set iee entry codes NG. */ ++ map_kernel_segment(pgdp, __iee_exec_entry_start, __iee_si_start, SET_NG(text_prot), &vmlinux_iee_gate, ++ NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); ++ /* Map __iee_si_start - __iee_si_end as U RWX pages and set PMD APTABLE = (1,1). 
*/ ++ map_kernel_segment(pgdp, __iee_si_start, __iee_si_end, SET_NG((PAGE_KERNEL_EXEC)), ++ &vmlinux_iee_code, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); ++ mark_iee_si_pmd_APtable(pgdp); ++ ++ map_kernel_segment(pgdp, __iee_si_end, _etext, text_prot, &vmlinux_text_end, 0, ++ VM_NO_GUARD); ++ ++ map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, ++ &vmlinux_rodata, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); ++ map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, ++ &vmlinux_inittext, 0, VM_NO_GUARD); ++ map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, ++ &vmlinux_initdata, 0, VM_NO_GUARD); ++ map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, 0); ++ #else ++ map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0, ++ VM_NO_GUARD); ++ map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, ++ &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); ++ map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, ++ &vmlinux_inittext, 0, VM_NO_GUARD); ++ map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, ++ &vmlinux_initdata, 0, VM_NO_GUARD); ++ map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); ++ #endif ++ ++ ++ fixmap_copy(pgdp); ++ kasan_copy_shadow(pgdp); ++} ++ ++static void __init create_idmap(void) ++{ ++ u64 start = __pa_symbol(__idmap_text_start); ++ u64 size = __pa_symbol(__idmap_text_end) - start; ++ pgd_t *pgd = idmap_pg_dir; ++ u64 pgd_phys; ++ ++ /* check if we need an additional level of translation */ ++ if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { ++ pgd_phys = early_pgtable_alloc(PAGE_SHIFT); ++ set_pgd(&idmap_pg_dir[start >> VA_BITS], ++ __pgd(pgd_phys | P4D_TYPE_TABLE)); ++ pgd = __va(pgd_phys); ++ } ++ #ifdef CONFIG_PTP ++ __iee_create_pgd_mapping_pre_init(pgd, start, start, size, PAGE_KERNEL_ROX, ++ early_pgtable_alloc, 0); ++ #else ++ __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, ++ early_pgtable_alloc, 0); ++ #endif ++ ++ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { ++ extern u32 __idmap_kpti_flag; ++ u64 pa = __pa_symbol(&__idmap_kpti_flag); ++ ++ /* ++ * The KPTI G-to-nG conversion code needs a read-write mapping ++ * of its synchronization flag in the ID map. ++ */ ++ #ifdef CONFIG_PTP ++ __iee_create_pgd_mapping_pre_init(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, ++ early_pgtable_alloc, 0); ++ #else ++ __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, ++ early_pgtable_alloc, 0); ++ #endif ++ } ++} ++ ++#ifdef CONFIG_IEE ++static void __create_pgd_mapping_for_iee_locked(pgd_t *pgdir, phys_addr_t phys, ++ unsigned long virt, phys_addr_t size, ++ pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), ++ int flags) ++{ ++ unsigned long addr, end, next; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); ++ p4d_t *p4dp; ++ p4d_t p4d; ++ ++ /* ++ * If the virtual and physical address don't have the same offset ++ * within a page, we cannot map the region as the caller expects. 
++ */ ++ if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) ++ return; ++ ++ phys &= PAGE_MASK; ++ addr = virt & PAGE_MASK; ++ end = PAGE_ALIGN(virt + size); ++ ++ do { ++ next = pgd_addr_end(addr, end); ++ #ifdef CONFIG_PTP ++ iee_alloc_init_pud_pre_init(pgdp, addr, next, phys, prot, pgtable_alloc, ++ flags); ++ #else ++ alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, ++ flags); ++ #endif ++ p4dp = p4d_offset(pgdp, addr); ++ p4d = READ_ONCE(*p4dp); ++ #ifdef CONFIG_PTP ++ __iee_p4d_populate_pre_init(p4dp, __p4d_to_phys(p4d), (PGD_APT | PGD_PXN | PGD_UXN | PUD_TYPE_TABLE)); ++ #else ++ __p4d_populate(p4dp, __p4d_to_phys(p4d), (PGD_APT | PGD_PXN | PGD_UXN | PUD_TYPE_TABLE)); ++ #endif ++ phys += next - addr; ++ } while (pgdp++, addr = next, addr != end); ++} ++ ++static void __create_pgd_mapping_for_iee(pgd_t *pgdir, phys_addr_t phys, ++ unsigned long virt, phys_addr_t size, ++ pgprot_t prot, ++ phys_addr_t (*pgtable_alloc)(int), ++ int flags) ++{ ++ mutex_lock(&fixmap_lock); ++ __create_pgd_mapping_for_iee_locked(pgdir, phys, virt, size, prot, ++ pgtable_alloc, flags); ++ mutex_unlock(&fixmap_lock); + } + +-static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) ++static void __init __map_memblock_for_iee(pgd_t *pgdp, phys_addr_t start, ++ phys_addr_t end, pgprot_t prot, int flags) + { +- if (!kfence_pool) +- return; +- +- /* KFENCE pool needs page-level mapping. */ +- __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE, +- pgprot_tagged(PAGE_KERNEL), +- NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); +- memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); +- __kfence_pool = phys_to_virt(kfence_pool); ++ #ifdef CONFIG_PTP ++ __create_pgd_mapping_for_iee(pgdp, start, __phys_to_iee(start), end - start, ++ prot, early_pgtable_alloc, flags); ++ #else ++ __create_pgd_mapping_for_iee(pgdp, start, __phys_to_iee(start), end - start, ++ prot, early_pgtable_alloc, flags); ++ #endif + } +-#else /* CONFIG_KFENCE */ + +-static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } +-static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } +- +-#endif /* CONFIG_KFENCE */ +- +-static void __init map_mem(pgd_t *pgdp) ++static void __init map_iee(pgd_t *pgdp) + { + static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); + phys_addr_t kernel_start = __pa_symbol(_stext); +@@ -578,6 +1578,8 @@ static void __init map_mem(pgd_t *pgdp) + int flags = NO_EXEC_MAPPINGS; + u64 i; + ++ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; ++ + /* + * Setting hierarchical PXNTable attributes on table entries covering + * the linear region is only possible if it is guaranteed that no table +@@ -589,9 +1591,6 @@ static void __init map_mem(pgd_t *pgdp) + + early_kfence_pool = arm64_kfence_alloc_pool(); + +- if (can_set_direct_map()) +- flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; +- + /* + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. +@@ -609,12 +1608,11 @@ static void __init map_mem(pgd_t *pgdp) + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. 
+ */
+- __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL),
+- flags);
++ __map_memblock_for_iee(pgdp, start, end, SET_NG(SET_INVALID(SET_UPAGE(PAGE_KERNEL))), flags);
+ }
+ 
+ /*
+- * Map the linear alias of the [_stext, __init_begin) interval
++ * Map the linear alias of the [_text, __init_begin) interval
+ * as non-executable now, and remove the write permission in
+ * mark_linear_text_alias_ro() below (which will be called after
+ * alternative patching has completed). This makes the contents
+@@ -623,178 +1621,384 @@ static void __init map_mem(pgd_t *pgdp)
+ * Note that contiguous mappings cannot be remapped in this way,
+ * so we should avoid them here.
+ */
+- __map_memblock(pgdp, kernel_start, kernel_end,
+- PAGE_KERNEL, NO_CONT_MAPPINGS);
++ __map_memblock_for_iee(pgdp, kernel_start, kernel_end,
++ SET_NG(SET_INVALID(SET_UPAGE(PAGE_KERNEL))), flags);
+ memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+ arm64_kfence_map_pool(early_kfence_pool, pgdp);
+ }
+ 
+-void mark_rodata_ro(void)
++/*
++ * Change page access permissions; huge pages are not handled.
++ * Only used by IEE init functions.
++ */
++static void __init iee_si_set_page_attr(unsigned long addr, pteval_t attr)
+ {
+- unsigned long section_size;
++ unsigned long flag;
++ pgd_t *pgdir = swapper_pg_dir;
++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr);
+ 
+- /*
+- * mark .rodata as read only. Use __init_begin rather than __end_rodata
+- * to cover NOTES and EXCEPTION_TABLE.
+- */
+- section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
+- update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
+- section_size, PAGE_KERNEL_RO);
++ p4d_t *p4dp = p4d_offset(pgdp, addr);
+ 
+- debug_checkwx();
++ pud_t *pudp = pud_offset(p4dp, addr);
++
++ pmd_t *pmdp = pmd_offset(pudp, addr);
++
++ pte_t *ptep = pte_offset_kernel(pmdp, addr);
++ pte_t pte = READ_ONCE(*ptep);
++
++ if(attr & PTE_RDONLY)
++ pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM);
++ pte = __pte(pte_val(pte) | attr);
++ #ifdef CONFIG_PTP
++ // Write pgtable in IEE directly.
++ flag = local_daif_save();
++ asm volatile ("msr pan, #0");
++ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
++ asm volatile ("msr pan, #1");
++ local_daif_restore(flag);
++ #else
++ WRITE_ONCE(*ptep, pte);
++ #endif
++}
++
++/* Prepare data used for iee rwx gates. This data is set only once. */
++void __init iee_si_prepare_data(void)
++{
++ unsigned long va;
++ // Record current TCR val after system init.
++ iee_si_tcr = read_sysreg(tcr_el1) & ~(SYS_TCR_IEE_SI);
++ // Mark iee data as RO and move it to iee after setting up.
++ va = (unsigned long)__iee_si_data_start;
++ iee_si_set_page_attr(va, PTE_RDONLY);
++ iee_si_set_page_attr(lm_alias(va)+iee_offset, 0x1 | PTE_RDONLY);
++ // Set iee sensitive inst code page U RWX here to hide it from kernel.
++ va = (unsigned long)__iee_si_start;
++ iee_si_set_page_attr(va, PTE_USER);
++ va = (unsigned long)__iee_si_start + PAGE_SIZE;
++ iee_si_set_page_attr(va, PTE_USER);
++ flush_tlb_all();
+ }
+ 
+-static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
+- pgprot_t prot, struct vm_struct *vma,
+- int flags, unsigned long vm_flags)
++#endif
++
++#ifdef CONFIG_PTP
++// Attention: the pre-init helpers below use set_xxx directly, without adding the IEE offset.
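For orientation before the pre-init helpers that follow: under CONFIG_PTP every page-table page is reachable through a second alias at __phys_to_iee(pa), sitting IEE_OFFSET above the ordinary linear mapping of the same frame, and set_iee_valid_pre_init() (next) walks swapper_pg_dir to flip the valid bit on that alias's PTE. A minimal userspace model of the address arithmetic, with both constants as invented placeholders; the kernel derives the real values from VA_BITS:

```c
/*
 * Illustrative model of the IEE aliasing used by the pre-init helpers.
 * PAGE_OFFSET and IEE_OFFSET values here are assumptions for the sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_OFFSET 0xffff800000000000ULL /* assumed linear-map base */
#define IEE_OFFSET  0x0000100000000000ULL /* assumed IEE alias offset */

static uint64_t va_of_phys(uint64_t pa)  { return PAGE_OFFSET + pa; }
static uint64_t iee_of_phys(uint64_t pa) { return PAGE_OFFSET + IEE_OFFSET + pa; }

int main(void)
{
	uint64_t pa = 0x40080000ULL; /* some page-table frame */

	/* Same frame, two views: the linear map (made RO by the patch)
	 * and the IEE alias (the write path, guarded by PAN). */
	printf("lm  va: 0x%llx\n", (unsigned long long)va_of_phys(pa));
	printf("iee va: 0x%llx\n", (unsigned long long)iee_of_phys(pa));
	return 0;
}
```

The point of the two views is that a read-only linear map plus PAN leaves the IEE alias as the only usable write path into page-table memory.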
++static void __init set_iee_valid_pre_init(unsigned long addr) + { +- phys_addr_t pa_start = __pa_symbol(va_start); +- unsigned long size = va_end - va_start; ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + +- BUG_ON(!PAGE_ALIGNED(pa_start)); +- BUG_ON(!PAGE_ALIGNED(size)); ++ p4d_t *p4dp = p4d_offset(pgdp, addr); + +- __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, +- early_pgtable_alloc, flags); ++ pud_t *pudp = pud_offset(p4dp, addr); + +- if (!(vm_flags & VM_NO_GUARD)) +- size += PAGE_SIZE; ++ pmd_t *pmdp = pmd_offset(pudp, addr); + +- vma->addr = va_start; +- vma->phys_addr = pa_start; +- vma->size = size; +- vma->flags = VM_MAP | vm_flags; +- vma->caller = __builtin_return_address(0); ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ pte_t pte = READ_ONCE(*ptep); + +- vm_area_add_early(vma); ++ if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) ++ return; ++ ++ pte = __pte(pte_val(pte) | 0x1); ++ iee_set_pte_pre_init(ptep, pte); ++ flush_tlb_kernel_range(addr, addr+PAGE_SIZE); ++ isb(); + } + +-static pgprot_t kernel_exec_prot(void) ++static void __init move_pte_table_into_iee(pmd_t *pmdp, unsigned long addr, unsigned long end) + { +- return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; ++ pmd_t pmd = READ_ONCE(*pmdp); ++ unsigned long iee_addr = __phys_to_iee(__pmd_to_phys(pmd)); ++ set_iee_valid_pre_init(iee_addr); + } + +-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +-static int __init map_entry_trampoline(void) ++static void __init move_pmd_table_into_iee(pud_t *pudp, unsigned long addr, unsigned long end) + { +- int i; ++ unsigned long next; ++ pud_t pud = READ_ONCE(*pudp); ++ pmd_t *pmdp; ++ pmd_t pmd; ++ unsigned long iee_addr = __phys_to_iee(__pud_to_phys(pud)); ++ set_iee_valid_pre_init(iee_addr); + +- pgprot_t prot = kernel_exec_prot(); +- phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); ++ pmdp = pmd_offset(pudp, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ pmd = READ_ONCE(*pmdp); ++ if((pmd_val(pmd) & PMD_TABLE_BIT) == 0) ++ { ++ continue; ++ } ++ else ++ { ++ move_pte_table_into_iee(pmdp, addr, next); ++ } ++ } while (pmdp++, addr = next, addr != end); ++} + +- /* The trampoline is always mapped and can therefore be global */ +- pgprot_val(prot) &= ~PTE_NG; ++static void __init move_pud_table_into_iee(pgd_t *pgdp, unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ p4d_t p4d = READ_ONCE(*p4dp); ++ pud_t *pudp; ++ pud_t pud; ++ unsigned long iee_addr = __phys_to_iee(__p4d_to_phys(p4d)); ++ set_iee_valid_pre_init(iee_addr); + +- /* Map only the text into the trampoline page table */ +- memset(tramp_pg_dir, 0, PGD_SIZE); +- __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, +- entry_tramp_text_size(), prot, +- __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); ++ pudp = pud_offset(p4dp, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ pud = READ_ONCE(*pudp); ++ if ((pud_val(pud) & PUD_TABLE_BIT) == 0) ++ { ++ continue; ++ } ++ else ++ { ++ move_pmd_table_into_iee(pudp, addr, next); ++ } ++ } while (pudp++, addr = next, addr != end); ++} + +- /* Map both the text and data into the kernel page table */ +- for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) +- __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, +- pa_start + i * PAGE_SIZE, prot); ++static void __init init_iee_for_one_region(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) ++{ ++ unsigned long addr, end, next; ++ pgd_t 
*pgdp = pgd_offset_pgd(pgdir, va_start); + +- if (IS_ENABLED(CONFIG_RELOCATABLE)) +- __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, +- pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); ++ addr = va_start & PAGE_MASK; ++ end = PAGE_ALIGN(va_end); + +- return 0; ++ do { ++ next = pgd_addr_end(addr, end); ++ move_pud_table_into_iee(pgdp, addr, next); ++ } while (pgdp++, addr = next, addr != end); + } +-core_initcall(map_entry_trampoline); +-#endif + +-/* +- * Open coded check for BTI, only for use to determine configuration +- * for early mappings for before the cpufeature code has run. +- */ +-static bool arm64_early_this_cpu_has_bti(void) ++static void __init init_iee(void) + { +- u64 pfr1; ++ unsigned long iee_addr; ++ phys_addr_t start, end; ++ u64 i; ++ pgd_t *pgdp; ++ ++ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 ++ // handling 1-level tramp page table tramp_pg_dir ++ iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(tramp_pg_dir)); ++ set_iee_valid_pre_init(iee_addr); ++ #endif ++ // handling 1-level page table swapper_pg_dir ++ pgdp = swapper_pg_dir; ++ iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(swapper_pg_dir)); ++ set_iee_valid_pre_init(iee_addr); ++ // handling 2/3/4-level page table for kernel ++ init_iee_for_one_region(pgdp, (unsigned long)_text, (unsigned long)_etext); ++ init_iee_for_one_region(pgdp, (unsigned long)__start_rodata, (unsigned long)__inittext_begin); ++ init_iee_for_one_region(pgdp, (unsigned long)__inittext_begin, (unsigned long)__inittext_end); ++ init_iee_for_one_region(pgdp, (unsigned long)__initdata_begin, (unsigned long)__initdata_end); ++ init_iee_for_one_region(pgdp, (unsigned long)_data, (unsigned long)_end); ++ // handling 2/3/4-level page table for fixmap i.e. remap bm_xxx ++ iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pte_addr)); ++ set_iee_valid_pre_init(iee_addr); ++ iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pmd_addr)); ++ set_iee_valid_pre_init(iee_addr); ++ iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pud_addr)); ++ set_iee_valid_pre_init(iee_addr); ++ // handling 2/3/4-level page table for logical mem and iee ++ for_each_mem_range(i, &start, &end) { ++ if (start >= end) ++ break; ++ /* ++ * The linear map must allow allocation tags reading/writing ++ * if MTE is present. Otherwise, it has the same attributes as ++ * PAGE_KERNEL. ++ */ ++ init_iee_for_one_region(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); ++ init_iee_for_one_region(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); ++ } ++} + +- if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) +- return false; ++static void iee_set_kernel_upage_pre_init(unsigned long addr) ++{ ++ pgd_t *pgdir = swapper_pg_dir; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + +- pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); +- return cpuid_feature_extract_unsigned_field(pfr1, +- ID_AA64PFR1_EL1_BT_SHIFT); ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ p4d_t p4d = READ_ONCE(*p4dp); ++ ++ pud_t *pudp = pud_offset(p4dp, addr); ++ ++ pmd_t *pmdp = pmd_offset(pudp, addr); ++ ++ pte_t *ptep = pte_offset_kernel(pmdp, addr); ++ ++ int i; ++ for(i = 0; i < 4; i++) ++ { ++ pte_t pte = READ_ONCE(*ptep); ++ pte = __pte(pte_val(pte) | PTE_USER | PTE_NG); ++ iee_set_pte_pre_init(ptep, pte); ++ ptep++; ++ } ++ flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); ++ isb(); + } + +-/* +- * Create fine-grained mappings for the kernel. 
+- */ +-static void __init map_kernel(pgd_t *pgdp) ++static void __init iee_set_pte_table_ro(pmd_t *pmdp, unsigned long addr, unsigned long end) + { +- static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, +- vmlinux_initdata, vmlinux_data; ++ pmd_t pmd = READ_ONCE(*pmdp); ++ unsigned long logical_addr = (unsigned long)__va(__pmd_to_phys(pmd)); ++ iee_set_logical_mem_ro(logical_addr); ++} + +- /* +- * External debuggers may need to write directly to the text +- * mapping to install SW breakpoints. Allow this (only) when +- * explicitly requested with rodata=off. +- */ +- pgprot_t text_prot = kernel_exec_prot(); ++static void __init iee_set_pmd_table_ro(pud_t *pudp, unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ pud_t pud = READ_ONCE(*pudp); ++ pmd_t *pmdp; ++ pmd_t pmd; ++ unsigned long logical_addr = (unsigned long)__va(__pud_to_phys(pud)); ++ iee_set_logical_mem_ro(logical_addr); + +- /* +- * If we have a CPU that supports BTI and a kernel built for +- * BTI then mark the kernel executable text as guarded pages +- * now so we don't have to rewrite the page tables later. +- */ +- if (arm64_early_this_cpu_has_bti()) +- text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); ++ pmdp = pmd_offset(pudp, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ pmd = READ_ONCE(*pmdp); ++ if((pmd_val(pmd) & PMD_TABLE_BIT) == 0) ++ { ++ continue; ++ } ++ else ++ { ++ iee_set_pte_table_ro(pmdp, addr, next); ++ } ++ } while (pmdp++, addr = next, addr != end); ++} + +- /* +- * Only rodata will be remapped with different permissions later on, +- * all other segments are allowed to use contiguous mappings. +- */ +- map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0, +- VM_NO_GUARD); +- map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, +- &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); +- map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, +- &vmlinux_inittext, 0, VM_NO_GUARD); +- map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, +- &vmlinux_initdata, 0, VM_NO_GUARD); +- map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); ++static void __init iee_set_pud_table_ro(pgd_t *pgdp, unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ p4d_t *p4dp = p4d_offset(pgdp, addr); ++ p4d_t p4d = READ_ONCE(*p4dp); ++ pud_t *pudp; ++ pud_t pud; ++ unsigned long logical_addr = (unsigned long)__va(__p4d_to_phys(p4d)); ++ iee_set_logical_mem_ro(logical_addr); + +- fixmap_copy(pgdp); +- kasan_copy_shadow(pgdp); ++ pudp = pud_offset(p4dp, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ pud = READ_ONCE(*pudp); ++ if ((pud_val(pud) & PUD_TABLE_BIT) == 0) ++ { ++ continue; ++ } ++ else ++ { ++ iee_set_pmd_table_ro(pudp, addr, next); ++ } ++ } while (pudp++, addr = next, addr != end); + } + +-static void __init create_idmap(void) ++static void __init iee_mark_pgtable_for_one_region_ro(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) + { +- u64 start = __pa_symbol(__idmap_text_start); +- u64 size = __pa_symbol(__idmap_text_end) - start; +- pgd_t *pgd = idmap_pg_dir; +- u64 pgd_phys; ++ unsigned long addr, end, next; ++ pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); + +- /* check if we need an additional level of translation */ +- if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { +- pgd_phys = early_pgtable_alloc(PAGE_SHIFT); +- set_pgd(&idmap_pg_dir[start >> VA_BITS], +- __pgd(pgd_phys | P4D_TYPE_TABLE)); +- pgd = __va(pgd_phys); +- } +- 
__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, +- early_pgtable_alloc, 0); ++ addr = va_start & PAGE_MASK; ++ end = PAGE_ALIGN(va_end); + +- if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { +- extern u32 __idmap_kpti_flag; +- u64 pa = __pa_symbol(&__idmap_kpti_flag); ++ do { ++ next = pgd_addr_end(addr, end); ++ iee_set_pud_table_ro(pgdp, addr, next); ++ } while (pgdp++, addr = next, addr != end); ++} + ++static void __init iee_mark_all_lm_pgtable_ro(void) ++{ ++ unsigned long logical_addr; ++ phys_addr_t start, end; ++ u64 i; ++ pgd_t *pgdp; ++ ++ // handling static allocated page table ++ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 ++ // handling 1-level tramp page table tramp_pg_dir ++ logical_addr = (unsigned long)__va(__pa_symbol(tramp_pg_dir)); ++ iee_set_logical_mem_ro(logical_addr); ++ #endif ++ // handling 1-level page table swapper_pg_dir ++ pgdp = swapper_pg_dir; ++ iee_set_logical_mem_ro((unsigned long)swapper_pg_dir); ++ logical_addr = (unsigned long)__va(__pa_symbol(swapper_pg_dir)); ++ iee_set_logical_mem_ro(logical_addr); ++ ++ // handling 2/3/4-level page table for kernel ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_text, (unsigned long)_etext); ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__start_rodata, (unsigned long)__inittext_begin); ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__inittext_begin, (unsigned long)__inittext_end); ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__initdata_begin, (unsigned long)__initdata_end); ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_data, (unsigned long)_end); ++ ++ // handling 2/3/4-level page table for fixmap i.e. remap bm_xxx ++ logical_addr = (unsigned long)__va(__pa_symbol(bm_pte_addr)); ++ iee_set_logical_mem_ro(logical_addr); ++ ++ iee_set_logical_mem_ro((unsigned long)bm_pmd_addr); ++ logical_addr = (unsigned long)__va(__pa_symbol(bm_pmd_addr)); ++ iee_set_logical_mem_ro(logical_addr); ++ ++ iee_set_logical_mem_ro((unsigned long)bm_pud_addr); ++ logical_addr = (unsigned long)__va(__pa_symbol(bm_pud_addr)); ++ iee_set_logical_mem_ro(logical_addr); ++ ++ // handling 2/3/4-level page table for logical mem and iee ++ for_each_mem_range(i, &start, &end) { ++ if (start >= end) ++ break; + /* +- * The KPTI G-to-nG conversion code needs a read-write mapping +- * of its synchronization flag in the ID map. ++ * The linear map must allow allocation tags reading/writing ++ * if MTE is present. Otherwise, it has the same attributes as ++ * PAGE_KERNEL. + */ +- __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, +- early_pgtable_alloc, 0); ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); ++ iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); + } + } ++#endif ++ ++#ifdef CONFIG_KOI ++extern s64 koi_offset; ++#endif + + void __init paging_init(void) + { ++ #ifdef CONFIG_IEE ++ unsigned long SP_EL0; ++ void *new; ++ void *init_token; ++ struct task_token *token; ++ unsigned long tcr; ++ ++ // Check if cpu has PAN and HPDS. 
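The two feature checks that follow read fields of ID_AA64MMFR1_EL1 via cpuid_feature_extract_unsigned_field() and panic when PAN or HPDS is absent. As a reference for the field arithmetic, a sketch using the ARMv8 field positions (PAN at bits [23:20], HPDS at bits [15:12]); the sample register value is invented:

```c
/* Extracting 4-bit ID register fields, as the checks below do. */
#include <stdint.h>
#include <stdio.h>

static unsigned int field(uint64_t reg, unsigned int shift)
{
	return (reg >> shift) & 0xf; /* ID register fields are 4 bits wide */
}

int main(void)
{
	uint64_t id_aa64mmfr1 = 0x0000000000101122ULL; /* assumed sample */

	printf("PAN : %u\n", field(id_aa64mmfr1, 20));
	printf("HPDS: %u\n", field(id_aa64mmfr1, 12));
	/* The patch panics if either field reads back as 0. */
	return 0;
}
```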
++ if(!cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR1_EL1), ++ ID_AA64MMFR1_EL1_PAN_SHIFT)) ++ panic("Architecture doesn't support PAN, please disable CONFIG_IEE.\n"); ++ ++ if(!cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR1_EL1), ++ ID_AA64MMFR1_EL1_HPDS_SHIFT)) ++ panic("Architecture doesn't support HPDS, please disable CONFIG_IEE.\n"); ++ #endif ++ ++ // Avoid using iee code to modify pgtable before iee initialized. ++ #ifdef CONFIG_PTP ++ pgd_t *pgdp = pgd_set_fixmap_init(__pa_symbol(swapper_pg_dir)); ++ #else + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); ++ #endif ++ ++ + extern pgd_t init_idmap_pg_dir[]; + + idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); +@@ -802,7 +2006,17 @@ void __init paging_init(void) + map_kernel(pgdp); + map_mem(pgdp); + ++ // Map the whole physical mem into IEE, but set invalid. ++ #ifdef CONFIG_IEE ++ map_iee(pgdp); ++ #endif ++ ++ // Avoid using iee code to modify pgtable before iee initialized. ++ #ifdef CONFIG_PTP ++ pgd_clear_fixmap_init(); ++ #else + pgd_clear_fixmap(); ++ #endif + + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); + init_mm.pgd = swapper_pg_dir; +@@ -813,6 +2027,80 @@ void __init paging_init(void) + memblock_allow_resize(); + + create_idmap(); ++ ++ #ifdef CONFIG_IEE ++ // test iee_exec_entry ++ iee_rwx_gate_entry(IEE_SI_TEST); ++ // Initialize init iee stack. ++ #ifdef CONFIG_PTP ++ iee_set_kernel_upage_pre_init((unsigned long)init_iee_stack_begin); ++ iee_set_kernel_upage_pre_init((unsigned long)__va(__pa_symbol(init_iee_stack_begin))); ++ #else ++ iee_set_kernel_upage((unsigned long)init_iee_stack_begin); ++ iee_set_kernel_upage((unsigned long)__va(__pa_symbol(init_iee_stack_begin))); ++ #endif ++ #endif ++ ++ // Init token for init_task. ++ #ifdef CONFIG_IEE ++ // Change SP_EL0 from Image VA to Logical VA. ++ SP_EL0 = (unsigned long)__va(__pa_symbol(&init_task)); ++ write_sysreg(SP_EL0, sp_el0); ++ init_task.cpus_ptr = &(((struct task_struct *)(__va(__pa_symbol(&init_task))))->cpus_mask); ++ init_task.children.prev = (__va(__pa_symbol(init_task.children.prev))); ++ init_task.children.next = (__va(__pa_symbol(init_task.children.next))); ++ // Set init_task into __entry_task before per_cpu init. ++ *(struct task_struct **)__entry_task = __va(__pa_symbol(&init_task)); ++ // Alloc a page for init_token. ++ new = __va(early_pgtable_alloc(0)); ++ init_token = (void *)__phys_to_iee(__pa_symbol(&init_task)); ++ #ifdef CONFIG_PTP ++ iee_set_token_page_valid_pre_init(init_token, new); ++ #else ++ iee_set_token_page_valid(init_token, new); ++ #endif ++ // Use lm to write token before IEE initialized. ++ token = (struct task_token *)((unsigned long)new + (((unsigned long)&init_task) & ~PAGE_MASK)); ++ token->mm = &init_mm; ++ token->pgd = NULL; ++ token->iee_stack = (void *)init_iee_stack_end; ++ token->valid = true; ++ #endif ++ ++ #ifdef CONFIG_PTP ++ // Map the existing pgtable into IEE, set valid. ++ init_iee(); ++ #endif ++ ++ #ifdef CONFIG_IEE ++ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SPAN); ++ #endif ++ ++ #ifdef CONFIG_PTP ++ // IEE ready. ++ // Pgtable writing before uses logical memory and after uses IEE memory. ++ ++ // Set the logical va of existing pgtable readonly. ++ iee_mark_all_lm_pgtable_ro(); ++ #endif ++ ++ // Set the init token readonly. ++ #ifdef CONFIG_IEE ++ set_iee_page_valid(__phys_to_iee(__pa(new))); ++ iee_set_logical_mem_ro((unsigned long)new); ++ ++ // Set HPD1 as 1. 
++ tcr = read_sysreg(tcr_el1); ++ tcr |= ((unsigned long)0x1 << 42); ++ write_sysreg(tcr, tcr_el1); ++ isb(); ++ ++ // Flush tlb to enable IEE. ++ flush_tlb_all(); ++ ++ // mark that iee is prepared. ++ iee_init_done = true; ++ #endif + } + + #ifdef CONFIG_MEMORY_HOTPLUG +diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c +index 4a64089e5771..894bda11c389 100644 +--- a/arch/arm64/mm/pgd.c ++++ b/arch/arm64/mm/pgd.c +@@ -15,14 +15,44 @@ + #include <asm/page.h> + #include <asm/tlbflush.h> + ++#ifdef CONFIG_PTP ++#include <linux/iee-func.h> ++#endif ++ + static struct kmem_cache *pgd_cache __ro_after_init; + ++#ifdef CONFIG_KOI ++pgd_t *koi_pgd_alloc(void) ++{ ++ pgd_t *pgd; ++#ifdef CONFIG_PTP ++ pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); ++ unsigned long iee_addr = __phys_to_iee(__pa(pgd)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)pgd); ++#else ++ pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); ++#endif ++ return pgd; ++} ++#endif ++ + pgd_t *pgd_alloc(struct mm_struct *mm) + { + gfp_t gfp = GFP_PGTABLE_USER; + + if (PGD_SIZE == PAGE_SIZE) ++#ifdef CONFIG_PTP ++ { ++ pgd_t* new = (pgd_t *)__get_free_page(gfp); ++ unsigned long iee_addr = __phys_to_iee(__pa(new)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)new); ++ return new; ++ } ++#else + return (pgd_t *)__get_free_page(gfp); ++#endif + else + return kmem_cache_alloc(pgd_cache, gfp); + } +@@ -30,7 +60,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + void pgd_free(struct mm_struct *mm, pgd_t *pgd) + { + if (PGD_SIZE == PAGE_SIZE) ++#ifdef CONFIG_PTP ++ { ++ unsigned long iee_addr = __phys_to_iee(__pa(pgd)); ++ set_iee_page_invalid(iee_addr); ++ iee_set_logical_mem_rw((unsigned long)pgd); ++ free_page((unsigned long)pgd); ++ } ++#else + free_page((unsigned long)pgd); ++#endif + else + kmem_cache_free(pgd_cache, pgd); + } +diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S +index 14fdf645edc8..230b2b883a51 100644 +--- a/arch/arm64/mm/proc.S ++++ b/arch/arm64/mm/proc.S +@@ -22,6 +22,8 @@ + #include <asm/smp.h> + #include <asm/sysreg.h> + ++ ++ + #ifdef CONFIG_ARM64_64K_PAGES + #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K + #elif defined(CONFIG_ARM64_16K_PAGES) +@@ -105,6 +107,19 @@ SYM_FUNC_START(cpu_do_suspend) + ret + SYM_FUNC_END(cpu_do_suspend) + ++ ++#ifdef CONFIG_IEE ++// SP_EL0 check failed. ++SYM_FUNC_START_LOCAL(sp_el0_check_failed) ++1: ++ nop ++ nop ++ nop ++ nop ++ b 1f ++SYM_FUNC_END(sp_el0_check_failed) ++#endif ++ + /** + * cpu_do_resume - restore CPU register context + * +@@ -148,6 +163,13 @@ SYM_FUNC_START(cpu_do_resume) + msr sctlr_el1, x12 + set_this_cpu_offset x13 + msr sp_el0, x14 ++#ifdef CONFIG_IEE ++ // tsk check. 
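The four instructions that follow load the per-cpu __entry_task, compare it with the task pointer just restored into SP_EL0, and branch to sp_el0_check_failed on mismatch. A C rendering of the same logic; the names mirror the patch, and the failure path is simplified to a spin:

```c
/* Model of the resume-path SP_EL0 consistency check. */
#include <stdio.h>

struct task_struct { int dummy; };

static struct task_struct init_task;
static struct task_struct *entry_task = &init_task; /* per-cpu in the patch */

static void check_sp_el0(struct task_struct *sp_el0)
{
	if (entry_task != sp_el0) {
		/* loosely corresponds to the sp_el0_check_failed stub */
		fprintf(stderr, "sp_el0 check failed\n");
		for (;;)
			; /* park on mismatch */
	}
}

int main(void)
{
	check_sp_el0(&init_task); /* matching task: passes */
	puts("sp_el0 check passed");
	return 0;
}
```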
++ ldr_this_cpu x2, __entry_task, x3 ++ mrs x3, sp_el0 ++ cmp x2, x3 ++ b.ne sp_el0_check_failed ++#endif + /* + * Restore oslsr_el1 by writing oslar_el1 + */ +@@ -190,6 +212,7 @@ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1) + __idmap_cpu_set_reserved_ttbr1 x1, x3 + + offset_ttbr1 x0, x3 ++ + msr ttbr1_el1, x0 + isb + +@@ -452,6 +475,11 @@ SYM_FUNC_START(__cpu_setup) + orr tcr, tcr, #TCR_HA // hardware Access flag update + 1: + #endif /* CONFIG_ARM64_HW_AFDBM */ ++ ++#ifdef CONFIG_IEE ++ orr tcr, tcr, #TCR_HPD1 // Hierarchical permission disables ++#endif ++ + msr mair_el1, mair + msr tcr_el1, tcr + +diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c +index 5139a28130c0..15d2a3faa048 100644 +--- a/arch/arm64/mm/trans_pgd.c ++++ b/arch/arm64/mm/trans_pgd.c +@@ -25,6 +25,9 @@ + #include <linux/mm.h> + #include <linux/mmzone.h> + #include <linux/kfence.h> ++#ifdef CONFIG_PTP ++#include <linux/iee-func.h> ++#endif + + static void *trans_alloc(struct trans_pgd_info *info) + { +@@ -65,10 +68,18 @@ static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, + pte_t *src_ptep; + pte_t *dst_ptep; + unsigned long addr = start; ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif + + dst_ptep = trans_alloc(info); + if (!dst_ptep) + return -ENOMEM; ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(dst_ptep)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)dst_ptep); ++ #endif + pmd_populate_kernel(NULL, dst_pmdp, dst_ptep); + dst_ptep = pte_offset_kernel(dst_pmdp, start); + +@@ -87,11 +98,19 @@ static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, + pmd_t *dst_pmdp; + unsigned long next; + unsigned long addr = start; ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif + + if (pud_none(READ_ONCE(*dst_pudp))) { + dst_pmdp = trans_alloc(info); + if (!dst_pmdp) + return -ENOMEM; ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(dst_pmdp)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)dst_pmdp); ++ #endif + pud_populate(NULL, dst_pudp, dst_pmdp); + } + dst_pmdp = pmd_offset(dst_pudp, start); +@@ -123,11 +142,19 @@ static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, + pud_t *src_pudp; + unsigned long next; + unsigned long addr = start; ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif + + if (p4d_none(READ_ONCE(*dst_p4dp))) { + dst_pudp = trans_alloc(info); + if (!dst_pudp) + return -ENOMEM; ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(dst_pudp)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)dst_pudp); ++ #endif + p4d_populate(NULL, dst_p4dp, dst_pudp); + } + dst_pudp = pud_offset(dst_p4dp, start); +@@ -212,6 +239,12 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, + return -ENOMEM; + } + ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(__pa(trans_pgd)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)trans_pgd); ++ #endif ++ + rc = copy_page_tables(info, trans_pgd, start, end); + if (!rc) + *dst_pgdp = trans_pgd; +@@ -238,6 +271,9 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, + int bits_mapped = PAGE_SHIFT - 4; + unsigned long level_mask, prev_level_entry, *levels[4]; + int this_level, index, level_lsb, level_msb; ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif + + dst_addr &= PAGE_MASK; + prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX)); +@@ -247,12 +283,22 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, 
phys_addr_t *trans_ttbr0, + if (!levels[this_level]) + return -ENOMEM; + ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(levels[this_level])); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)levels[this_level]); ++ #endif ++ + level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level); + level_msb = min(level_lsb + bits_mapped, max_msb); + level_mask = GENMASK_ULL(level_msb, level_lsb); + + index = (dst_addr & level_mask) >> level_lsb; ++ #ifdef CONFIG_PTP ++ set_pte((pte_t *)(levels[this_level] + index), __pte(prev_level_entry)); ++ #else + *(levels[this_level] + index) = prev_level_entry; ++ #endif + + pfn = virt_to_pfn(levels[this_level]); + prev_level_entry = pte_val(pfn_pte(pfn, +diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c +index 83092d93f36a..fb12e7d0660a 100644 +--- a/drivers/firmware/efi/arm-runtime.c ++++ b/drivers/firmware/efi/arm-runtime.c +@@ -94,7 +94,11 @@ static int __init arm_enable_runtime_services(void) + return 0; + } + ++ #ifdef CONFIG_PTP ++ efi_memmap_unmap_after_init(); ++ #else + efi_memmap_unmap(); ++ #endif + + mapsize = efi.memmap.desc_size * efi.memmap.nr_map; + +diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c +index a1180461a445..4c64b6f15717 100644 +--- a/drivers/firmware/efi/memmap.c ++++ b/drivers/firmware/efi/memmap.c +@@ -105,6 +105,26 @@ void __init efi_memmap_unmap(void) + clear_bit(EFI_MEMMAP, &efi.flags); + } + ++#ifdef CONFIG_PTP ++void __init efi_memmap_unmap_after_init(void) ++{ ++ if (!efi_enabled(EFI_MEMMAP)) ++ return; ++ ++ if (!(efi.memmap.flags & EFI_MEMMAP_LATE)) { ++ unsigned long size; ++ ++ size = efi.memmap.desc_size * efi.memmap.nr_map; ++ early_iounmap_after_init((__force void __iomem *)efi.memmap.map, size); ++ } else { ++ memunmap(efi.memmap.map); ++ } ++ ++ efi.memmap.map = NULL; ++ clear_bit(EFI_MEMMAP, &efi.flags); ++} ++#endif ++ + /** + * efi_memmap_init_late - Map efi.memmap with memremap() + * @phys_addr: Physical address of the new EFI memory map +diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c +index a5fbb6ed38ae..81428783b9da 100644 +--- a/drivers/tty/serial/earlycon.c ++++ b/drivers/tty/serial/earlycon.c +@@ -40,7 +40,11 @@ static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size) + { + void __iomem *base; + #ifdef CONFIG_FIX_EARLYCON_MEM ++ #ifdef CONFIG_PTP ++ __iee_set_fixmap_pre_init(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK, FIXMAP_PAGE_IO); ++ #else + set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK); ++ #endif + base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); + base += paddr & ~PAGE_MASK; + #else +diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c +index 45b42d8f6453..b71072d6957e 100644 +--- a/drivers/usb/early/ehci-dbgp.c ++++ b/drivers/usb/early/ehci-dbgp.c +@@ -879,7 +879,11 @@ int __init early_dbgp_init(char *s) + * FIXME I don't have the bar size so just guess PAGE_SIZE is more + * than enough. 1K is the biggest I have seen. 
+ */ ++ #ifdef CONFIG_PTP ++ __iee_set_fixmap_pre_init(FIX_DBGP_BASE, bar_val & PAGE_MASK, FIXMAP_PAGE_NOCACHE); ++ #else + set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); ++ #endif + ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); + ehci_bar += bar_val & ~PAGE_MASK; + dbgp_printk("ehci_bar: %p\n", ehci_bar); +diff --git a/fs/coredump.c b/fs/coredump.c +index 9d235fa14ab9..72be355903ca 100644 +--- a/fs/coredump.c ++++ b/fs/coredump.c +@@ -53,6 +53,10 @@ + + #include <trace/events/sched.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + static bool dump_vma_snapshot(struct coredump_params *cprm); + static void free_vma_snapshot(struct coredump_params *cprm); + +@@ -564,7 +568,11 @@ void do_coredump(const kernel_siginfo_t *siginfo) + */ + if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { + /* Setuid core dump mode */ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(cred,GLOBAL_ROOT_UID); ++ #else + cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ ++ #endif + need_suid_safe = true; + } + +diff --git a/fs/exec.c b/fs/exec.c +index 04fb89656cc3..3689c5f008ba 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -76,6 +76,14 @@ + + #include <trace/events/sched.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ ++#ifdef CONFIG_IEE ++#include <asm/iee-token.h> ++#endif ++ + static int bprm_creds_from_file(struct linux_binprm *bprm); + + int suid_dumpable = 0; +@@ -1005,6 +1013,10 @@ static int exec_mmap(struct mm_struct *mm) + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); + activate_mm(active_mm, mm); ++ #ifdef CONFIG_IEE ++ iee_set_token_mm(tsk, mm); ++ iee_set_token_pgd(tsk, mm->pgd); ++ #endif + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); + lru_gen_add_mm(mm); +@@ -1618,12 +1630,20 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) + + if (mode & S_ISUID) { + bprm->per_clear |= PER_CLEAR_ON_SETID; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_euid(bprm->cred, vfsuid_into_kuid(vfsuid)); ++ #else + bprm->cred->euid = vfsuid_into_kuid(vfsuid); ++ #endif + } + + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + bprm->per_clear |= PER_CLEAR_ON_SETID; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_egid(bprm->cred, vfsgid_into_kgid(vfsgid)); ++ #else + bprm->cred->egid = vfsgid_into_kgid(vfsgid); ++ #endif + } + } + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 3e724cb7ef01..e32e136e4271 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -15,6 +15,10 @@ + + #include <linux/sunrpc/metrics.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + #include "flexfilelayout.h" + #include "../nfs4session.h" + #include "../nfs4idmap.h" +@@ -502,8 +506,13 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, + rc = -ENOMEM; + if (!kcred) + goto out_err_free; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(kcred,uid); ++ iee_set_cred_fsgid(kcred,gid); ++ #else + kcred->fsuid = uid; + kcred->fsgid = gid; ++ #endif + cred = RCU_INITIALIZER(kcred); + + if (lgr->range.iomode == IOMODE_READ) +diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c +index 25a7c771cfd8..b15ab8e33e0e 100644 +--- a/fs/nfs/nfs4idmap.c ++++ b/fs/nfs/nfs4idmap.c +@@ -48,6 +48,10 @@ + #include <linux/module.h> + #include <linux/user_namespace.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + #include "internal.h" + #include "netns.h" + #include "nfs4idmap.h" +@@ -226,8 +230,13 @@ 
int nfs_idmap_init(void) + goto failed_reg_legacy; + + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(cred,keyring); ++ iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); ++ #else + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; ++ #endif + id_resolver_cache = cred; + return 0; + +diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c +index e6beaaf4f170..e89385fd81f1 100644 +--- a/fs/nfsd/auth.c ++++ b/fs/nfsd/auth.c +@@ -2,6 +2,9 @@ + /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ + + #include <linux/sched.h> ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif + #include "nfsd.h" + #include "auth.h" + +@@ -32,22 +35,40 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) + if (!new) + return -ENOMEM; + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,rqstp->rq_cred.cr_uid); ++ iee_set_cred_fsgid(new,rqstp->rq_cred.cr_gid); ++ #else + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; ++ #endif + + rqgi = rqstp->rq_cred.cr_group_info; + + if (flags & NFSEXP_ALLSQUASH) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,exp->ex_anon_uid); ++ iee_set_cred_fsgid(new,exp->ex_anon_gid); ++ #else + new->fsuid = exp->ex_anon_uid; + new->fsgid = exp->ex_anon_gid; ++ #endif + gi = groups_alloc(0); + if (!gi) + goto oom; + } else if (flags & NFSEXP_ROOTSQUASH) { + if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,exp->ex_anon_uid); ++ #else + new->fsuid = exp->ex_anon_uid; ++ #endif + if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsgid(new,exp->ex_anon_gid); ++ #else + new->fsgid = exp->ex_anon_gid; ++ #endif + + gi = groups_alloc(rqgi->ngroups); + if (!gi) +@@ -67,18 +88,35 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) + } + + if (uid_eq(new->fsuid, INVALID_UID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,exp->ex_anon_uid); ++ #else + new->fsuid = exp->ex_anon_uid; ++ #endif + if (gid_eq(new->fsgid, INVALID_GID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsgid(new,exp->ex_anon_gid); ++ #else + new->fsgid = exp->ex_anon_gid; ++ #endif + + set_groups(new, gi); + put_group_info(gi); + + if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,cap_drop_nfsd_set(new->cap_effective)); ++ #else + new->cap_effective = cap_drop_nfsd_set(new->cap_effective); ++ #endif + else ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,cap_raise_nfsd_set(new->cap_effective, ++ new->cap_permitted)); ++ #else + new->cap_effective = cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); ++ #endif + put_cred(override_creds(new)); + put_cred(new); + return 0; +diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c +index 4039ffcf90ba..6e0dfa01e01e 100644 +--- a/fs/nfsd/nfs4callback.c ++++ b/fs/nfsd/nfs4callback.c +@@ -41,6 +41,9 @@ + #include "trace.h" + #include "xdr4cb.h" + #include "xdr4.h" ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif + + #define NFSDDBG_FACILITY NFSDDBG_PROC + +@@ -946,8 +949,13 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r + if (!kcred) + return NULL; + +- kcred->fsuid = ses->se_cb_sec.uid; +- kcred->fsgid = ses->se_cb_sec.gid; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_uid(kcred,ses->se_cb_sec.uid); ++ iee_set_cred_gid(kcred,ses->se_cb_sec.gid); ++ #else ++ kcred->uid = ses->se_cb_sec.uid; ++ kcred->gid = ses->se_cb_sec.gid; ++ #endif + 
return kcred;
+ }
+ }
+diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
+index 3509e73abe1f..798fe0352841 100644
+--- a/fs/nfsd/nfs4recover.c
++++ b/fs/nfsd/nfs4recover.c
+@@ -44,6 +44,10 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfsd/cld.h>
+ 
++#ifdef CONFIG_CREDP
++#include <asm/iee-cred.h>
++#endif
++
+ #include "nfsd.h"
+ #include "state.h"
+ #include "vfs.h"
+@@ -78,8 +82,13 @@ nfs4_save_creds(const struct cred **original_creds)
+ if (!new)
+ return -ENOMEM;
+ 
++ #ifdef CONFIG_CREDP
++ iee_set_cred_fsuid(new,GLOBAL_ROOT_UID);
++ iee_set_cred_fsgid(new,GLOBAL_ROOT_GID);
++ #else
+ new->fsuid = GLOBAL_ROOT_UID;
+ new->fsgid = GLOBAL_ROOT_GID;
++ #endif
+ *original_creds = override_creds(new);
+ put_cred(new);
+ return 0;
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 355bf0db3235..6cb276dde4e9 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -16,6 +16,10 @@
+ #include "auth.h"
+ #include "trace.h"
+ 
++#ifdef CONFIG_CREDP
++#include <asm/iee-cred.h>
++#endif
++
+ #define NFSDDBG_FACILITY NFSDDBG_FH
+ 
+ 
+@@ -223,9 +227,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ error = nfserrno(-ENOMEM);
+ goto out;
+ }
++ #ifdef CONFIG_CREDP
++ iee_set_cred_cap_effective(new,cap_raise_nfsd_set(new->cap_effective,
++ new->cap_permitted));
++ #else
+ new->cap_effective =
+ cap_raise_nfsd_set(new->cap_effective,
+ new->cap_permitted);
++ #endif
+ put_cred(override_creds(new));
+ put_cred(new);
+ } else {
+diff --git a/fs/open.c b/fs/open.c
+index 54723fceb776..d83901dc50ff 100644
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -35,6 +35,11 @@
+ #include <linux/mnt_idmapping.h>
+ #include <linux/filelock.h>
+ 
++#ifdef CONFIG_CREDP
++#include <asm/iee-cred.h>
++#endif
++
++
+ #include "internal.h"
+ 
+ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
+@@ -414,17 +419,34 @@ static const struct cred *access_override_creds(void)
+ * routine.
+ */
+ 
++ #ifdef CONFIG_CREDP
++ iee_set_cred_fsuid(override_cred,override_cred->uid);
++ iee_set_cred_fsgid(override_cred,override_cred->gid);
++ #else
+ override_cred->fsuid = override_cred->uid;
+ override_cred->fsgid = override_cred->gid;
++ #endif
+ 
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ /* Clear the capabilities if we switch to a non-root user */
+ kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
+ if (!uid_eq(override_cred->uid, root_uid))
++ #ifdef CONFIG_CREDP
++ do {
++ kernel_cap_t tmp_cap = override_cred->cap_effective;
++ tmp_cap.val = 0;
++ iee_set_cred_cap_effective(override_cred, tmp_cap);
++ } while (0);
++ #else
+ cap_clear(override_cred->cap_effective);
++ #endif
+ else
++ #ifdef CONFIG_CREDP
++ iee_set_cred_cap_effective(override_cred,override_cred->cap_permitted);
++ #else
+ override_cred->cap_effective =
+ override_cred->cap_permitted;
++ #endif
+ }
+ 
+ /*
+@@ -444,7 +466,11 @@ static const struct cred *access_override_creds(void)
+ * expecting RCU freeing. But normal thread-synchronous
+ * cred accesses will keep things non-racy.
+ */ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_non_rcu(override_cred,1); ++ #else + override_cred->non_rcu = 1; ++ #endif + + old_cred = override_creds(override_cred); + +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +index 033fc0458a3d..2afa31ead2b5 100644 +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -16,6 +16,10 @@ + #include <linux/ratelimit.h> + #include "overlayfs.h" + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + static unsigned short ovl_redirect_max = 256; + module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); + MODULE_PARM_DESC(redirect_max, +@@ -593,8 +597,13 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, + * create a new inode, so just use the ovl mounter's + * fs{u,g}id. + */ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(override_cred,inode->i_uid); ++ iee_set_cred_fsgid(override_cred,inode->i_gid); ++ #else + override_cred->fsuid = inode->i_uid; + override_cred->fsgid = inode->i_gid; ++ #endif + err = security_dentry_create_files_as(dentry, + attr->mode, &dentry->d_name, old_cred, + override_cred); +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 2c056d737c27..9ede99ddb04b 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -21,6 +21,10 @@ + #include "overlayfs.h" + #include "params.h" + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); + MODULE_DESCRIPTION("Overlay filesystem"); + MODULE_LICENSE("GPL"); +@@ -1485,7 +1489,15 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) + sb->s_export_op = &ovl_export_fid_operations; + + /* Never override disk quota limits or use reserved space */ ++ #ifdef CONFIG_CREDP ++ { ++ kernel_cap_t tmp = cred->cap_effective; ++ cap_lower(tmp, CAP_SYS_RESOURCE); ++ iee_set_cred_cap_effective(cred, tmp); ++ } ++ #else + cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); ++ #endif + + sb->s_magic = OVERLAYFS_SUPER_MAGIC; + sb->s_xattr = ofs->config.userxattr ? 
ovl_user_xattr_handlers : +diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c +index af7849e5974f..4ac2f0e65955 100644 +--- a/fs/smb/client/cifs_spnego.c ++++ b/fs/smb/client/cifs_spnego.c +@@ -18,6 +18,10 @@ + #include "cifs_spnego.h" + #include "cifs_debug.h" + #include "cifsproto.h" ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + static const struct cred *spnego_cred; + + /* create a new cifs key */ +@@ -212,8 +216,13 @@ init_cifs_spnego(void) + * the results it looks up + */ + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(cred,keyring); ++ iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); ++ #else + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; ++ #endif + spnego_cred = cred; + + cifs_dbg(FYI, "cifs spnego keyring: %d\n", key_serial(keyring)); +diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c +index f5b6df82e857..9f0ff045836d 100644 +--- a/fs/smb/client/cifsacl.c ++++ b/fs/smb/client/cifsacl.c +@@ -26,6 +26,10 @@ + #include "cifs_fs_sb.h" + #include "cifs_unicode.h" + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + /* security id for everyone/world system group */ + static const struct cifs_sid sid_everyone = { + 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; +@@ -491,8 +495,13 @@ init_cifs_idmap(void) + /* instruct request_key() to use this special keyring as a cache for + * the results it looks up */ + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(cred,keyring); ++ iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); ++ #else + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; ++ #endif + root_cred = cred; + + cifs_dbg(FYI, "cifs idmap keyring: %d\n", key_serial(keyring)); +diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h +index 9d0479f50f97..f501e0f965f8 100644 +--- a/include/asm-generic/early_ioremap.h ++++ b/include/asm-generic/early_ioremap.h +@@ -17,6 +17,9 @@ extern void *early_memremap_ro(resource_size_t phys_addr, + extern void *early_memremap_prot(resource_size_t phys_addr, + unsigned long size, unsigned long prot_val); + extern void early_iounmap(void __iomem *addr, unsigned long size); ++#ifdef CONFIG_PTP ++extern void early_iounmap_after_init(void __iomem *addr, unsigned long size); ++#endif + extern void early_memunmap(void *addr, unsigned long size); + + #if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU) +diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h +index 8cc7b09c1bc7..83158589a545 100644 +--- a/include/asm-generic/fixmap.h ++++ b/include/asm-generic/fixmap.h +@@ -70,6 +70,24 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) + __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR) + #endif + ++#ifdef CONFIG_PTP ++#ifndef clear_fixmap_init ++#define clear_fixmap_init(idx) \ ++ __iee_set_fixmap_pre_init(idx, 0, FIXMAP_PAGE_CLEAR) ++#endif ++ ++#define __iee_set_fixmap_offset_pre_init(idx, phys, flags) \ ++({ \ ++ unsigned long ________addr; \ ++ __iee_set_fixmap_pre_init(idx, phys, flags); \ ++ ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ ++ ________addr; \ ++}) ++ ++#define iee_set_fixmap_offset_pre_init(idx, phys) \ ++ __iee_set_fixmap_offset_pre_init(idx, phys, FIXMAP_PAGE_NORMAL) ++#endif ++ + /* Return a pointer with offset calculated */ + #define __set_fixmap_offset(idx, phys, flags) \ + ({ \ +diff 
--git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h +index c75d4a753849..506ff9662e02 100644 +--- a/include/asm-generic/pgalloc.h ++++ b/include/asm-generic/pgalloc.h +@@ -7,6 +7,10 @@ + #define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO) + #define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) + ++#ifdef CONFIG_PTP ++#include <linux/iee-func.h> ++#endif ++ + /** + * __pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table + * @mm: the mm_struct of the current context +@@ -23,6 +27,13 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) + + if (!ptdesc) + return NULL; ++ ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(__pa(ptdesc_address(ptdesc))); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)ptdesc_address(ptdesc)); ++ #endif ++ + return ptdesc_address(ptdesc); + } + +@@ -46,6 +57,11 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) + */ + static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) + { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(__pa(pte)); ++ set_iee_page_invalid(iee_addr); ++ iee_set_logical_mem_rw((unsigned long)pte); ++ #endif + pagetable_free(virt_to_ptdesc(pte)); + } + +@@ -73,6 +89,13 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) + return NULL; + } + ++ #ifdef CONFIG_PTP ++ pte_t *pte = (pte_t *)page_address(ptdesc_page(ptdesc)); ++ unsigned long iee_addr = __phys_to_iee(__pa(pte)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)pte); ++ #endif ++ + return ptdesc_page(ptdesc); + } + +@@ -103,9 +126,20 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) + */ + static inline void pte_free(struct mm_struct *mm, struct page *pte_page) + { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif ++ + struct ptdesc *ptdesc = page_ptdesc(pte_page); + + pagetable_pte_dtor(ptdesc); ++ ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(page_address(pte_page))); ++ set_iee_page_invalid(iee_addr); ++ iee_set_logical_mem_rw((unsigned long)page_address(pte_page)); ++ #endif ++ + pagetable_free(ptdesc); + } + +@@ -145,10 +179,21 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) + #ifndef __HAVE_ARCH_PMD_FREE + static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) + { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif ++ + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); + + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + pagetable_pmd_dtor(ptdesc); ++ ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(pmd)); ++ set_iee_page_invalid(iee_addr); ++ iee_set_logical_mem_rw((unsigned long)pmd); ++ #endif ++ + pagetable_free(ptdesc); + } + #endif +@@ -190,7 +235,16 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) + + static inline void __pud_free(struct mm_struct *mm, pud_t *pud) + { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif ++ + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(pud)); ++ set_iee_page_invalid(iee_addr); ++ iee_set_logical_mem_rw((unsigned long)pud); ++ #endif + pagetable_free(virt_to_ptdesc(pud)); + } + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 200853042fc7..9d733afced53 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -346,6 +346,17 @@ + KEEP(*(.dtb.init.rodata)) \ + __dtb_end = .; + ++#ifdef CONFIG_KOI ++#define KOI_DATA() \ 
++ . = ALIGN(PAGE_SIZE); \
++ __koi_data_start = .; \
++ *(.data..koi) \
++ . = ALIGN(PAGE_SIZE); \
++ __koi_data_end = .;
++#else
++#define KOI_DATA()
++#endif
++
+ /*
+ * .data section
+ */
+@@ -370,8 +381,8 @@
+ BRANCH_PROFILE() \
+ TRACE_PRINTKS() \
+ BPF_RAW_TP() \
+- TRACEPOINT_STR()
+-
++ TRACEPOINT_STR() \
++ KOI_DATA()
+ /*
+ * Data section helpers
+ */
+@@ -1093,6 +1104,14 @@
+ * They will fit only a subset of the architectures
+ */
+ 
++#ifdef CONFIG_CREDP
++ #define CRED_DATA \
++ . = ALIGN(PAGE_SIZE); \
++ *(.iee.cred) \
++ . = ALIGN(PAGE_SIZE);
++#else
++ #define CRED_DATA
++#endif
+ 
+ /*
+ * Writeable data.
+@@ -1110,6 +1129,7 @@
+ . = ALIGN(PAGE_SIZE); \
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { \
+ INIT_TASK_DATA(inittask) \
++ CRED_DATA \
+ NOSAVE_DATA \
+ PAGE_ALIGNED_DATA(pagealigned) \
+ CACHELINE_ALIGNED_DATA(cacheline) \
+diff --git a/include/linux/cred.h b/include/linux/cred.h
+index e01c6d094a30..cceb4842b619 100644
+--- a/include/linux/cred.h
++++ b/include/linux/cred.h
+@@ -18,6 +18,10 @@
+ #include <linux/sched/user.h>
+ #include <linux/kabi.h>
+ 
++#ifdef CONFIG_CREDP
++#include <asm/iee-def.h>
++#endif
++
+ struct cred;
+ struct inode;
+ 
+@@ -153,6 +157,22 @@ struct cred {
+ KABI_RESERVE(4)
+ } __randomize_layout;
+ 
++#ifdef CONFIG_CREDP
++extern unsigned long long iee_rw_gate(int flag, ...);
++static inline void iee_set_cred_non_rcu(struct cred *cred, int non_rcu)
++{
++ iee_rw_gate(IEE_OP_SET_CRED_NON_RCU,cred,non_rcu);
++ *(int *)(&(((struct rcu_head *)(cred->rcu.func))->next)) = non_rcu;
++}
++
++static noinline bool iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr)
++{
++ bool ret;
++ ret = iee_rw_gate(IEE_OP_SET_CRED_ATOP_USAGE,cred,flag,nr);
++ return ret;
++}
++#endif
++
+ extern void __put_cred(struct cred *);
+ extern void exit_creds(struct task_struct *);
+ extern int copy_creds(struct task_struct *, unsigned long);
+@@ -189,7 +209,11 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
+ */
+ static inline struct cred *get_new_cred_many(struct cred *cred, int nr)
+ {
++ #ifdef CONFIG_CREDP
++ iee_set_cred_atomic_op_usage(cred, AT_ADD, nr);
++ #else
+ atomic_long_add(nr, &cred->usage);
++ #endif
+ return cred;
+ }
+ 
+@@ -202,7 +226,7 @@ static inline struct cred *get_new_cred_many(struct cred *cred, int nr)
+ */
+ static inline struct cred *get_new_cred(struct cred *cred)
+ {
+- return get_new_cred_many(cred, 1);
++ return get_new_cred_many(cred, 1); // XXXzgc atomic_inc -> get_new_cred_many
+ }
+ 
+ /**
+@@ -224,7 +248,11 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr)
+ struct cred *nonconst_cred = (struct cred *) cred;
+ if (!cred)
+ return cred;
++ #ifdef CONFIG_CREDP
++ iee_set_cred_non_rcu(nonconst_cred,0);
++ #else
+ nonconst_cred->non_rcu = 0;
++ #endif
+ return get_new_cred_many(nonconst_cred, nr);
+ }
+ 
+@@ -247,9 +275,19 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
+ struct cred *nonconst_cred = (struct cred *) cred;
+ if (!cred)
+ return NULL;
++ #ifdef CONFIG_CREDP
++ if (!iee_set_cred_atomic_op_usage(nonconst_cred,AT_INC_NOT_ZERO,0))
++ return NULL;
++ #else
+ if (!atomic_long_inc_not_zero(&nonconst_cred->usage))
+ return NULL;
++ #endif
++
++ #ifdef CONFIG_CREDP
++ iee_set_cred_non_rcu(nonconst_cred,0);
++ #else
+ nonconst_cred->non_rcu = 0;
++ #endif
+ return cred;
+ }
+ 
+@@ -270,8 +308,13 @@ static inline void put_cred_many(const struct cred *_cred, int nr)
+ struct cred *cred = (struct cred *) _cred;
+ 
+ if (cred) {
++ #ifdef CONFIG_CREDP
++ if 
(iee_set_cred_atomic_op_usage(cred,AT_SUB_AND_TEST,nr))
++ __put_cred(cred);
++ #else
+ if (atomic_long_sub_and_test(nr, &cred->usage))
+ __put_cred(cred);
++ #endif
+ }
+ }
+ 
+diff --git a/include/linux/efi.h b/include/linux/efi.h
+index 9ed79128458c..970cc4f7068b 100644
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -740,6 +740,15 @@ extern int __init __efi_memmap_init(struct efi_memory_map_data *data);
+ extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
+ extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
+ extern void __init efi_memmap_unmap(void);
++#ifdef CONFIG_PTP
++extern void __init efi_memmap_unmap_after_init(void);
++#endif
++extern int __init efi_memmap_install(struct efi_memory_map_data *data);
++extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
++ struct range *range);
++extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
++ void *buf, struct efi_mem_range *mem);
++extern void __init efi_print_memmap(void);
+ 
+ #ifdef CONFIG_EFI_ESRT
+ extern void __init efi_esrt_init(void);
+diff --git a/include/linux/iee-func.h b/include/linux/iee-func.h
+new file mode 100644
+index 000000000000..79171de67c2a
+--- /dev/null
++++ b/include/linux/iee-func.h
+@@ -0,0 +1,34 @@
++#ifndef _LINUX_IEE_FUNC_H
++#define _LINUX_IEE_FUNC_H
++
++#include <linux/types.h>
++#include <linux/percpu.h>
++#include <linux/mm_types.h>
++#include <asm/page.h>
++
++struct task_struct;
++
++#ifdef CONFIG_IEE
++// Declare the __entry_task.
++__attribute__((aligned(PAGE_SIZE))) DECLARE_PER_CPU(struct task_struct *[PAGE_SIZE/sizeof(struct task_struct *)], __entry_task);
++
++extern unsigned long long iee_rw_gate(int flag, ...);
++extern u32 get_cpu_asid_bits(void);
++extern unsigned long arm64_mm_context_get(struct mm_struct *mm);
++extern void set_iee_page_valid(unsigned long addr);
++extern void set_iee_page_invalid(unsigned long addr);
++extern void iee_set_logical_mem_ro(unsigned long addr);
++extern void iee_set_logical_mem_rw(unsigned long addr);
++extern void iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm);
++extern void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd);
++extern void iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack);
++extern void iee_free_token(struct task_struct *tsk);
++extern unsigned long iee_read_token_stack(struct task_struct *tsk);
++extern void iee_set_token_page_valid(void *token, void *new);
++extern void iee_set_token_page_invalid(void *token);
++extern void iee_set_kernel_ppage(unsigned long addr);
++extern void iee_set_kernel_upage(unsigned long addr);
++extern void iee_write_in_byte(void *ptr, u64 data, int length);
++#endif
++
++#endif
+diff --git a/include/linux/module.h b/include/linux/module.h
+index 4db2878d9e42..ef8d51994017 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -606,6 +606,7 @@ struct module {
+ KABI_RESERVE(2)
+ KABI_RESERVE(3)
+ KABI_RESERVE(4)
++
+ } ____cacheline_aligned __randomize_layout;
+ #ifndef MODULE_ARCH_INIT
+ #define MODULE_ARCH_INIT {}
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index f40411aa7b70..297becfbc8e3 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -773,6 +773,24 @@ struct task_struct_resvd {
+ struct task_struct *task;
+ };
+ 
++#if defined(CONFIG_IEE) || defined(CONFIG_KOI)
++struct task_token {
++#ifdef CONFIG_IEE
++ struct mm_struct *mm; /* VA */
++ pgd_t *pgd; /* Logical VA */
++ void *iee_stack; /* VA */
++ bool valid;
++ void *kernel_stack; /* VA */
++#endif
++#ifdef CONFIG_KOI
++ void *koi_kernel_stack; /* VA */
++ void *koi_stack; 
/* VA */ ++ void *koi_stack_base; /* VA */ ++ unsigned long current_ttbr1; ++#endif ++}; ++#endif ++ + struct task_struct { + #ifdef CONFIG_THREAD_INFO_IN_TASK + /* +@@ -795,6 +813,7 @@ struct task_struct { + randomized_struct_fields_start + + void *stack; ++ + refcount_t usage; + /* Per task flags (PF_*), defined further below: */ + unsigned int flags; +diff --git a/init/main.c b/init/main.c +index 803332dd3d90..0f8d6e2744c2 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -102,6 +102,12 @@ + #include <linux/randomize_kstack.h> + #include <net/net_namespace.h> + ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#include <asm/iee-si.h> ++#include <linux/stop_machine.h> ++#endif ++ + #include <asm/io.h> + #include <asm/setup.h> + #include <asm/sections.h> +@@ -112,6 +118,10 @@ + + #include <kunit/test.h> + ++#ifdef CONFIG_PTP ++extern void *bm_pte_addr; ++#endif ++ + static int kernel_init(void *); + + /* +@@ -880,6 +890,9 @@ void start_kernel(void) + { + char *command_line; + char *after_dashes; ++ #ifdef CONFIG_IEE ++ unsigned int cpu; ++ #endif + + set_task_stack_end_magic(&init_task); + smp_setup_processor_id(); +@@ -904,6 +917,16 @@ void start_kernel(void) + setup_command_line(command_line); + setup_nr_cpu_ids(); + setup_per_cpu_areas(); ++ #ifdef CONFIG_IEE ++ for_each_possible_cpu(cpu) ++ { ++ // Map the __entry_task to IEE. ++ set_iee_page_valid((unsigned long)__phys_to_iee(__pa(SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu])))); ++ // Set the __entry_task of cpu 0 readonly in lm. ++ if(cpu == smp_processor_id()) ++ iee_set_logical_mem_ro((unsigned long)SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu])); ++ } ++ #endif + smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + boot_cpu_hotplug_init(); + +@@ -1446,6 +1469,9 @@ static int __ref kernel_init(void *unused) + wait_for_completion(&kthreadd_done); + + kernel_init_freeable(); ++ #ifdef CONFIG_PTP ++ iee_set_logical_mem_ro((unsigned long)bm_pte_addr); ++ #endif + /* need to finish all async __init code before freeing the memory */ + async_synchronize_full(); + +@@ -1462,7 +1488,7 @@ static int __ref kernel_init(void *unused) + * to finalize PTI. + */ + pti_finalize(); +- ++ + system_state = SYSTEM_RUNNING; + numa_default_policy(); + +diff --git a/kernel/cred.c b/kernel/cred.c +index c033a201c808..2e44530976d5 100644 +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -20,6 +20,11 @@ + #include <linux/cn_proc.h> + #include <linux/uidgid.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#include <linux/iee-func.h> ++#endif ++ + #if 0 + #define kdebug(FMT, ...) 
\ + printk("[%-5.5s%5u] " FMT "\n", \ +@@ -34,6 +39,9 @@ do { \ + #endif + + static struct kmem_cache *cred_jar; ++#ifdef CONFIG_CREDP ++static struct kmem_cache *rcu_jar; ++#endif + + /* init to 2 - one for init_task, one to ensure it is never freed */ + static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; +@@ -41,6 +49,32 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; + /* + * The initial credentials for the initial task + */ ++#ifdef CONFIG_CREDP ++struct cred init_cred __section(".iee.cred") = { ++ .usage = ATOMIC_INIT(4), ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ .subscribers = ATOMIC_INIT(2), ++ .magic = CRED_MAGIC, ++#endif ++ .uid = GLOBAL_ROOT_UID, ++ .gid = GLOBAL_ROOT_GID, ++ .suid = GLOBAL_ROOT_UID, ++ .sgid = GLOBAL_ROOT_GID, ++ .euid = GLOBAL_ROOT_UID, ++ .egid = GLOBAL_ROOT_GID, ++ .fsuid = GLOBAL_ROOT_UID, ++ .fsgid = GLOBAL_ROOT_GID, ++ .securebits = SECUREBITS_DEFAULT, ++ .cap_inheritable = CAP_EMPTY_SET, ++ .cap_permitted = CAP_FULL_SET, ++ .cap_effective = CAP_FULL_SET, ++ .cap_bset = CAP_FULL_SET, ++ .user = INIT_USER, ++ .user_ns = &init_user_ns, ++ .group_info = &init_groups, ++ .ucounts = &init_ucounts, ++}; ++#else + struct cred init_cred = { + .usage = ATOMIC_INIT(4), + .uid = GLOBAL_ROOT_UID, +@@ -61,13 +95,43 @@ struct cred init_cred = { + .group_info = &init_groups, + .ucounts = &init_ucounts, + }; ++#endif ++ ++static inline void set_cred_subscribers(struct cred *cred, int n) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ atomic_set(&cred->subscribers, n); ++#endif ++} ++ ++static inline int read_cred_subscribers(const struct cred *cred) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ return atomic_read(&cred->subscribers); ++#else ++ return 0; ++#endif ++} ++ ++static inline void alter_cred_subscribers(const struct cred *_cred, int n) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ struct cred *cred = (struct cred *) _cred; ++ ++ atomic_add(n, &cred->subscribers); ++#endif ++} + + /* + * The RCU callback to actually dispose of a set of credentials + */ + static void put_cred_rcu(struct rcu_head *rcu) + { ++ #ifdef CONFIG_CREDP ++ struct cred *cred = *(struct cred **)(rcu + 1); ++ #else + struct cred *cred = container_of(rcu, struct cred, rcu); ++ #endif + + kdebug("put_cred_rcu(%p)", cred); + +@@ -86,6 +150,9 @@ static void put_cred_rcu(struct rcu_head *rcu) + if (cred->ucounts) + put_ucounts(cred->ucounts); + put_user_ns(cred->user_ns); ++ #ifdef CONFIG_CREDP ++ kmem_cache_free(rcu_jar, (struct rcu_head *)(cred->rcu.func)); ++ #endif + kmem_cache_free(cred_jar, cred); + } + +@@ -104,10 +171,22 @@ void __put_cred(struct cred *cred) + BUG_ON(cred == current->cred); + BUG_ON(cred == current->real_cred); + ++ #ifdef CONFIG_CREDP ++ if (*(int *)(&(((struct rcu_head *)(cred->rcu.func))->next))) ++ #else + if (cred->non_rcu) ++ #endif ++ #ifdef CONFIG_CREDP ++ put_cred_rcu((struct rcu_head *)(cred->rcu.func)); ++ #else + put_cred_rcu(&cred->rcu); ++ #endif + else ++ #ifdef CONFIG_CREDP ++ call_rcu((struct rcu_head *)(cred->rcu.func), put_cred_rcu); ++ #else + call_rcu(&cred->rcu, put_cred_rcu); ++ #endif + } + EXPORT_SYMBOL(__put_cred); + +@@ -178,7 +257,18 @@ struct cred *cred_alloc_blank(void) + if (!new) + return NULL; + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_rcu(new,kmem_cache_zalloc(rcu_jar, GFP_KERNEL)); ++ *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; ++ iee_set_cred_atomic_set_usage(new,1); ++ #else + atomic_long_set(&new->usage, 1); ++ #endif ++ ++ #ifdef CONFIG_DEBUG_CREDENTIALS ++ new->magic = CRED_MAGIC; ++ #endif 
++ + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) + goto error; + +@@ -213,13 +303,25 @@ struct cred *prepare_creds(void) + if (!new) + return NULL; + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_rcu(new,kmem_cache_alloc(rcu_jar, GFP_KERNEL)); ++ *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; ++ #endif ++ + kdebug("prepare_creds() alloc %p", new); + + old = task->cred; ++ #ifdef CONFIG_CREDP ++ iee_copy_cred(old,new); ++ ++ iee_set_cred_non_rcu(new,0); ++ iee_set_cred_atomic_set_usage(new,1); ++ #else + memcpy(new, old, sizeof(struct cred)); + + new->non_rcu = 0; + atomic_long_set(&new->usage, 1); ++ #endif + get_group_info(new->group_info); + get_uid(new->user); + get_user_ns(new->user_ns); +@@ -232,10 +334,18 @@ struct cred *prepare_creds(void) + #endif + + #ifdef CONFIG_SECURITY ++#ifdef CONFIG_CREDP ++ iee_set_cred_security(new,NULL); ++#else + new->security = NULL; ++#endif + #endif + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); ++ #else + new->ucounts = get_ucounts(new->ucounts); ++ #endif + if (!new->ucounts) + goto error; + +@@ -265,15 +375,30 @@ struct cred *prepare_exec_creds(void) + #ifdef CONFIG_KEYS + /* newly exec'd tasks don't get a thread keyring */ + key_put(new->thread_keyring); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(new,NULL); ++ #else + new->thread_keyring = NULL; ++ #endif + + /* inherit the session keyring; new process keyring */ + key_put(new->process_keyring); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_process_keyring(new,NULL); ++ #else + new->process_keyring = NULL; ++ #endif + #endif + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,new->euid); ++ iee_set_cred_suid(new,new->euid); ++ iee_set_cred_fsgid(new,new->egid); ++ iee_set_cred_sgid(new,new->egid); ++ #else + new->suid = new->fsuid = new->euid; + new->sgid = new->fsgid = new->egid; ++ #endif + + return new; + } +@@ -327,7 +452,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) + * had one */ + if (new->thread_keyring) { + key_put(new->thread_keyring); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(new,NULL); ++ #else + new->thread_keyring = NULL; ++ #endif + if (clone_flags & CLONE_THREAD) + install_thread_keyring_to_cred(new); + } +@@ -337,7 +466,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) + */ + if (!(clone_flags & CLONE_THREAD)) { + key_put(new->process_keyring); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_process_keyring(new,NULL); ++ #else + new->process_keyring = NULL; ++ #endif + } + #endif + +@@ -594,7 +727,11 @@ int set_cred_ucounts(struct cred *new) + if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) + return -EAGAIN; + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_ucounts(new, new_ucounts); ++ #else + new->ucounts = new_ucounts; ++ #endif + put_ucounts(old_ucounts); + + return 0; +@@ -606,8 +743,21 @@ int set_cred_ucounts(struct cred *new) + void __init cred_init(void) + { + /* allocate a slab in which we can store credentials */ ++ #ifdef CONFIG_CREDP + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT|SLAB_RED_ZONE, NULL); ++ rcu_jar = kmem_cache_create("rcu_jar", sizeof(struct rcu_head) + sizeof(struct cred *), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); ++ // Map init_cred ++ *((struct rcu_head **)(&(init_cred.rcu.func))) = (struct rcu_head *)kmem_cache_zalloc(rcu_jar, GFP_KERNEL); ++ *(struct cred **)(((struct rcu_head *)(init_cred.rcu.func)) + 1) = &init_cred; ++ 
set_iee_page_valid(__phys_to_iee(__pa_symbol(&init_cred))); ++ iee_set_logical_mem_ro((unsigned long)&init_cred); ++ iee_set_logical_mem_ro((unsigned long)__va(__pa_symbol(&init_cred))); ++ #else ++ cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); ++ #endif + } + + /** +@@ -638,29 +788,56 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) + if (!new) + return NULL; + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_rcu(new,kmem_cache_alloc(rcu_jar, GFP_KERNEL)); ++ *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; ++ #endif ++ + kdebug("prepare_kernel_cred() alloc %p", new); + + old = get_task_cred(daemon); + ++ #ifdef CONFIG_CREDP ++ iee_copy_cred(old,new); ++ iee_set_cred_non_rcu(new,0); ++ iee_set_cred_atomic_set_usage(new,1); ++ #else + *new = *old; + new->non_rcu = 0; + atomic_long_set(&new->usage, 1); ++ #endif + get_uid(new->user); + get_user_ns(new->user_ns); + get_group_info(new->group_info); + + #ifdef CONFIG_KEYS ++#ifdef CONFIG_CREDP ++ iee_set_cred_session_keyring(new,NULL); ++ iee_set_cred_process_keyring(new,NULL); ++ iee_set_cred_thread_keyring(new,NULL); ++ iee_set_cred_request_key_auth(new,NULL); ++ iee_set_cred_jit_keyring(new,KEY_REQKEY_DEFL_THREAD_KEYRING); ++#else + new->session_keyring = NULL; + new->process_keyring = NULL; + new->thread_keyring = NULL; + new->request_key_auth = NULL; + new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif ++#endif + + #ifdef CONFIG_SECURITY ++#ifdef CONFIG_CREDP ++ iee_set_cred_security(new,NULL); ++#else + new->security = NULL; + #endif ++#endif ++ #ifdef CONFIG_CREDP ++ iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); ++ #else + new->ucounts = get_ucounts(new->ucounts); ++ #endif + if (!new->ucounts) + goto error; + +@@ -727,8 +904,13 @@ int set_create_files_as(struct cred *new, struct inode *inode) + { + if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) + return -EINVAL; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,inode->i_uid); ++ iee_set_cred_fsgid(new,inode->i_gid); ++ #else + new->fsuid = inode->i_uid; + new->fsgid = inode->i_gid; ++ #endif + return security_kernel_create_files_as(new, inode); + } + EXPORT_SYMBOL(set_create_files_as); +diff --git a/kernel/exit.c b/kernel/exit.c +index 21a59a6e1f2e..d21a109f0497 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -74,6 +74,10 @@ + #include <asm/unistd.h> + #include <asm/mmu_context.h> + ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#endif ++ + /* + * The default value should be high enough to not crash a system that randomly + * crashes its kernel from time to time, but low enough to at least not permit +@@ -558,6 +562,10 @@ static void exit_mm(void) + smp_mb__after_spinlock(); + local_irq_disable(); + current->mm = NULL; ++ #ifdef CONFIG_IEE ++ iee_set_token_mm(current, NULL); ++ iee_set_token_pgd(current, NULL); ++ #endif + membarrier_update_current_mm(NULL); + enter_lazy_tlb(mm, current); + local_irq_enable(); +diff --git a/kernel/fork.c b/kernel/fork.c +index e033388b11bd..c93e18a4f0b3 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -115,6 +115,10 @@ + #define CREATE_TRACE_POINTS + #include <trace/events/task.h> + ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#endif ++ + /* + * Minimum number of threads to boot the kernel + */ +@@ -128,14 +132,14 @@ + /* + * Protected counters by write_lock_irq(&tasklist_lock) + */ +-unsigned long total_forks; /* Handle normal Linux uptimes. */ +-int nr_threads; /* The idle threads do not count.. 
*/ ++unsigned long total_forks; /* Handle normal Linux uptimes. */ ++int nr_threads; /* The idle threads do not count.. */ + +-static int max_threads; /* tunable limit on nr_threads */ ++static int max_threads; /* tunable limit on nr_threads */ + +-#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) ++#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) + +-static const char * const resident_page_types[] = { ++static const char *const resident_page_types[] = { + NAMED_ARRAY_INDEX(MM_FILEPAGES), + NAMED_ARRAY_INDEX(MM_ANONPAGES), + NAMED_ARRAY_INDEX(MM_SWAPENTS), +@@ -144,7 +148,7 @@ static const char * const resident_page_types[] = { + + DEFINE_PER_CPU(unsigned long, process_counts) = 0; + +-__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ ++__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ + + #ifdef CONFIG_PROVE_RCU + int lockdep_tasklist_lock_is_held(void) +@@ -159,7 +163,7 @@ int nr_processes(void) + int cpu; + int total = 0; + +- for_each_possible_cpu(cpu) ++ for_each_possible_cpu (cpu) + total += per_cpu(process_counts, cpu); + + return total; +@@ -190,7 +194,7 @@ static inline void free_task_struct(struct task_struct *tsk) + * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a + * kmemcache based allocator. + */ +-# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) ++#if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) + + # ifdef CONFIG_VMAP_STACK + /* +@@ -311,8 +315,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) + * so memcg accounting is performed manually on assigning/releasing + * stacks to tasks. Drop __GFP_ACCOUNT. + */ +- stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, +- VMALLOC_START, VMALLOC_END, ++ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, ++ VMALLOC_END, + THREADINFO_GFP & ~__GFP_ACCOUNT, + PAGE_KERNEL, + 0, node, __builtin_return_address(0)); +@@ -410,9 +414,10 @@ static void free_thread_stack(struct task_struct *tsk) + + void thread_stack_cache_init(void) + { +- thread_stack_cache = kmem_cache_create_usercopy("thread_stack", +- THREAD_SIZE, THREAD_SIZE, 0, 0, +- THREAD_SIZE, NULL); ++ thread_stack_cache = ++ kmem_cache_create_usercopy("thread_stack", THREAD_SIZE, ++ THREAD_SIZE, 0, 0, THREAD_SIZE, ++ NULL); + BUG_ON(thread_stack_cache == NULL); + } + +@@ -502,7 +507,8 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) + + struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) + { +- struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ struct vm_area_struct *new = ++ kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + + if (!new) + return NULL; +@@ -602,8 +608,15 @@ void put_task_stack(struct task_struct *tsk) + } + #endif + ++#ifdef CONFIG_KOI ++extern s64 koi_offset; ++#endif ++ + void free_task(struct task_struct *tsk) + { ++ #ifdef CONFIG_IEE ++ void *iee_stack; ++ #endif + #ifdef CONFIG_SECCOMP + WARN_ON_ONCE(tsk->seccomp.filter); + #endif +@@ -633,6 +646,45 @@ void free_task(struct task_struct *tsk) + if (dynamic_affinity_enabled()) + sched_prefer_cpus_free(tsk); + #endif ++#ifdef CONFIG_IEE ++ // Free iee stack. ++ iee_stack = (void *)iee_read_token_stack(tsk); ++ if (iee_stack) { ++ iee_set_kernel_ppage( ++ (unsigned long)(iee_stack - PAGE_SIZE * 4)); ++ free_pages((unsigned long)(iee_stack - PAGE_SIZE * 4), 3); ++ } ++ // Free task_token. ++ // Empty the token ++ iee_free_token(tsk); ++ ++#ifdef CONFIG_KOI ++ // Free koi stack. 
++ unsigned long koi_stack = iee_rw_gate(IEE_READ_KOI_STACK_BASE, current); ++ if (koi_stack != 0) ++ free_pages(koi_stack, 2); ++#endif ++#else ++#ifdef CONFIG_KOI ++// free koi stack ++ struct task_token *token = (struct task_token *)((unsigned long)current + koi_offset); ++ unsigned long flags; ++ local_irq_save(flags); ++ asm volatile( ++ "at s1e1r, %0\n" ++ "isb\n" ++ : ++ :"r"(token)); ++ unsigned long res = read_sysreg(par_el1); ++ local_irq_restore(flags); ++ if (!(res & 0x1)) { ++ unsigned long koi_stack = token->koi_stack_base; ++ if (koi_stack != 0) ++ free_pages(koi_stack, 2); ++ } ++#endif ++#endif ++ + #ifdef CONFIG_QOS_SCHED_SMART_GRID + if (smart_grid_enabled()) + sched_grid_qos_free(tsk); +@@ -657,7 +709,7 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) + + #ifdef CONFIG_MMU + static __latent_entropy int dup_mmap(struct mm_struct *mm, +- struct mm_struct *oldmm) ++ struct mm_struct *oldmm) + { + struct vm_area_struct *mpnt, *tmp; + int retval; +@@ -773,7 +825,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + flush_dcache_mmap_lock(mapping); + /* insert tmp into the share list, just after mpnt */ + vma_interval_tree_insert_after(tmp, mpnt, +- &mapping->i_mmap); ++ &mapping->i_mmap); + flush_dcache_mmap_unlock(mapping); + i_mmap_unlock_write(mapping); + } +@@ -842,7 +894,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) + mmap_write_unlock(oldmm); + return 0; + } +-#define mm_alloc_pgd(mm) (0) ++#define mm_alloc_pgd(mm) (0) + #define mm_free_pgd(mm) + #endif /* CONFIG_MMU */ + +@@ -850,20 +902,22 @@ static void check_mm(struct mm_struct *mm) + { + int i; + +- BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, +- "Please make sure 'struct resident_page_types[]' is updated as well"); ++ BUILD_BUG_ON_MSG( ++ ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, ++ "Please make sure 'struct resident_page_types[]' is updated as well"); + + for (i = 0; i < NR_MM_COUNTERS; i++) { + long x = mm_counter_sum(mm, i); + + if (unlikely(x)) +- pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", +- mm, resident_page_types[i], x); ++ pr_alert( ++ "BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", ++ mm, resident_page_types[i], x); + } + + if (mm_pgtables_bytes(mm)) + pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", +- mm_pgtables_bytes(mm)); ++ mm_pgtables_bytes(mm)); + + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS + VM_BUG_ON_MM(mm->pmd_huge_pte, mm); +@@ -1014,14 +1068,6 @@ void __put_task_struct(struct task_struct *tsk) + } + EXPORT_SYMBOL_GPL(__put_task_struct); + +-void __put_task_struct_rcu_cb(struct rcu_head *rhp) +-{ +- struct task_struct *task = container_of(rhp, struct task_struct, rcu); +- +- __put_task_struct(task); +-} +-EXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb); +- + void __init __weak arch_task_cache_init(void) { } + + /* +@@ -1039,8 +1085,8 @@ static void set_max_threads(unsigned int max_threads_suggested) + if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64) + threads = MAX_THREADS; + else +- threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE, +- (u64) THREAD_SIZE * 8UL); ++ threads = div64_u64((u64)nr_pages * (u64)PAGE_SIZE, ++ (u64)THREAD_SIZE * 8UL); + + if (threads > max_threads_suggested) + threads = max_threads_suggested; +@@ -1075,17 +1121,24 @@ void __init fork_init(void) + int i; + #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR + #ifndef ARCH_MIN_TASKALIGN +-#define ARCH_MIN_TASKALIGN 0 ++#define ARCH_MIN_TASKALIGN 0 + #endif + int align = 
max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); + unsigned long useroffset, usersize; + + /* create a slab on which task_structs can be allocated */ + task_struct_whitelist(&useroffset, &usersize); ++ #ifdef CONFIG_IEE + task_struct_cachep = kmem_cache_create_usercopy("task_struct", + arch_task_struct_size, align, +- SLAB_PANIC|SLAB_ACCOUNT, ++ SLAB_PANIC|SLAB_ACCOUNT|SLAB_RED_ZONE, + useroffset, usersize, NULL); ++ #else ++ task_struct_cachep = ++ kmem_cache_create_usercopy("task_struct", arch_task_struct_size, ++ align, SLAB_PANIC | SLAB_ACCOUNT, ++ useroffset, usersize, NULL); ++ #endif + #endif + + /* do the arch specific task caches init */ +@@ -1093,8 +1146,8 @@ void __init fork_init(void) + + set_max_threads(MAX_THREADS); + +- init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; +- init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; ++ init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads / 2; ++ init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads / 2; + init_task.signal->rlim[RLIMIT_SIGPENDING] = + init_task.signal->rlim[RLIMIT_NPROC]; + +@@ -1107,8 +1160,8 @@ void __init fork_init(void) + set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY); + + #ifdef CONFIG_VMAP_STACK +- cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", +- NULL, free_vm_stack_cache); ++ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", NULL, ++ free_vm_stack_cache); + #endif + + scs_init(); +@@ -1118,7 +1171,7 @@ void __init fork_init(void) + } + + int __weak arch_dup_task_struct(struct task_struct *dst, +- struct task_struct *src) ++ struct task_struct *src) + { + *dst = *src; + return 0; +@@ -1129,14 +1182,14 @@ void set_task_stack_end_magic(struct task_struct *tsk) + unsigned long *stackend; + + stackend = end_of_stack(tsk); +- *stackend = STACK_END_MAGIC; /* for overflow detection */ ++ *stackend = STACK_END_MAGIC; /* for overflow detection */ + } + + static bool dup_resvd_task_struct(struct task_struct *dst, + struct task_struct *orig, int node) + { +- dst->_resvd = kzalloc_node(sizeof(struct task_struct_resvd), +- GFP_KERNEL, node); ++ dst->_resvd = kzalloc_node(sizeof(struct task_struct_resvd), GFP_KERNEL, ++ node); + if (!dst->_resvd) + return false; + +@@ -1309,7 +1362,7 @@ static void mm_init_uprobes_state(struct mm_struct *mm) + } + + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, +- struct user_namespace *user_ns) ++ struct user_namespace *user_ns) + { + mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); + mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); +@@ -1425,8 +1478,8 @@ EXPORT_SYMBOL_GPL(mmput); + #ifdef CONFIG_MMU + static void mmput_async_fn(struct work_struct *work) + { +- struct mm_struct *mm = container_of(work, struct mm_struct, +- async_put_work); ++ struct mm_struct *mm = ++ container_of(work, struct mm_struct, async_put_work); + + __mmput(mm); + } +@@ -1602,13 +1655,12 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) + struct mm_struct *mm; + int err; + +- err = down_read_killable(&task->signal->exec_update_lock); ++ err = down_read_killable(&task->signal->exec_update_lock); + if (err) + return ERR_PTR(err); + + mm = get_task_mm(task); +- if (mm && mm != current->mm && +- !ptrace_may_access(task, mode)) { ++ if (mm && mm != current->mm && !ptrace_may_access(task, mode)) { + mmput(mm); + mm = ERR_PTR(-EACCES); + } +@@ -1631,7 +1683,7 @@ static void complete_vfork_done(struct task_struct *tsk) + } + + static int wait_for_vfork_done(struct task_struct *child, 
+- struct completion *vfork) ++ struct completion *vfork) + { + unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE; + int killed; +@@ -1682,8 +1734,8 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) + * not set up a proper pointer then tough luck. + */ + put_user(0, tsk->clear_child_tid); +- do_futex(tsk->clear_child_tid, FUTEX_WAKE, +- 1, NULL, NULL, 0, 0); ++ do_futex(tsk->clear_child_tid, FUTEX_WAKE, 1, NULL, ++ NULL, 0, 0); + } + tsk->clear_child_tid = NULL; + } +@@ -1767,6 +1819,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) + #endif + + tsk->mm = NULL; ++#ifdef CONFIG_IEE ++ iee_set_token_mm(tsk, NULL); ++ iee_set_token_pgd(tsk, NULL); ++#endif + tsk->active_mm = NULL; + + /* +@@ -1798,6 +1854,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) + } + + tsk->mm = mm; ++#ifdef CONFIG_IEE ++ iee_set_token_mm(tsk, mm); ++ iee_set_token_pgd(tsk, mm->pgd); ++#endif + tsk->active_mm = mm; + sched_mm_cid_fork(tsk); + return 0; +@@ -2015,8 +2075,8 @@ static inline void init_task_pid_links(struct task_struct *task) + INIT_HLIST_NODE(&task->pid_links[type]); + } + +-static inline void +-init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) ++static inline void init_task_pid(struct task_struct *task, enum pid_type type, ++ struct pid *pid) + { + if (type == PIDTYPE_PID) + task->thread_pid = pid; +@@ -2277,6 +2337,12 @@ static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) + mutex_unlock(&oom_adj_mutex); + } + ++#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) ++extern s64 koi_offset; ++extern int koi_add_page_mapping(unsigned long dst, unsigned long src); ++#endif ++ ++ + #ifdef CONFIG_RV + static void rv_task_fork(struct task_struct *p) + { +@@ -2309,15 +2375,21 @@ __latent_entropy struct task_struct *copy_process( + struct file *pidfile = NULL; + const u64 clone_flags = args->flags; + struct nsproxy *nsp = current->nsproxy; ++ #ifdef CONFIG_IEE ++ gfp_t gfp; ++ void *pstack; ++ #endif + + /* + * Don't allow sharing the root directory with processes in a different + * namespace + */ +- if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) ++ if ((clone_flags & (CLONE_NEWNS | CLONE_FS)) == ++ (CLONE_NEWNS | CLONE_FS)) + return ERR_PTR(-EINVAL); + +- if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) ++ if ((clone_flags & (CLONE_NEWUSER | CLONE_FS)) == ++ (CLONE_NEWUSER | CLONE_FS)) + return ERR_PTR(-EINVAL); + + /* +@@ -2342,7 +2414,7 @@ __latent_entropy struct task_struct *copy_process( + * from creating siblings. + */ + if ((clone_flags & CLONE_PARENT) && +- current->signal->flags & SIGNAL_UNKILLABLE) ++ current->signal->flags & SIGNAL_UNKILLABLE) + return ERR_PTR(-EINVAL); + + /* +@@ -2387,6 +2459,15 @@ __latent_entropy struct task_struct *copy_process( + p = dup_task_struct(current, node); + if (!p) + goto fork_out; ++ #ifdef CONFIG_IEE ++ // Alloc iee stack. ++ gfp = GFP_KERNEL; ++ pstack = (void *)__get_free_pages(gfp, 3); ++ iee_set_kernel_upage((unsigned long)pstack); ++ // Init token. ++ iee_init_token(p, NULL, pstack + PAGE_SIZE * 4); ++ #endif ++ + p->flags &= ~PF_KTHREAD; + if (args->kthread) + p->flags |= PF_KTHREAD; +@@ -2408,7 +2489,8 @@ __latent_entropy struct task_struct *copy_process( + /* + * Clear TID on mm_release()? + */ +- p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; ++ p->clear_child_tid = ++ (clone_flags & CLONE_CHILD_CLEARTID) ? 
args->child_tid : NULL; + + ftrace_graph_init_task(p); + +@@ -2519,10 +2601,10 @@ __latent_entropy struct task_struct *copy_process( + #endif + #ifdef CONFIG_TRACE_IRQFLAGS + memset(&p->irqtrace, 0, sizeof(p->irqtrace)); +- p->irqtrace.hardirq_disable_ip = _THIS_IP_; +- p->irqtrace.softirq_enable_ip = _THIS_IP_; +- p->softirqs_enabled = 1; +- p->softirq_context = 0; ++ p->irqtrace.hardirq_disable_ip = _THIS_IP_; ++ p->irqtrace.softirq_enable_ip = _THIS_IP_; ++ p->softirqs_enabled = 1; ++ p->softirq_context = 0; + #endif + + p->pagefault_disabled = 0; +@@ -2535,8 +2617,8 @@ __latent_entropy struct task_struct *copy_process( + p->blocked_on = NULL; /* not blocked yet */ + #endif + #ifdef CONFIG_BCACHE +- p->sequential_io = 0; +- p->sequential_io_avg = 0; ++ p->sequential_io = 0; ++ p->sequential_io_avg = 0; + #endif + #ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(p->bpf_storage, NULL); +@@ -2623,7 +2705,7 @@ __latent_entropy struct task_struct *copy_process( + /* + * sigaltstack should be cleared when sharing the same VM + */ +- if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) ++ if ((clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) + sas_ss_reset(p); + + /* +@@ -2702,7 +2784,7 @@ __latent_entropy struct task_struct *copy_process( + write_lock_irq(&tasklist_lock); + + /* CLONE_PARENT re-uses the old parent */ +- if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { ++ if (clone_flags & (CLONE_PARENT | CLONE_THREAD)) { + p->real_parent = current->real_parent; + p->parent_exec_id = current->parent_exec_id; + if (clone_flags & CLONE_THREAD) +@@ -2766,8 +2848,9 @@ __latent_entropy struct task_struct *copy_process( + * tasklist_lock with adding child to the process tree + * for propagate_has_child_subreaper optimization. + */ +- p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || +- p->real_parent->signal->is_child_subreaper; ++ p->signal->has_child_subreaper = ++ p->real_parent->signal->has_child_subreaper || ++ p->real_parent->signal->is_child_subreaper; + list_add_tail(&p->sibling, &p->real_parent->children); + list_add_tail_rcu(&p->tasks, &init_task.tasks); + attach_pid(p, PIDTYPE_TGID); +@@ -2918,8 +3001,8 @@ struct task_struct * __init fork_idle(int cpu) + */ + struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) + { +- unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| +- CLONE_IO; ++ unsigned long flags = CLONE_FS | CLONE_FILES | CLONE_SIGHAND | ++ CLONE_THREAD | CLONE_IO; + struct kernel_clone_args args = { + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), +@@ -3083,8 +3166,8 @@ SYSCALL_DEFINE0(fork) + SYSCALL_DEFINE0(vfork) + { + struct kernel_clone_args args = { +- .flags = CLONE_VFORK | CLONE_VM, +- .exit_signal = SIGCHLD, ++ .flags = CLONE_VFORK | CLONE_VM, ++ .exit_signal = SIGCHLD, + }; + + return kernel_clone(&args); +@@ -3094,35 +3177,30 @@ SYSCALL_DEFINE0(vfork) + #ifdef __ARCH_WANT_SYS_CLONE + #ifdef CONFIG_CLONE_BACKWARDS + SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, +- int __user *, parent_tidptr, +- unsigned long, tls, +- int __user *, child_tidptr) ++ int __user *, parent_tidptr, unsigned long, tls, int __user *, ++ child_tidptr) + #elif defined(CONFIG_CLONE_BACKWARDS2) + SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, +- int __user *, parent_tidptr, +- int __user *, child_tidptr, +- unsigned long, tls) +-#elif defined(CONFIG_CLONE_BACKWARDS3) +-SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, 
+- int, stack_size, +- int __user *, parent_tidptr, +- int __user *, child_tidptr, ++ int __user *, parent_tidptr, int __user *, child_tidptr, + unsigned long, tls) ++#elif defined(CONFIG_CLONE_BACKWARDS3) ++SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, ++ stack_size, int __user *, parent_tidptr, int __user *, ++ child_tidptr, unsigned long, tls) + #else + SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, +- int __user *, parent_tidptr, +- int __user *, child_tidptr, +- unsigned long, tls) ++ int __user *, parent_tidptr, int __user *, child_tidptr, ++ unsigned long, tls) + #endif + { + struct kernel_clone_args args = { +- .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), +- .pidfd = parent_tidptr, +- .child_tid = child_tidptr, +- .parent_tid = parent_tidptr, +- .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), +- .stack = newsp, +- .tls = tls, ++ .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), ++ .pidfd = parent_tidptr, ++ .child_tid = child_tidptr, ++ .parent_tid = parent_tidptr, ++ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), ++ .stack = newsp, ++ .tls = tls, + }; + + return kernel_clone(&args); +@@ -3178,21 +3256,21 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, + return -EINVAL; + + *kargs = (struct kernel_clone_args){ +- .flags = args.flags, +- .pidfd = u64_to_user_ptr(args.pidfd), +- .child_tid = u64_to_user_ptr(args.child_tid), +- .parent_tid = u64_to_user_ptr(args.parent_tid), +- .exit_signal = args.exit_signal, +- .stack = args.stack, +- .stack_size = args.stack_size, +- .tls = args.tls, +- .set_tid_size = args.set_tid_size, +- .cgroup = args.cgroup, ++ .flags = args.flags, ++ .pidfd = u64_to_user_ptr(args.pidfd), ++ .child_tid = u64_to_user_ptr(args.child_tid), ++ .parent_tid = u64_to_user_ptr(args.parent_tid), ++ .exit_signal = args.exit_signal, ++ .stack = args.stack, ++ .stack_size = args.stack_size, ++ .tls = args.tls, ++ .set_tid_size = args.set_tid_size, ++ .cgroup = args.cgroup, + }; + + if (args.set_tid && +- copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), +- (kargs->set_tid_size * sizeof(pid_t)))) ++ copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), ++ (kargs->set_tid_size * sizeof(pid_t)))) + return -EFAULT; + + kargs->set_tid = kset_tid; +@@ -3287,7 +3365,8 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) + } + #endif + +-void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) ++void walk_process_tree(struct task_struct *top, proc_visitor visitor, ++ void *data) + { + struct task_struct *leader, *parent, *child; + int res; +@@ -3295,8 +3374,8 @@ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data + read_lock(&tasklist_lock); + leader = top = top->group_leader; + down: +- for_each_thread(leader, parent) { +- list_for_each_entry(child, &parent->children, sibling) { ++ for_each_thread (leader, parent) { ++ list_for_each_entry (child, &parent->children, sibling) { + res = visitor(child, data); + if (res) { + if (res < 0) +@@ -3304,8 +3383,7 @@ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data + leader = child; + goto down; + } +-up: +- ; ++ up:; + } + } + +@@ -3382,11 +3460,11 @@ void __init proc_caches_init(void) + */ + static int check_unshare_flags(unsigned long unshare_flags) + { +- if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| +- CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| +- CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| +- 
CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP| +- CLONE_NEWTIME)) ++ if (unshare_flags & ++ ~(CLONE_THREAD | CLONE_FS | CLONE_NEWNS | CLONE_SIGHAND | CLONE_VM | ++ CLONE_FILES | CLONE_SYSVSEM | CLONE_NEWUTS | CLONE_NEWIPC | ++ CLONE_NEWNET | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWCGROUP | ++ CLONE_NEWTIME)) + return -EINVAL; + /* + * Not implemented, but pretend it works if there is nothing +@@ -3497,7 +3575,7 @@ int ksys_unshare(unsigned long unshare_flags) + * to a new ipc namespace, the semaphore arrays from the old + * namespace are unreachable. + */ +- if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) ++ if (unshare_flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) + do_sysvsem = 1; + err = unshare_fs(unshare_flags, &new_fs); + if (err) +@@ -3508,8 +3586,8 @@ int ksys_unshare(unsigned long unshare_flags) + err = unshare_userns(unshare_flags, &new_cred); + if (err) + goto bad_unshare_cleanup_fd; +- err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, +- new_cred, new_fs); ++ err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_cred, ++ new_fs); + if (err) + goto bad_unshare_cleanup_cred; + +@@ -3606,8 +3684,8 @@ int unshare_files(void) + return 0; + } + +-int sysctl_max_threads(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) ++int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, ++ size_t *lenp, loff_t *ppos) + { + struct ctl_table t; + int ret; +diff --git a/kernel/groups.c b/kernel/groups.c +index 9b43da22647d..8045812e8a3c 100644 +--- a/kernel/groups.c ++++ b/kernel/groups.c +@@ -11,6 +11,9 @@ + #include <linux/user_namespace.h> + #include <linux/vmalloc.h> + #include <linux/uaccess.h> ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif + + struct group_info *groups_alloc(int gidsetsize) + { +@@ -119,7 +122,11 @@ void set_groups(struct cred *new, struct group_info *group_info) + { + put_group_info(new->group_info); + get_group_info(group_info); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_group_info(new,group_info); ++ #else + new->group_info = group_info; ++ #endif + } + + EXPORT_SYMBOL(set_groups); +diff --git a/kernel/kthread.c b/kernel/kthread.c +index 1eea53050bab..317eac6eb2f2 100644 +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -30,6 +30,10 @@ + #include <linux/sched/isolation.h> + #include <trace/events/sched.h> + ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#endif ++ + + static DEFINE_SPINLOCK(kthread_create_lock); + static LIST_HEAD(kthread_create_list); +@@ -1429,6 +1433,10 @@ void kthread_use_mm(struct mm_struct *mm) + tsk->active_mm = mm; + tsk->mm = mm; + membarrier_update_current_mm(mm); ++ #ifdef CONFIG_IEE ++ iee_set_token_mm(tsk, mm); ++ iee_set_token_pgd(tsk, mm->pgd); ++ #endif + switch_mm_irqs_off(active_mm, mm, tsk); + local_irq_enable(); + task_unlock(tsk); +@@ -1473,7 +1481,12 @@ void kthread_unuse_mm(struct mm_struct *mm) + local_irq_disable(); + tsk->mm = NULL; + membarrier_update_current_mm(NULL); ++ #ifdef CONFIG_IEE ++ iee_set_token_mm(tsk, mm); ++ iee_set_token_pgd(tsk, NULL); ++ #endif + mmgrab_lazy_tlb(mm); ++ + /* active_mm is still 'mm' */ + enter_lazy_tlb(mm, tsk); + local_irq_enable(); +diff --git a/kernel/smpboot.c b/kernel/smpboot.c +index f47d8f375946..60c7d365c0e1 100644 +--- a/kernel/smpboot.c ++++ b/kernel/smpboot.c +@@ -16,6 +16,10 @@ + #include <linux/kthread.h> + #include <linux/smpboot.h> + ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#endif ++ + #include "smpboot.h" + + #ifdef CONFIG_SMP +@@ -57,6 +61,11 @@ static __always_inline void idle_init(unsigned 
int cpu) + pr_err("SMP: fork_idle() failed for CPU %u\n", cpu); + else + per_cpu(idle_threads, cpu) = tsk; ++ #ifdef CONFIG_IEE ++ // Set the secondary __entry_task. ++ *(struct task_struct **)SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu]) = tsk; ++ iee_set_logical_mem_ro((unsigned long)SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu])); ++ #endif + } + } + +diff --git a/kernel/sys.c b/kernel/sys.c +index 44b575990333..fbc47f83af50 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -75,6 +75,10 @@ + #include <asm/io.h> + #include <asm/unistd.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + #include "uid16.h" + + #ifndef SET_UNALIGN_CTL +@@ -395,7 +399,11 @@ long __sys_setregid(gid_t rgid, gid_t egid) + if (gid_eq(old->gid, krgid) || + gid_eq(old->egid, krgid) || + ns_capable_setid(old->user_ns, CAP_SETGID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_gid(new,krgid); ++ #else + new->gid = krgid; ++ #endif + else + goto error; + } +@@ -404,15 +412,27 @@ long __sys_setregid(gid_t rgid, gid_t egid) + gid_eq(old->egid, kegid) || + gid_eq(old->sgid, kegid) || + ns_capable_setid(old->user_ns, CAP_SETGID)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_egid(new,kegid); ++ #else + new->egid = kegid; ++ #endif + else + goto error; + } + + if (rgid != (gid_t) -1 || + (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_sgid(new,new->egid); ++ #else + new->sgid = new->egid; ++ #endif ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsgid(new,new->egid); ++ #else + new->fsgid = new->egid; ++ #endif + + retval = security_task_fix_setgid(new, old, LSM_SETID_RE); + if (retval < 0) +@@ -454,9 +474,25 @@ long __sys_setgid(gid_t gid) + + retval = -EPERM; + if (ns_capable_setid(old->user_ns, CAP_SETGID)) ++ #ifdef CONFIG_CREDP ++ { ++ iee_set_cred_fsgid(new,kgid); ++ iee_set_cred_sgid(new,kgid); ++ iee_set_cred_egid(new,kgid); ++ iee_set_cred_gid(new,kgid); ++ } ++ #else + new->gid = new->egid = new->sgid = new->fsgid = kgid; ++ #endif + else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) ++ #ifdef CONFIG_CREDP ++ { ++ iee_set_cred_fsgid(new,kgid); ++ iee_set_cred_egid(new,kgid); ++ } ++ #else + new->egid = new->fsgid = kgid; ++ #endif + else + goto error; + +@@ -488,7 +524,11 @@ static int set_user(struct cred *new) + return -EAGAIN; + + free_uid(new->user); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_user(new,new_user); ++ #else + new->user = new_user; ++ #endif + return 0; + } + +@@ -549,7 +589,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) + + retval = -EPERM; + if (ruid != (uid_t) -1) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_uid(new,kruid); ++ #else + new->uid = kruid; ++ #endif + if (!uid_eq(old->uid, kruid) && + !uid_eq(old->euid, kruid) && + !ns_capable_setid(old->user_ns, CAP_SETUID)) +@@ -557,7 +601,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) + } + + if (euid != (uid_t) -1) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_euid(new,keuid); ++ #else + new->euid = keuid; ++ #endif + if (!uid_eq(old->uid, keuid) && + !uid_eq(old->euid, keuid) && + !uid_eq(old->suid, keuid) && +@@ -572,8 +620,16 @@ long __sys_setreuid(uid_t ruid, uid_t euid) + } + if (ruid != (uid_t) -1 || + (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_suid(new,new->euid); ++ #else + new->suid = new->euid; ++ #endif ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,new->euid); ++ #else + new->fsuid = new->euid; ++ #endif + + retval = security_task_fix_setuid(new, old, LSM_SETID_RE); + if (retval < 0) +@@ -626,7 +682,12 @@ long __sys_setuid(uid_t uid) 
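The kernel/sys.c hunks around this point all repeat one CONFIG_CREDP pattern: every direct store to a struct cred field is replaced with an iee_set_cred_*() call, so the cred object itself can live on a page the kernel maps read-only and only the isolated execution environment (IEE) can write. The setters come from asm/iee-cred.h, which is not part of this section; the sketch below is only a guess at their shape, and iee_write() is a hypothetical privileged-store primitive, not an API shown in this patch.

	/* sketch of one CREDP setter, assuming an IEE-mediated write helper */
	static inline void iee_set_cred_gid(struct cred *cred, kgid_t gid)
	{
		/* hypothetical: have the IEE perform the store into the RO page */
		iee_write(&cred->gid, &gid, sizeof(gid));
	}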
+ + retval = -EPERM; + if (ns_capable_setid(old->user_ns, CAP_SETUID)) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_uid(new,kuid); ++ iee_set_cred_suid(new,kuid); ++ #else + new->suid = new->uid = kuid; ++ #endif + if (!uid_eq(kuid, old->uid)) { + retval = set_user(new); + if (retval < 0) +@@ -636,7 +697,12 @@ long __sys_setuid(uid_t uid) + goto error; + } + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_euid(new,kuid); ++ iee_set_cred_fsuid(new,kuid); ++ #else + new->fsuid = new->euid = kuid; ++ #endif + + retval = security_task_fix_setuid(new, old, LSM_SETID_ID); + if (retval < 0) +@@ -710,7 +776,11 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) + return -ENOMEM; + + if (ruid != (uid_t) -1) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_uid(new,kruid); ++ #else + new->uid = kruid; ++ #endif + if (!uid_eq(kruid, old->uid)) { + retval = set_user(new); + if (retval < 0) +@@ -718,10 +788,22 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) + } + } + if (euid != (uid_t) -1) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_euid(new,keuid); ++ #else + new->euid = keuid; ++ #endif + if (suid != (uid_t) -1) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_suid(new,ksuid); ++ #else + new->suid = ksuid; ++ #endif ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,new->euid); ++ #else + new->fsuid = new->euid; ++ #endif + + retval = security_task_fix_setuid(new, old, LSM_SETID_RES); + if (retval < 0) +@@ -810,12 +892,29 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) + return -ENOMEM; + + if (rgid != (gid_t) -1) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_gid(new,krgid); ++ #else + new->gid = krgid; ++ #endif + if (egid != (gid_t) -1) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_egid(new,kegid); ++ #else + new->egid = kegid; ++ #endif + if (sgid != (gid_t) -1) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_sgid(new,ksgid); ++ #else + new->sgid = ksgid; ++ #endif ++ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsgid(new,new->egid); ++ #else + new->fsgid = new->egid; ++ #endif + + retval = security_task_fix_setgid(new, old, LSM_SETID_RES); + if (retval < 0) +@@ -882,7 +981,11 @@ long __sys_setfsuid(uid_t uid) + uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || + ns_capable_setid(old->user_ns, CAP_SETUID)) { + if (!uid_eq(kuid, old->fsuid)) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,kuid); ++ #else + new->fsuid = kuid; ++ #endif + if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) + goto change_okay; + } +@@ -926,7 +1029,11 @@ long __sys_setfsgid(gid_t gid) + gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || + ns_capable_setid(old->user_ns, CAP_SETGID)) { + if (!gid_eq(kgid, old->fsgid)) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsgid(new,kgid); ++ #else + new->fsgid = kgid; ++ #endif + if (security_task_fix_setgid(new,old,LSM_SETID_FS) == 0) + goto change_okay; + } +diff --git a/kernel/umh.c b/kernel/umh.c +index 1b13c5d34624..32f5c88e10bf 100644 +--- a/kernel/umh.c ++++ b/kernel/umh.c +@@ -32,6 +32,10 @@ + + #include <trace/events/module.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; + static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; + static DEFINE_SPINLOCK(umh_sysctl_lock); +@@ -91,9 +95,15 @@ static int call_usermodehelper_exec_async(void *data) + goto out; + + spin_lock(&umh_sysctl_lock); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_bset(new,cap_intersect(usermodehelper_bset, new->cap_bset)); ++ iee_set_cred_cap_inheritable(new,cap_intersect(usermodehelper_inheritable, ++ new->cap_inheritable)); ++ #else + 
new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); + new->cap_inheritable = cap_intersect(usermodehelper_inheritable, + new->cap_inheritable); ++ #endif + spin_unlock(&umh_sysctl_lock); + + if (sub_info->init) { +diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c +index 1d8e47bed3f1..9f1921025539 100644 +--- a/kernel/user_namespace.c ++++ b/kernel/user_namespace.c +@@ -22,6 +22,10 @@ + #include <linux/bsearch.h> + #include <linux/sort.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + static struct kmem_cache *user_ns_cachep __read_mostly; + static DEFINE_MUTEX(userns_state_mutex); + +@@ -45,6 +49,19 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) + /* Start with the same capabilities as init but useless for doing + * anything as the capabilities are bound to the new user namespace. + */ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_securebits(cred,SECUREBITS_DEFAULT); ++ iee_set_cred_cap_inheritable(cred,CAP_EMPTY_SET); ++ iee_set_cred_cap_permitted(cred,CAP_FULL_SET); ++ iee_set_cred_cap_effective(cred,CAP_FULL_SET); ++ iee_set_cred_cap_ambient(cred,CAP_EMPTY_SET); ++ iee_set_cred_cap_bset(cred,CAP_FULL_SET); ++#ifdef CONFIG_KEYS ++ key_put(cred->request_key_auth); ++ iee_set_cred_request_key_auth(cred,NULL); ++#endif ++ iee_set_cred_user_ns(cred,user_ns); ++ #else + cred->securebits = SECUREBITS_DEFAULT; + cred->cap_inheritable = CAP_EMPTY_SET; + cred->cap_permitted = CAP_FULL_SET; +@@ -57,6 +74,7 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) + #endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ + cred->user_ns = user_ns; ++ #endif + } + + static unsigned long enforced_nproc_rlimit(void) +diff --git a/mm/Kconfig b/mm/Kconfig +index 45d4139c959c..eb9d41768c15 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -530,6 +530,18 @@ config NUMA_KEEP_MEMINFO + config MEMORY_ISOLATION + bool + ++# Config for kernel module isolation ++config KOI ++ depends on ARM64 ++ depends on ARM64_VA_BITS_48 ++ depends on ARM64_4K_PAGES ++ def_bool n ++ ++# Configs for pgtable isolation ++config PTP ++ depends on IEE ++ def_bool y ++ + # IORESOURCE_SYSTEM_RAM regions in the kernel resource tree that are marked + # IORESOURCE_EXCLUSIVE cannot be mapped to user space, for example, via + # /dev/mem. +diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c +index d25d99cb5f2b..2ea51f559d4e 100644 +--- a/mm/damon/ops-common.c ++++ b/mm/damon/ops-common.c +@@ -44,6 +44,7 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr + if (!folio) + return; + ++ + if (ptep_clear_young_notify(vma, addr, pte)) + folio_set_young(folio); + +diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c +index 13f0d1192707..60dc95c5b286 100644 +--- a/mm/debug_vm_pgtable.c ++++ b/mm/debug_vm_pgtable.c +@@ -452,7 +452,11 @@ static void __init pmd_huge_tests(struct pgtable_debug_args *args) + * X86 defined pmd_set_huge() verifies that the given + * PMD is not a populated non-leaf entry. + */ ++ #ifdef CONFIG_PTP ++ set_pmd(args->pmdp, __pmd(0)); ++ #else + WRITE_ONCE(*args->pmdp, __pmd(0)); ++ #endif + WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot)); + WARN_ON(!pmd_clear_huge(args->pmdp)); + pmd = READ_ONCE(*args->pmdp); +@@ -472,7 +476,11 @@ static void __init pud_huge_tests(struct pgtable_debug_args *args) + * X86 defined pud_set_huge() verifies that the given + * PUD is not a populated non-leaf entry. 
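The mm/debug_vm_pgtable.c hunks here and below state the core CONFIG_PTP rule: page-table pages become read-only through the kernel's normal linear mapping, so a raw WRITE_ONCE() on a pgd/p4d/pud/pmd entry would fault, and every store must go through the arch set_pXd() helpers, which this series reroutes into the IEE. A condensed sketch of that idea; iee_rw_gate() and IEE_WRITE_PMD are assumed names for the gate, not APIs shown in this hunk.

	/* sketch: a PTP-guarded pmd store (gate name is an assumption) */
	static inline void ptp_set_pmd(pmd_t *pmdp, pmd_t pmd)
	{
		/* trap into the IEE, which owns the writable alias of pmdp */
		iee_rw_gate(IEE_WRITE_PMD, pmdp, pmd_val(pmd));
	}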
+ */ ++ #ifdef CONFIG_PTP ++ set_pud(args->pudp, __pud(0)); ++ #else + WRITE_ONCE(*args->pudp, __pud(0)); ++ #endif + WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot)); + WARN_ON(!pud_clear_huge(args->pudp)); + pud = READ_ONCE(*args->pudp); +@@ -511,7 +519,11 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args) + + pr_debug("Validating PUD clear\n"); + pud = __pud(pud_val(pud) | RANDOM_ORVALUE); ++ #ifdef CONFIG_PTP ++ set_pud(args->pudp, pud); ++ #else + WRITE_ONCE(*args->pudp, pud); ++ #endif + pud_clear(args->pudp); + pud = READ_ONCE(*args->pudp); + WARN_ON(!pud_none(pud)); +@@ -548,7 +560,11 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args) + + pr_debug("Validating P4D clear\n"); + p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); ++ #ifdef CONFIG_PTP ++ set_p4d(args->p4dp, p4d); ++ #else + WRITE_ONCE(*args->p4dp, p4d); ++ #endif + p4d_clear(args->p4dp); + p4d = READ_ONCE(*args->p4dp); + WARN_ON(!p4d_none(p4d)); +@@ -582,7 +598,11 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args) + + pr_debug("Validating PGD clear\n"); + pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); ++ #ifdef CONFIG_PTP ++ set_pgd(args->pgdp, pgd); ++ #else + WRITE_ONCE(*args->pgdp, pgd); ++ #endif + pgd_clear(args->pgdp); + pgd = READ_ONCE(*args->pgdp); + WARN_ON(!pgd_none(pgd)); +@@ -650,7 +670,11 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args) + + pr_debug("Validating PMD clear\n"); + pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); ++ #ifdef CONFIG_PTP ++ set_pmd(args->pmdp, pmd); ++ #else + WRITE_ONCE(*args->pmdp, pmd); ++ #endif + pmd_clear(args->pmdp); + pmd = READ_ONCE(*args->pmdp); + WARN_ON(!pmd_none(pmd)); +diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c +index ce06b2884789..a039c7a50ec5 100644 +--- a/mm/early_ioremap.c ++++ b/mm/early_ioremap.c +@@ -147,7 +147,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) + if (after_paging_init) + __late_set_fixmap(idx, phys_addr, prot); + else ++ #ifdef CONFIG_PTP ++ __iee_set_fixmap_pre_init(idx, phys_addr, prot); ++ #else + __early_set_fixmap(idx, phys_addr, prot); ++ #endif + phys_addr += PAGE_SIZE; + --idx; + --nrpages; +@@ -199,13 +203,66 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) + if (after_paging_init) + __late_clear_fixmap(idx); + else ++ #ifdef CONFIG_PTP ++ __iee_set_fixmap_pre_init(idx, 0, FIXMAP_PAGE_CLEAR); ++ #else + __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); ++ #endif + --idx; + --nrpages; + } + prev_map[slot] = NULL; + } + ++#ifdef CONFIG_PTP ++void __init early_iounmap_after_init(void __iomem *addr, unsigned long size) ++{ ++ unsigned long virt_addr; ++ unsigned long offset; ++ unsigned int nrpages; ++ enum fixed_addresses idx; ++ int i, slot; ++ ++ slot = -1; ++ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { ++ if (prev_map[i] == addr) { ++ slot = i; ++ break; ++ } ++ } ++ ++ if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n", ++ addr, size)) ++ return; ++ ++ if (WARN(prev_size[slot] != size, ++ "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", ++ addr, size, slot, prev_size[slot])) ++ return; ++ ++ WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n", ++ addr, size, slot); ++ ++ virt_addr = (unsigned long)addr; ++ if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))) ++ return; ++ ++ offset = offset_in_page(virt_addr); ++ nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; ++ ++ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; ++ while (nrpages > 0) { ++ 
if (after_paging_init) ++ __late_clear_fixmap(idx); ++ else ++ __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); ++ --idx; ++ --nrpages; ++ } ++ prev_map[slot] = NULL; ++} ++#endif ++ + /* Remap an IO device */ + void __init __iomem * + early_ioremap(resource_size_t phys_addr, unsigned long size) +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 763bb25e4f99..80bb2c0abeda 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -39,6 +39,10 @@ + #include <linux/memory-tiers.h> + #include <linux/compat.h> + ++#ifdef CONFIG_PTP ++#include <linux/iee-func.h> ++#endif ++ + #include <asm/tlb.h> + #include <asm/pgalloc.h> + #include "internal.h" +@@ -2489,6 +2493,10 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, + unsigned long addr; + pte_t *pte; + int i; ++ #ifdef CONFIG_PTP ++ pte_t *ptep; ++ unsigned long iee_addr; ++ #endif + + /* + * Leave pmd empty until pte is filled note that it is fine to delay +@@ -2501,7 +2509,14 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, + old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd); + + pgtable = pgtable_trans_huge_withdraw(mm, pmd); +- pmd_populate(mm, &_pmd, pgtable); ++ #ifdef CONFIG_PTP ++ ptep = (pte_t *)page_address(pgtable); ++ iee_addr = __phys_to_iee(__pa(ptep)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)ptep); ++ #endif ++ //pmd_populate(mm, &_pmd, pgtable); ++ _pmd = __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE); + + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte); +@@ -2534,6 +2549,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr; + pte_t *pte; + int i; ++ #ifdef CONFIG_PTP ++ pte_t *ptep; ++ unsigned long iee_addr; ++ #endif + + VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); + VM_BUG_ON_VMA(vma->vm_start > haddr, vma); +@@ -2671,7 +2690,14 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, + * This's critical for some architectures (Power). 
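In __split_huge_zero_page_pmd() above (and __split_huge_pmd_locked() below), pmd_populate(mm, &_pmd, pgtable) is commented out and the table descriptor is built by hand. The likely reason: _pmd is a stack local, not a live page-table entry, so routing the store through the PTP-guarded pmd_populate() path would be both needless and wrong once the linear-map aliases of page tables are read-only. A reference helper equivalent to the open-coded line, using the same arm64 macros the patch uses:

	/* build the PMD table descriptor for a withdrawn pgtable page */
	static inline pmd_t ptp_mk_table_pmd(pgtable_t pgtable)
	{
		return __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE);
	}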
+ */ + pgtable = pgtable_trans_huge_withdraw(mm, pmd); +- pmd_populate(mm, &_pmd, pgtable); ++ #ifdef CONFIG_PTP ++ ptep = (pte_t *)page_to_virt(pgtable); ++ iee_addr = __phys_to_iee(__pa(ptep)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)ptep); ++ #endif ++ //pmd_populate(mm, &_pmd, pgtable); ++ _pmd = __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE); + + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte); +diff --git a/mm/init-mm.c b/mm/init-mm.c +index 24c809379274..07d060fca6f0 100644 +--- a/mm/init-mm.c ++++ b/mm/init-mm.c +@@ -55,3 +55,20 @@ void setup_initial_init_mm(void *start_code, void *end_code, + init_mm.end_data = (unsigned long)end_data; + init_mm.brk = (unsigned long)brk; + } ++ ++#ifdef CONFIG_KOI ++/* ++ * This is used to init ko_mm when creating pgtable for a ko to be isolated ++ * the ko_mm belongs to a specific ko, pgdp is allocated by koi_pgd_alloc ++ */ ++void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp) { ++ ko_mm->mm_rb = RB_ROOT; ++ ko_mm->pgd = pgdp; ++ ko_mm->mm_users = (atomic_t)ATOMIC_INIT(2); ++ ko_mm->mm_count = (atomic_t)ATOMIC_INIT(1); ++ ko_mm->mmap_lock = (struct rw_semaphore)__RWSEM_INITIALIZER(ko_mm->mmap_lock); ++ ko_mm->page_table_lock = __SPIN_LOCK_UNLOCKED(ko_mm.page_table_lock); ++ ko_mm->arg_lock = __SPIN_LOCK_UNLOCKED(ko_mm->arg_lock); ++ ko_mm->mmlist = (struct list_head)LIST_HEAD_INIT(ko_mm->mmlist); ++} ++#endif +diff --git a/mm/memory.c b/mm/memory.c +index 4ef917a182f9..28da89a19e30 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -80,6 +80,10 @@ + #include <linux/userswap.h> + #include <linux/dynamic_pool.h> + ++#ifdef CONFIG_PTP ++#include <linux/iee-func.h> ++#endif ++ + #include <trace/events/kmem.h> + + #include <asm/io.h> +@@ -5872,6 +5876,11 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) + + spin_lock(&mm->page_table_lock); + if (!p4d_present(*p4d)) { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(__pa(new)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)new); ++ #endif + mm_inc_nr_puds(mm); + smp_wmb(); /* See comment in pmd_install() */ + p4d_populate(mm, p4d, new); +@@ -5896,6 +5905,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) + + ptl = pud_lock(mm, pud); + if (!pud_present(*pud)) { ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr = __phys_to_iee(__pa(new)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)new); ++ #endif + mm_inc_nr_pmds(mm); + smp_wmb(); /* See comment in pmd_install() */ + pud_populate(mm, pud, new); +diff --git a/mm/slub.c b/mm/slub.c +index ee3e32cdb7fd..20a45a7feed5 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -42,6 +42,11 @@ + #include <kunit/test-bug.h> + #include <linux/sort.h> + ++#ifdef CONFIG_IEE ++#include <linux/iee-func.h> ++#include <asm/iee-access.h> ++#endif ++ + #include <linux/debugfs.h> + #include <trace/events/kmem.h> + +@@ -317,6 +322,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) + /* + * Tracking user of a slab. 
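The __pud_alloc()/__pmd_alloc() hunks above show the recurring PTP enrollment idiom: the moment a freshly allocated page is about to be installed as a page-table page, its IEE alias is switched on and its linear-map alias is made read-only, so that afterwards only the IEE write path can modify it. Condensed into one helper built from the calls the patch itself uses (the helper name is ours):

	/* enroll a new page-table page into PTP protection */
	static inline void ptp_enroll_pgtable_page(void *p)
	{
		unsigned long iee_addr = __phys_to_iee(__pa(p));

		set_iee_page_valid(iee_addr);			/* open the IEE-side alias */
		iee_set_logical_mem_ro((unsigned long)p);	/* lock the linear map */
	}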
+ */ ++#ifndef CONFIG_IEE + #define TRACK_ADDRS_COUNT 16 + struct track { + unsigned long addr; /* Called from address */ +@@ -329,6 +335,7 @@ struct track { + }; + + enum track_item { TRACK_ALLOC, TRACK_FREE }; ++#endif + + #ifdef SLAB_SUPPORTS_SYSFS + static int sysfs_slab_add(struct kmem_cache *); +@@ -379,7 +386,9 @@ static struct workqueue_struct *flushwq; + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. + */ ++#ifndef CONFIG_IEE + typedef struct { unsigned long v; } freeptr_t; ++#endif + + /* + * Returns freelist pointer (ptr). With hardening, this is obfuscated +@@ -464,7 +473,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) + #endif + + freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); ++ else ++ *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); ++ #else + *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); ++ #endif + } + + /* Loop over all objects in a slab */ +@@ -809,7 +825,34 @@ static void set_track_update(struct kmem_cache *s, void *object, + depot_stack_handle_t handle) + { + struct track *p = get_track(s, object, alloc); ++#ifdef CONFIG_IEE ++ struct track tmp; ++#endif + ++#ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ { ++ tmp = *p; ++ #ifdef CONFIG_STACKDEPOT ++ tmp.handle = handle; ++ #endif ++ tmp.addr = addr; ++ tmp.cpu = smp_processor_id(); ++ tmp.pid = current->pid; ++ tmp.when = jiffies; ++ iee_set_track(p,&tmp); ++ } ++ else ++ { ++ #ifdef CONFIG_STACKDEPOT ++ p->handle = handle; ++ #endif ++ p->addr = addr; ++ p->cpu = smp_processor_id(); ++ p->pid = current->pid; ++ p->when = jiffies; ++ } ++#else + #ifdef CONFIG_STACKDEPOT + p->handle = handle; + #endif +@@ -817,6 +860,7 @@ static void set_track_update(struct kmem_cache *s, void *object, + p->cpu = smp_processor_id(); + p->pid = current->pid; + p->when = jiffies; ++#endif + } + + static __always_inline void set_track(struct kmem_cache *s, void *object, +@@ -835,7 +879,14 @@ static void init_tracking(struct kmem_cache *s, void *object) + return; + + p = get_track(s, object, TRACK_ALLOC); ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ iee_memset(p, 0, 2*sizeof(struct track)); ++ else ++ memset(p, 0, 2*sizeof(struct track)); ++ #else + memset(p, 0, 2*sizeof(struct track)); ++ #endif + } + + static void print_track(const char *s, struct track *t, unsigned long pr_time) +@@ -1045,7 +1096,14 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) + unsigned int poison_size = s->object_size; + + if (s->flags & SLAB_RED_ZONE) { ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ iee_memset(p - s->red_left_pad, val, s->red_left_pad); ++ else ++ memset(p - s->red_left_pad, val, s->red_left_pad); ++ #else + memset(p - s->red_left_pad, val, s->red_left_pad); ++ #endif + + if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { + /* +@@ -1058,12 +1116,34 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) + } + + if (s->flags & __OBJECT_POISON) { ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ { ++ iee_memset(p, POISON_FREE, poison_size - 1); ++ 
iee_memset(&p[poison_size - 1], POISON_END, 1); ++ } ++ else ++ { ++ memset(p, POISON_FREE, poison_size - 1); ++ p[poison_size - 1] = POISON_END; ++ } ++ #else + memset(p, POISON_FREE, poison_size - 1); + p[poison_size - 1] = POISON_END; ++ #endif + } + +- if (s->flags & SLAB_RED_ZONE) ++ if (s->flags & SLAB_RED_ZONE) { ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ iee_memset(p + poison_size, val, s->inuse - poison_size); ++ else ++ memset(p + poison_size, val, s->inuse - poison_size); ++ #else + memset(p + poison_size, val, s->inuse - poison_size); ++ #endif ++ ++ } + } + + static void restore_bytes(struct kmem_cache *s, char *message, u8 data, +@@ -1433,7 +1513,14 @@ void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) + return; + + metadata_access_enable(); ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ iee_memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); ++ else ++ memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); ++ #else + memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); ++ #endif + metadata_access_disable(); + } + +@@ -2015,6 +2102,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) + void *start, *p, *next; + int idx; + bool shuffle; ++ #ifdef CONFIG_IEE ++ unsigned int order; ++ #endif + + flags &= gfp_allowed_mask; + +@@ -2029,6 +2119,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) + alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM; + + slab = alloc_slab_page(alloc_gfp, node, oo); ++ #ifdef CONFIG_IEE ++ order = oo_order(oo); ++ #endif + if (unlikely(!slab)) { + oo = s->min; + alloc_gfp = flags; +@@ -2037,6 +2130,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) + * Try a lower order alloc if possible + */ + slab = alloc_slab_page(alloc_gfp, node, oo); ++ #ifdef CONFIG_IEE ++ order = oo_order(oo); ++ #endif + if (unlikely(!slab)) + return NULL; + stat(s, ORDER_FALLBACK); +@@ -2046,6 +2142,45 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) + slab->inuse = 0; + slab->frozen = 0; + ++ #ifdef CONFIG_IEE ++ if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) ++ { ++ int i; ++ for(i = 0; i < (0x1 << order); i++) ++ { ++ unsigned long iee_addr = __phys_to_iee(page_to_phys(folio_page(slab_folio(slab), i))); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)page_address(folio_page(slab_folio(slab), i))); ++ } ++ } ++ ++ // If the page belongs to a task_struct, alloc token for it and set iee&lm va. ++ if(strcmp(s->name, "task_struct") == 0) ++ { ++ int i; ++ for(i = 0; i < (0x1 << order); i++) ++ { ++ void *token_addr = (void *)__phys_to_iee(page_to_phys(folio_page(slab_folio(slab), i))); ++ // Get lm va of the page. 
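The task_struct branch above pairs every task_struct slab page with a token page: token_addr is the fixed IEE-side alias derived from the page's physical address, and the page allocated on the next line backs it. The matching __free_slab() path below must discover whether a token is still mapped, and does so with the arm64 AT instruction instead of a software page-table walk. A sketch of that probe, lifted from the code below (the helper name is ours):

	/* probe a VA via AT S1E1R; PAR_EL1 bit 0 set means the walk faulted */
	static inline bool iee_token_mapped(const void *va, phys_addr_t *pa)
	{
		unsigned long par, flags;

		local_irq_save(flags);	/* the AT and the PAR_EL1 read must pair up */
		asm volatile("at s1e1r, %0" :: "r"(va));
		isb();
		par = read_sysreg(par_el1);
		local_irq_restore(flags);

		if (par & 0x1)
			return false;
		*pa = par & PTE_ADDR_MASK;
		return true;
	}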
++			void *alloc_token = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
++			iee_set_token_page_valid(token_addr, alloc_token);
++			set_iee_page_valid(__phys_to_iee(__pa(alloc_token)));
++			iee_set_logical_mem_ro((unsigned long)alloc_token);
++		}
++	}
++	#else
++	#ifdef CONFIG_KOI
++	if (strcmp(s->name, "task_struct") == 0) {
++		int i;
++		for (i = 0; i < (0x1 << order); i++) {
++			void *token_addr = (void *)(__phys_to_virt(page_to_phys(folio_page(slab_folio(slab), i))) + koi_offset);
++			void *alloc_token = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
++			koi_add_page_mapping(token_addr, alloc_token);
++		}
++	}
++	#endif
++	#endif
++
+ 	account_slab(slab, oo_order(oo), s, flags);
+ 
+ 	slab->slab_cache = s;
+@@ -2098,6 +2233,68 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab)
+ 	__folio_clear_slab(folio);
+ 	mm_account_reclaimed_pages(pages);
+ 	unaccount_slab(slab, order, s);
++
++	#ifdef CONFIG_IEE
++	if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0)
++	{
++		int i;
++		for(i = 0; i < (0x1 << order); i++)
++		{
++			unsigned long iee_addr = __phys_to_iee(page_to_phys(folio_page(folio, i)));
++			set_iee_page_invalid(iee_addr);
++			iee_set_logical_mem_rw((unsigned long)page_address(folio_page(folio, i)));
++		}
++	}
++	// If the page containing this token is empty, free it and restore iee&lm va.
++	if(strcmp(s->name, "task_struct") == 0)
++	{
++		int i;
++		for(i = 0; i < (0x1 << order); i++)
++		{
++			void *token_addr = (void *)__phys_to_iee(page_to_phys(folio_page(folio, i)));
++			unsigned long flags;
++			unsigned long res;
++			local_irq_save(flags);
++			asm volatile("at s1e1r, %0"::"r"(token_addr));
++			isb();
++			res = read_sysreg(par_el1);
++			local_irq_restore(flags);
++			if(!(res & 0x1))
++			{
++				// Get lm va of the page.
++				void *token_page = __va(res & PTE_ADDR_MASK);
++				iee_set_token_page_invalid(token_addr);
++				set_iee_page_invalid(__phys_to_iee(__pa(token_page)));
++				iee_set_logical_mem_rw((unsigned long)token_page);
++				free_page((unsigned long)token_page);
++			}
++		}
++	}
++	#else
++	#ifdef CONFIG_KOI
++	if(strcmp(s->name, "task_struct") == 0)
++	{
++		int i;
++		for(i = 0; i < (0x1 << order); i++)
++		{
++			void *token_addr = (void *)(__phys_to_virt(page_to_phys(folio_page(folio, i))) + koi_offset);
++			unsigned long flags;
++			unsigned long res;
++			local_irq_save(flags);
++			asm volatile("at s1e1r, %0"::"r"(token_addr));
++			isb();
++			res = read_sysreg(par_el1);
++			local_irq_restore(flags);
++			if(!(res & 0x1))
++			{
++				koi_remove_page_mapping(token_addr);
++				free_page((unsigned long)__va(res & PTE_ADDR_MASK));
++			}
++		}
++	}
++	#endif
++	#endif
++
+ 	__free_pages(&folio->page, order);
+ }
+ 
+diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
+index a2cbe44c48e1..7cf05d293312 100644
+--- a/mm/sparse-vmemmap.c
++++ b/mm/sparse-vmemmap.c
+@@ -28,6 +28,10 @@
+ #include <linux/vmalloc.h>
+ #include <linux/sched.h>
+ 
++#ifdef CONFIG_PTP
++#include <linux/iee-func.h>
++#endif
++
+ #include <asm/dma.h>
+ #include <asm/pgalloc.h>
+ 
+@@ -146,6 +150,9 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+ 					       struct page *reuse)
+ {
+ 	pte_t *pte = pte_offset_kernel(pmd, addr);
++	#ifdef CONFIG_PTP
++	unsigned long iee_addr;
++	#endif
+ 	if (pte_none(ptep_get(pte))) {
+ 		pte_t entry;
+ 		void *p;
+@@ -167,6 +174,11 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+ 			get_page(reuse);
+ 			p = page_to_virt(reuse);
+ 		}
++#ifdef CONFIG_PTP
++	iee_addr = __phys_to_iee(__pa(p));
++	set_iee_page_valid(iee_addr);
++	iee_set_logical_mem_ro((unsigned long)p);
++#endif
+ 		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ 
set_pte_at(&init_mm, addr, pte, entry); + } +@@ -176,11 +188,20 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, + static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) + { + void *p = vmemmap_alloc_block(size, node); ++ #ifdef CONFIG_PTP ++ unsigned long iee_addr; ++ #endif + + if (!p) + return NULL; + memset(p, 0, size); + ++ #ifdef CONFIG_PTP ++ iee_addr = __phys_to_iee(__pa(p)); ++ set_iee_page_valid(iee_addr); ++ iee_set_logical_mem_ro((unsigned long)p); ++ #endif ++ + return p; + } + +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index e6058942a084..27a006728009 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -3431,7 +3431,7 @@ static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) + + if (WARN_ON_ONCE(pfn_valid(pfn))) + return -EINVAL; +- ++ + ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); + set_pte_at(&init_mm, addr, pte, ptent); + +diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c +index c42ddd85ff1f..4714b4f2be08 100644 +--- a/net/dns_resolver/dns_key.c ++++ b/net/dns_resolver/dns_key.c +@@ -34,6 +34,10 @@ + #include <keys/user-type.h> + #include "internal.h" + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + MODULE_DESCRIPTION("DNS Resolver"); + MODULE_AUTHOR("Wang Lei"); + MODULE_LICENSE("GPL"); +@@ -365,8 +369,13 @@ static int __init init_dns_resolver(void) + /* instruct request_key() to use this special keyring as a cache for + * the results it looks up */ + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(cred,keyring); ++ iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); ++ #else + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; ++ #endif + dns_resolver_cache = cred; + + kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); +diff --git a/security/commoncap.c b/security/commoncap.c +index bc0521104197..d7d3b7cc13e8 100644 +--- a/security/commoncap.c ++++ b/security/commoncap.c +@@ -26,6 +26,10 @@ + #include <linux/personality.h> + #include <linux/mnt_idmapping.h> + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + /* + * If a non-root user executes a setuid-root binary in + * !secure(SECURE_NOROOT) mode, then we raise capabilities. +@@ -266,6 +270,15 @@ int cap_capset(struct cred *new, + if (!cap_issubset(*effective, *permitted)) + return -EPERM; + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,*effective); ++ iee_set_cred_cap_inheritable(new,*inheritable); ++ iee_set_cred_cap_permitted(new,*permitted); ++ ++ iee_set_cred_cap_ambient(new,cap_intersect(new->cap_ambient, ++ cap_intersect(*permitted, ++ *inheritable))); ++ #else + new->cap_effective = *effective; + new->cap_inheritable = *inheritable; + new->cap_permitted = *permitted; +@@ -277,6 +290,7 @@ int cap_capset(struct cred *new, + new->cap_ambient = cap_intersect(new->cap_ambient, + cap_intersect(*permitted, + *inheritable)); ++ #endif + if (WARN_ON(!cap_ambient_invariant_ok(new))) + return -EINVAL; + return 0; +@@ -601,9 +615,16 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, + * pP' = (X & fP) | (pI & fI) + * The addition of pA' is handled later. 
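A worked instance of the pP' formula in the comment above, with 8-bit masks for brevity: if X (the bounding set) is 0x0f, fP is 0x3c, pI is 0x30 and fI is 0x10, then pP' = (X & fP) | (pI & fI) = 0x0c | 0x10 = 0x1c. The CONFIG_CREDP variant below computes exactly this value into a local kernel_cap_t and commits it with iee_set_cred_cap_permitted(), since new->cap_permitted.val cannot be stored to directly.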
+ */ ++#ifdef CONFIG_CREDP ++ kernel_cap_t temp = new->cap_permitted; ++ temp.val = (new->cap_bset.val & caps->permitted.val) | ++ (new->cap_inheritable.val & caps->inheritable.val); ++ iee_set_cred_cap_permitted(new,temp); ++#else + new->cap_permitted.val = + (new->cap_bset.val & caps->permitted.val) | + (new->cap_inheritable.val & caps->inheritable.val); ++#endif + + if (caps->permitted.val & ~new->cap_permitted.val) + /* insufficient to execute correctly */ +@@ -726,7 +747,15 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, + int rc = 0; + struct cpu_vfs_cap_data vcaps; + ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = bprm->cred->cap_permitted; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_permitted(bprm->cred, tmp_cap); ++ } while (0); ++ #else + cap_clear(bprm->cred->cap_permitted); ++ #endif + + if (!file_caps_enabled) + return 0; +@@ -757,7 +786,15 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, + + out: + if (rc) ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = bprm->cred->cap_permitted; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_permitted(bprm->cred, tmp_cap); ++ } while (0); ++ #else + cap_clear(bprm->cred->cap_permitted); ++ #endif + + return rc; + } +@@ -809,8 +846,13 @@ static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, + */ + if (__is_eff(root_uid, new) || __is_real(root_uid, new)) { + /* pP' = (cap_bset & ~0) | (pI & ~0) */ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_permitted(new,cap_combine(old->cap_bset, ++ old->cap_inheritable)); ++ #else + new->cap_permitted = cap_combine(old->cap_bset, + old->cap_inheritable); ++ #endif + } + /* + * If only the real uid is 0, we do not set the effective bit. +@@ -919,34 +961,71 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) + /* downgrade; they get no more than they had, and maybe less */ + if (!ns_capable(new->user_ns, CAP_SETUID) || + (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) { ++ #ifdef CONFIG_CREDP ++ iee_set_cred_euid(new,new->uid); ++ iee_set_cred_egid(new,new->gid); ++ #else + new->euid = new->uid; + new->egid = new->gid; ++ #endif + } ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_permitted(new,cap_intersect(new->cap_permitted, ++ old->cap_permitted)); ++ #else + new->cap_permitted = cap_intersect(new->cap_permitted, + old->cap_permitted); ++ #endif + } + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_fsuid(new,new->euid); ++ iee_set_cred_suid(new,new->euid); ++ iee_set_cred_fsgid(new,new->egid); ++ iee_set_cred_sgid(new,new->egid); ++ #else + new->suid = new->fsuid = new->euid; + new->sgid = new->fsgid = new->egid; ++ #endif + + /* File caps or setid cancels ambient. */ + if (has_fcap || is_setid) ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = new->cap_ambient; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_ambient(new, tmp_cap); ++ } while (0); ++ #else + cap_clear(new->cap_ambient); ++ #endif + + /* + * Now that we've computed pA', update pP' to give: + * pP' = (X & fP) | (pI & fI) | pA' + */ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_permitted(new,cap_combine(new->cap_permitted, new->cap_ambient)); ++ #else + new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient); ++ #endif + + /* + * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, + * this is the same as pE' = (fE ? pP' : 0) | pA'. 
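Note the shape of the cap_clear() replacements in these hunks: the CREDP variant is wrapped in do { ... } while (0) so that the three-line read-modify-write remains a single statement and can sit in an unbraced if/else arm, exactly where cap_clear(new->cap_ambient) used to be. In isolation the pattern reads:

	/* single-statement "clear a capability set" under CONFIG_CREDP */
	do {
		kernel_cap_t tmp_cap = new->cap_ambient;

		tmp_cap.val = 0;
		iee_set_cred_cap_ambient(new, tmp_cap);
	} while (0);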
+ */ + if (effective) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,new->cap_permitted); ++ #else + new->cap_effective = new->cap_permitted; ++ #endif + else ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,new->cap_ambient); ++ #else + new->cap_effective = new->cap_ambient; ++ #endif + + if (WARN_ON(!cap_ambient_invariant_ok(new))) + return -EPERM; +@@ -957,7 +1036,11 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) + return ret; + } + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_securebits(new,new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); ++ #else + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); ++ #endif + + if (WARN_ON(!cap_ambient_invariant_ok(new))) + return -EPERM; +@@ -1092,8 +1175,21 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) + !uid_eq(new->euid, root_uid) && + !uid_eq(new->suid, root_uid))) { + if (!issecure(SECURE_KEEP_CAPS)) { ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = new->cap_permitted; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_permitted(new, tmp_cap); ++ } while (0); ++ do { ++ kernel_cap_t tmp_cap = new->cap_effective; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_effective(new, tmp_cap); ++ } while (0); ++ #else + cap_clear(new->cap_permitted); + cap_clear(new->cap_effective); ++ #endif + } + + /* +@@ -1101,12 +1197,32 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) + * by exec to drop capabilities. We should make sure that + * this remains the case. + */ ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = new->cap_ambient; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_ambient(new, tmp_cap); ++ } while (0); ++ #else + cap_clear(new->cap_ambient); ++ #endif + } + if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = new->cap_effective; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_effective(new, tmp_cap); ++ } while (0); ++ #else + cap_clear(new->cap_effective); ++ #endif + if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,new->cap_permitted); ++ #else + new->cap_effective = new->cap_permitted; ++ #endif + } + + /** +@@ -1142,13 +1258,22 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + kuid_t root_uid = make_kuid(old->user_ns, 0); + if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,cap_drop_fs_set(new->cap_effective)); ++ #else + new->cap_effective = + cap_drop_fs_set(new->cap_effective); ++ #endif + + if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_cap_effective(new,cap_raise_fs_set(new->cap_effective, ++ new->cap_permitted)); ++ #else + new->cap_effective = + cap_raise_fs_set(new->cap_effective, + new->cap_permitted); ++ #endif + } + break; + +@@ -1243,7 +1368,15 @@ static int cap_prctl_drop(unsigned long cap) + new = prepare_creds(); + if (!new) + return -ENOMEM; ++ #ifdef CONFIG_CREDP ++ { ++ kernel_cap_t tmp = new->cap_bset; ++ cap_lower(tmp, cap); ++ iee_set_cred_cap_bset(new, tmp); ++ } ++ #else + cap_lower(new->cap_bset, cap); ++ #endif + return commit_creds(new); + } + +@@ -1319,7 +1452,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, + new = prepare_creds(); + if (!new) + return -ENOMEM; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_securebits(new,arg2); ++ #else + 
new->securebits = arg2; ++ #endif + return commit_creds(new); + + case PR_GET_SECUREBITS: +@@ -1338,9 +1475,17 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, + if (!new) + return -ENOMEM; + if (arg2) ++ #ifdef CONFIG_CREDP ++ iee_set_cred_securebits(new,new->securebits | issecure_mask(SECURE_KEEP_CAPS)); ++ #else + new->securebits |= issecure_mask(SECURE_KEEP_CAPS); ++ #endif + else ++ #ifdef CONFIG_CREDP ++ iee_set_cred_securebits(new,new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); ++ #else + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); ++ #endif + return commit_creds(new); + + case PR_CAP_AMBIENT: +@@ -1351,7 +1496,15 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, + new = prepare_creds(); + if (!new) + return -ENOMEM; ++ #ifdef CONFIG_CREDP ++ do { ++ kernel_cap_t tmp_cap = new->cap_ambient; ++ tmp_cap.val = 0; ++ iee_set_cred_cap_ambient(new, tmp_cap); ++ } while (0); ++ #else + cap_clear(new->cap_ambient); ++ #endif + return commit_creds(new); + } + +@@ -1375,9 +1528,25 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, + if (!new) + return -ENOMEM; + if (arg2 == PR_CAP_AMBIENT_RAISE) ++ #ifdef CONFIG_CREDP ++ { ++ kernel_cap_t tmp = new->cap_ambient; ++ cap_raise(tmp, arg3); ++ iee_set_cred_cap_ambient(new, tmp); ++ } ++ #else + cap_raise(new->cap_ambient, arg3); ++ #endif + else ++ #ifdef CONFIG_CREDP ++ { ++ kernel_cap_t tmp = new->cap_ambient; ++ cap_lower(tmp, arg3); ++ iee_set_cred_cap_ambient(new, tmp); ++ } ++ #else + cap_lower(new->cap_ambient, arg3); ++ #endif + return commit_creds(new); + } + +diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c +index 19be69fa4d05..6cb164dfc19b 100644 +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -23,6 +23,9 @@ + #include <linux/uaccess.h> + #include <keys/request_key_auth-type.h> + #include "internal.h" ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif + + #define KEY_MAX_DESC_SIZE 4096 + +@@ -1155,7 +1158,11 @@ static int keyctl_change_reqkey_auth(struct key *key) + return -ENOMEM; + + key_put(new->request_key_auth); ++ #ifdef CONFIG_CREDP ++ iee_set_cred_request_key_auth(new,key_get(key)); ++ #else + new->request_key_auth = key_get(key); ++ #endif + + return commit_creds(new); + } +@@ -1432,7 +1439,11 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) + } + + set: ++ #ifdef CONFIG_CREDP ++ iee_set_cred_jit_keyring(new,reqkey_defl); ++ #else + new->jit_keyring = reqkey_defl; ++ #endif + commit_creds(new); + return old_setting; + error: +@@ -1644,9 +1655,17 @@ long keyctl_session_to_parent(void) + cred = cred_alloc_blank(); + if (!cred) + goto error_keyring; ++ #ifdef CONFIG_CREDP ++ newwork = (struct rcu_head *)(cred->rcu.func); ++ #else + newwork = &cred->rcu; ++ #endif + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_session_keyring(cred,key_ref_to_ptr(keyring_r)); ++ #else + cred->session_keyring = key_ref_to_ptr(keyring_r); ++ #endif + keyring_r = NULL; + init_task_work(newwork, key_change_session_keyring); + +@@ -1705,7 +1724,11 @@ long keyctl_session_to_parent(void) + write_unlock_irq(&tasklist_lock); + rcu_read_unlock(); + if (oldwork) ++ #ifdef CONFIG_CREDP ++ put_cred(*(struct cred **)(oldwork + 1)); ++ #else + put_cred(container_of(oldwork, struct cred, rcu)); ++ #endif + if (newwork) + put_cred(cred); + return ret; +diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c +index b5d5333ab330..aaa2a2347d84 100644 +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -19,6 
+19,10 @@ + #include <keys/request_key_auth-type.h> + #include "internal.h" + ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif ++ + /* Session keyring create vs join semaphore */ + static DEFINE_MUTEX(key_session_mutex); + +@@ -232,7 +236,11 @@ int install_thread_keyring_to_cred(struct cred *new) + if (IS_ERR(keyring)) + return PTR_ERR(keyring); + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_thread_keyring(new,keyring); ++ #else + new->thread_keyring = keyring; ++ #endif + return 0; + } + +@@ -279,7 +287,11 @@ int install_process_keyring_to_cred(struct cred *new) + if (IS_ERR(keyring)) + return PTR_ERR(keyring); + ++ #ifdef CONFIG_CREDP ++ iee_set_cred_process_keyring(new,keyring); ++ #else + new->process_keyring = keyring; ++ #endif + return 0; + } + +@@ -338,7 +350,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) + + /* install the keyring */ + old = cred->session_keyring; ++ #ifdef CONFIG_CREDP ++ iee_set_cred_session_keyring(cred,keyring); ++ #else + cred->session_keyring = keyring; ++ #endif + + if (old) + key_put(old); +@@ -911,7 +927,11 @@ long join_session_keyring(const char *name) + void key_change_session_keyring(struct callback_head *twork) + { + const struct cred *old = current_cred(); ++ #ifdef CONFIG_CREDP ++ struct cred *new = *(struct cred **)(twork + 1); ++ #else + struct cred *new = container_of(twork, struct cred, rcu); ++ #endif + + if (unlikely(current->flags & PF_EXITING)) { + put_cred(new); +@@ -925,6 +945,38 @@ void key_change_session_keyring(struct callback_head *twork) + return; + } + ++ /* If get_ucounts fails more bits are needed in the refcount */ ++ if (unlikely(!get_ucounts(old->ucounts))) { ++ WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); ++ put_cred(new); ++ return; ++ } ++ ++ #ifdef CONFIG_CREDP ++ iee_set_cred_uid(new,old-> uid); ++ iee_set_cred_euid(new,old-> euid); ++ iee_set_cred_suid(new,old-> suid); ++ iee_set_cred_fsuid(new,old->fsuid); ++ iee_set_cred_gid(new,old-> gid); ++ iee_set_cred_egid(new,old-> egid); ++ iee_set_cred_sgid(new,old-> sgid); ++ iee_set_cred_fsgid(new,old->fsgid); ++ iee_set_cred_user(new,get_uid(old->user)); ++ iee_set_cred_ucounts(new, old->ucounts); ++ iee_set_cred_user_ns(new,get_user_ns(old->user_ns)); ++ iee_set_cred_group_info(new,get_group_info(old->group_info)); ++ ++ iee_set_cred_securebits(new,old->securebits); ++ iee_set_cred_cap_inheritable(new,old->cap_inheritable); ++ iee_set_cred_cap_permitted(new,old->cap_permitted); ++ iee_set_cred_cap_effective(new,old->cap_effective); ++ iee_set_cred_cap_ambient(new,old->cap_ambient); ++ iee_set_cred_cap_bset(new,old->cap_bset); ++ ++ iee_set_cred_jit_keyring(new,old->jit_keyring); ++ iee_set_cred_thread_keyring(new,key_get(old->thread_keyring)); ++ iee_set_cred_process_keyring(new,key_get(old->process_keyring)); ++ #else + new-> uid = old-> uid; + new-> euid = old-> euid; + new-> suid = old-> suid; +@@ -948,6 +1000,7 @@ void key_change_session_keyring(struct callback_head *twork) + new->jit_keyring = old->jit_keyring; + new->thread_keyring = key_get(old->thread_keyring); + new->process_keyring = key_get(old->process_keyring); ++ #endif + + security_transfer_creds(new, old); + +diff --git a/security/security.c b/security/security.c +index 407b51719f79..74ffd7ea3f37 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -30,6 +30,9 @@ + #include <linux/string.h> + #include <linux/msg.h> + #include <net/flow.h> ++#ifdef CONFIG_CREDP ++#include <asm/iee-cred.h> ++#endif + + /* How many LSMs were built into the kernel? 
*/
+#define LSM_COUNT (__end_lsm_info - __start_lsm_info)
+@@ -570,11 +573,19 @@ EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
+ static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
+ {
+ 	if (blob_sizes.lbs_cred == 0) {
++		#ifdef CONFIG_CREDP
++		iee_set_cred_security(cred,NULL);
++		#else
+ 		cred->security = NULL;
++		#endif
+ 		return 0;
+ 	}
+ 
++	#ifdef CONFIG_CREDP
++	iee_set_cred_security(cred,kzalloc(blob_sizes.lbs_cred, gfp));
++	#else
+ 	cred->security = kzalloc(blob_sizes.lbs_cred, gfp);
++	#endif
+ 	if (cred->security == NULL)
+ 		return -ENOMEM;
+ 	return 0;
+@@ -2950,7 +2961,11 @@ void security_cred_free(struct cred *cred)
+ 	call_void_hook(cred_free, cred);
+ 
+ 	kfree(cred->security);
++	#ifdef CONFIG_CREDP
++	iee_set_cred_security(cred,NULL);
++	#else
+ 	cred->security = NULL;
++	#endif
+ }
+ 
+ /**
+-- 
+2.34.1
+
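For reference, the configuration surface this patch introduces in mm/Kconfig: PTP is def_bool y whenever CONFIG_IEE is set, while KOI is def_bool n and hard-requires arm64 with 48-bit VAs and 4K pages. An illustrative fragment; CONFIG_IEE and CONFIG_CREDP are defined elsewhere in this series, so listing them here is an assumption:

	CONFIG_ARM64_VA_BITS_48=y
	CONFIG_ARM64_4K_PAGES=y
	CONFIG_IEE=y
	CONFIG_CREDP=y
	CONFIG_PTP=y
	# CONFIG_KOI is not set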