diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 13:08:42 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 13:08:42 -0800 |
| commit | 99ca0edb41aabd888ca1548fa0391a4975740a83 (patch) | |
| tree | 8f2327f46b14e603d0bc2d8b5816f278314712da /arch/arm64/kernel | |
| parent | 4a037ad5d115b2cc79a5071a7854475f365476fa (diff) | |
| parent | 1ffa9763828cf73a4d4eaa04c29a4a89fb0708c7 (diff) | |
| download | linux-99ca0edb41aabd888ca1548fa0391a4975740a83.tar.gz linux-99ca0edb41aabd888ca1548fa0391a4975740a83.tar.bz2 linux-99ca0edb41aabd888ca1548fa0391a4975740a83.zip | |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Will Deacon:
- vDSO build improvements including support for building with BSD.
- Cleanup to the AMU support code and initialisation rework to support
cpufreq drivers built as modules.
- Removal of synthetic frame record from exception stack when entering
the kernel from EL0.
- Add support for the TRNG firmware call introduced by Arm spec
DEN0098.
- Cleanup and refactoring across the board.
- Avoid calling arch_get_random_seed_long() from
add_interrupt_randomness().
- Perf and PMU updates including support for Cortex-A78 and the v8.3
SPE extensions.
- Significant steps along the road to leaving the MMU enabled during
kexec relocation.
- Faultaround changes to initialise prefaulted PTEs as 'old' when
hardware access-flag updates are supported, which drastically
improves vmscan performance.
- CPU errata updates for Cortex-A76 (#1463225) and Cortex-A55
(#1024718).
- Preparatory work for yielding the vector unit at a finer granularity
in the crypto code, which in turn will one day allow us to defer
softirq processing when it is in use.
- Support for overriding CPU ID register fields on the command-line.
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (85 commits)
drivers/perf: Replace spin_lock_irqsave to spin_lock
mm: filemap: Fix microblaze build failure with 'mmu_defconfig'
arm64: Make CPU_BIG_ENDIAN depend on ld.bfd or ld.lld 13.0.0+
arm64: cpufeatures: Allow disabling of Pointer Auth from the command-line
arm64: Defer enabling pointer authentication on boot core
arm64: cpufeatures: Allow disabling of BTI from the command-line
arm64: Move "nokaslr" over to the early cpufeature infrastructure
KVM: arm64: Document HVC_VHE_RESTART stub hypercall
arm64: Make kvm-arm.mode={nvhe, protected} an alias of id_aa64mmfr1.vh=0
arm64: Add an aliasing facility for the idreg override
arm64: Honor VHE being disabled from the command-line
arm64: Allow ID_AA64MMFR1_EL1.VH to be overridden from the command line
arm64: cpufeature: Add an early command-line cpufeature override facility
arm64: Extract early FDT mapping from kaslr_early_init()
arm64: cpufeature: Use IDreg override in __read_sysreg_by_encoding()
arm64: cpufeature: Add global feature override facility
arm64: Move SCTLR_EL1 initialisation to EL-agnostic code
arm64: Simplify init_el2_state to be non-VHE only
arm64: Move VHE-specific SPE setup to mutate_to_vhe()
arm64: Drop early setting of MDSCR_EL2.TPMS
...
Diffstat (limited to 'arch/arm64/kernel')
30 files changed, 639 insertions, 553 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 86364ab6f13f..ed65576ce710 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -17,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o + syscall.o proton-pack.o idreg-override.o targets += efi-entry.o @@ -59,9 +59,10 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_ARM64_MTE) += mte.o +obj-y += vdso-wrap.o +obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o -obj-y += vdso/ probes/ -obj-$(CONFIG_COMPAT_VDSO) += vdso32/ +obj-y += probes/ head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index a57cffb752e8..1184c44ea2c7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -17,7 +17,7 @@ #include <asm/sections.h> #include <linux/stop_machine.h> -#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f) +#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 301784463587..a36e2fc330d4 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -99,6 +99,9 @@ int main(void) DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); + DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); + DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask)); + BLANK(); #ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); diff 
--git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a63428301f42..506a1cd37973 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -107,8 +107,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) } #ifdef CONFIG_ARM64_ERRATUM_1463225 -DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); - static bool has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, int scope) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3e6331b64932..066030717a4c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -352,9 +352,12 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_END, }; +static struct arm64_ftr_override __ro_after_init no_override = { }; + struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { .name = "SYS_CTR_EL0", - .ftr_bits = ftr_ctr + .ftr_bits = ftr_ctr, + .override = &no_override, }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { @@ -544,13 +547,20 @@ static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; -#define ARM64_FTR_REG(id, table) { \ - .sys_id = id, \ - .reg = &(struct arm64_ftr_reg){ \ - .name = #id, \ - .ftr_bits = &((table)[0]), \ +#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ + .name = #id, \ + .override = (ovr), \ + .ftr_bits = &((table)[0]), \ }} +#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override) + +struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64isar1_override; + static const struct __ftr_reg_entry { u32 sys_id; struct arm64_ftr_reg *reg; @@ -585,7 +595,8 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), + 
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, + &id_aa64pfr1_override), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -594,11 +605,13 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, + &id_aa64isar1_override), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), - ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, + &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 0, CRn = 1, CRm = 2 */ @@ -770,6 +783,33 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); s64 ftr_new = arm64_ftr_value(ftrp, new); + s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val); + + if ((ftr_mask & reg->override->mask) == ftr_mask) { + s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new); + char *str = NULL; + + if (ftr_ovr != tmp) { + /* Unsafe, remove the override */ + reg->override->mask &= ~ftr_mask; + reg->override->val &= ~ftr_mask; + tmp = ftr_ovr; + str = "ignoring override"; + } else if (ftr_new != tmp) { + /* Override was valid */ + ftr_new = tmp; + str = "forced"; + } else if (ftr_ovr == tmp) { + /* Override was the safe value */ + str = "already set"; + } + + if (str) + pr_warn("%s[%d:%d]: %s to %llx\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift, str, tmp); + } val = arm64_ftr_set_value(ftrp, val, ftr_new); @@ -1115,14 +1155,17 @@ u64 read_sanitised_ftr_reg(u32 id) EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ - case r: return read_sysreg_s(r) + case r: val = read_sysreg_s(r); break; /* * __read_sysreg_by_encoding() - Used by a STARTING cpu 
before cpuinfo is populated. * Read the system register on the current CPU */ -static u64 __read_sysreg_by_encoding(u32 sys_id) +u64 __read_sysreg_by_encoding(u32 sys_id) { + struct arm64_ftr_reg *regp; + u64 val; + switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); read_sysreg_case(SYS_ID_PFR1_EL1); @@ -1165,6 +1208,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) BUG(); return 0; } + + regp = get_arm64_ftr_reg(sys_id); + if (regp) { + val &= ~regp->override->mask; + val |= (regp->override->val & regp->override->mask); + } + + return val; } #include <linux/irqchip/arm-gic-v3.h> @@ -1455,7 +1506,7 @@ static bool cpu_has_broken_dbm(void) /* List of CPUs which have broken DBM support. */ static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 - MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 5346953e4382..9d3588450473 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -109,6 +109,55 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) exit_to_kernel_mode(regs); } +#ifdef CONFIG_ARM64_ERRATUM_1463225 +static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); + +static void cortex_a76_erratum_1463225_svc_handler(void) +{ + u32 reg, val; + + if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) + return; + + if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225))) + return; + + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); + reg = read_sysreg(mdscr_el1); + val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; + write_sysreg(val, mdscr_el1); + asm volatile("msr daifclr, #8"); + isb(); + + /* We will have taken a single-step exception by this point */ + + write_sysreg(reg, mdscr_el1); + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); +} + +static bool 
cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) + return false; + + /* + * We've taken a dummy step exception from the kernel to ensure + * that interrupts are re-enabled on the syscall path. Return back + * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions + * masked so that we can safely restore the mdscr and get on with + * handling the syscall. + */ + regs->pstate |= PSR_D_BIT; + return true; +} +#else /* CONFIG_ARM64_ERRATUM_1463225 */ +static void cortex_a76_erratum_1463225_svc_handler(void) { } +static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_ARM64_ERRATUM_1463225 */ + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -186,7 +235,8 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); arm64_enter_el1_dbg(regs); - do_debug_exception(far, esr, regs); + if (!cortex_a76_erratum_1463225_debug_handler(regs)) + do_debug_exception(far, esr, regs); arm64_exit_el1_dbg(regs); } @@ -362,6 +412,7 @@ static void noinstr el0_svc(struct pt_regs *regs) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); enter_from_user_mode(); + cortex_a76_erratum_1463225_svc_handler(); do_el0_svc(regs); } @@ -439,6 +490,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); enter_from_user_mode(); + cortex_a76_erratum_1463225_svc_handler(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c9bae73f2621..a31a0a713c85 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -261,16 +261,16 @@ alternative_else_nop_endif stp lr, x21, [sp, #S_LR] /* - * In order to be able to dump the contents of struct pt_regs at the - * time the exception was taken (in case we attempt to walk 
the call - * stack later), chain it together with the stack frames. + * For exceptions from EL0, terminate the callchain here. + * For exceptions from EL1, create a synthetic frame record so the + * interrupted code shows up in the backtrace. */ .if \el == 0 - stp xzr, xzr, [sp, #S_STACKFRAME] + mov x29, xzr .else stp x29, x22, [sp, #S_STACKFRAME] - .endif add x29, sp, #S_STACKFRAME + .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -805,7 +805,7 @@ SYM_CODE_END(ret_to_user) // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(2 * PAGE_SIZE) + add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -825,7 +825,7 @@ alternative_else_nop_endif // Move from swapper_pg_dir to tramp_pg_dir .macro tramp_unmap_kernel, tmp mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(2 * PAGE_SIZE) + sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a0dc987724ed..1e30b5550d2a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -404,10 +404,6 @@ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x5, init_task msr sp_el0, x5 // Save thread_info -#ifdef CONFIG_ARM64_PTR_AUTH - __ptrauth_keys_init_cpu x5, x6, x7, x8 -#endif - adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -436,10 +432,12 @@ SYM_FUNC_START_LOCAL(__primary_switched) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bl kasan_early_init #endif + mov x0, x21 // pass FDT address in x0 + bl early_fdt_map // Try mapping the FDT early + bl init_feature_override // Parse cpu feature overrides #ifdef CONFIG_RANDOMIZE_BASE tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? 
b.ne 0f - mov x0, x21 // pass FDT address in x0 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset @@ -447,6 +445,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) ret // to __primary_switch() 0: #endif + bl switch_to_vhe // Prefer VHE if possible add sp, sp, #16 mov x29, #0 mov x30, #0 @@ -478,13 +477,14 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -493,50 +493,11 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) eret SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) -#ifdef CONFIG_ARM64_VHE - /* - * Check for VHE being present. x2 being non-zero indicates that we - * do have VHE, and that the kernel is intended to run at EL2. - */ - mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr -#endif - cbz x2, init_el2_nvhe - - /* - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - mov_q x0, HCR_HOST_VHE_FLAGS - msr hcr_el2, x0 - isb - - init_el2_state vhe - - isb - - mov_q x0, INIT_PSTATE_EL2 - msr spsr_el2, x0 - msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 - eret - -SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL) - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - */ - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mov_q x0, HCR_HOST_NVHE_FLAGS msr hcr_el2, x0 isb - init_el2_state nvhe + init_el2_state /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors @@ -623,6 +584,7 @@ SYM_FUNC_START_LOCAL(secondary_startup) /* * Common entry point for secondary CPUs. 
*/ + bl switch_to_vhe bl __cpu_secondary_check52bitva bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir @@ -703,16 +665,9 @@ SYM_FUNC_START(__enable_mmu) offset_ttbr1 x1, x3 msr ttbr1_el1, x1 // load TTBR1 isb - msr sctlr_el1, x0 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb + + set_sctlr_el1 x0 + ret SYM_FUNC_END(__enable_mmu) @@ -883,11 +838,7 @@ SYM_FUNC_START_LOCAL(__primary_switch) tlbi vmalle1 // Remove any stale TLB entries dsb nsh - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb + set_sctlr_el1 x19 // re-enable the MMU bl __relocate_kernel #endif diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 9c9f47e9f7f4..b1cef371df2b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -16,7 +16,6 @@ #define pr_fmt(x) "hibernate: " x #include <linux/cpu.h> #include <linux/kvm_host.h> -#include <linux/mm.h> #include <linux/pm.h> #include <linux/sched.h> #include <linux/suspend.h> @@ -31,13 +30,12 @@ #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/mte.h> -#include <asm/pgalloc.h> -#include <asm/pgtable-hwdef.h> #include <asm/sections.h> #include <asm/smp.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/sysreg.h> +#include <asm/trans_pgd.h> #include <asm/virt.h> /* @@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr) } EXPORT_SYMBOL(arch_hibernation_header_restore); -static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, - unsigned long dst_addr, - pgprot_t pgprot) +static void *hibernate_page_alloc(void *arg) { - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - pudp = (void 
*)get_safe_page(GFP_ATOMIC); - if (!pudp) - return -ENOMEM; - pgd_populate(&init_mm, pgdp, pudp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = (void *)get_safe_page(GFP_ATOMIC); - if (!pudp) - return -ENOMEM; - p4d_populate(&init_mm, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = (void *)get_safe_page(GFP_ATOMIC); - if (!pmdp) - return -ENOMEM; - pud_populate(&init_mm, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = (void *)get_safe_page(GFP_ATOMIC); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); - - return 0; + return (void *)get_safe_page((__force gfp_t)(unsigned long)arg); } /* @@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, * page system. */ static int create_safe_exec_page(void *src_start, size_t length, - unsigned long dst_addr, phys_addr_t *phys_dst_addr) { + struct trans_pgd_info trans_info = { + .trans_alloc_page = hibernate_page_alloc, + .trans_alloc_arg = (__force void *)GFP_ATOMIC, + }; + void *page = (void *)get_safe_page(GFP_ATOMIC); - pgd_t *trans_pgd; + phys_addr_t trans_ttbr0; + unsigned long t0sz; int rc; if (!page) @@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy(page, src_start, length); __flush_icache_range((unsigned long)page, (unsigned long)page + length); - - trans_pgd = (void *)get_safe_page(GFP_ATOMIC); - if (!trans_pgd) - return -ENOMEM; - - rc = trans_pgd_map_page(trans_pgd, page, dst_addr, - PAGE_KERNEL_EXEC); + rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -270,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length, * page, but TLBs may contain stale ASID-tagged entries (e.g. 
for EFI * runtime services), while for a userspace-driven test_resume cycle it * points to userspace page tables (and we must point it at a zero page - * ourselves). Elsewhere we only (un)install the idmap with preemption - * disabled, so T0SZ should be as required regardless. + * ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1); + __cpu_set_tcr_t0sz(t0sz); + write_sysreg(trans_ttbr0, ttbr0_el1); isb(); *phys_dst_addr = virt_to_phys(page); @@ -462,182 +419,6 @@ int swsusp_arch_suspend(void) return ret; } -static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) -{ - pte_t pte = READ_ONCE(*src_ptep); - - if (pte_valid(pte)) { - /* - * Resume will overwrite areas that may be marked - * read only (code, rodata). Clear the RDONLY bit from - * the temporary mappings we use during restore. - */ - set_pte(dst_ptep, pte_mkwrite(pte)); - } else if (debug_pagealloc_enabled() && !pte_none(pte)) { - /* - * debug_pagealloc will removed the PTE_VALID bit if - * the page isn't in use by the resume kernel. It may have - * been in use by the original kernel, in which case we need - * to put it back in our copy to do the restore. - * - * Before marking this entry valid, check the pfn should - * be mapped. 
- */ - BUG_ON(!pfn_valid(pte_pfn(pte))); - - set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte))); - } -} - -static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, - unsigned long end) -{ - pte_t *src_ptep; - pte_t *dst_ptep; - unsigned long addr = start; - - dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); - if (!dst_ptep) - return -ENOMEM; - pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); - dst_ptep = pte_offset_kernel(dst_pmdp, start); - - src_ptep = pte_offset_kernel(src_pmdp, start); - do { - _copy_pte(dst_ptep, src_ptep, addr); - } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); - - return 0; -} - -static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, - unsigned long end) -{ - pmd_t *src_pmdp; - pmd_t *dst_pmdp; - unsigned long next; - unsigned long addr = start; - - if (pud_none(READ_ONCE(*dst_pudp))) { - dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pmdp) - return -ENOMEM; - pud_populate(&init_mm, dst_pudp, dst_pmdp); - } - dst_pmdp = pmd_offset(dst_pudp, start); - - src_pmdp = pmd_offset(src_pudp, start); - do { - pmd_t pmd = READ_ONCE(*src_pmdp); - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - continue; - if (pmd_table(pmd)) { - if (copy_pte(dst_pmdp, src_pmdp, addr, next)) - return -ENOMEM; - } else { - set_pmd(dst_pmdp, - __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY)); - } - } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); - - return 0; -} - -static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, - unsigned long end) -{ - pud_t *dst_pudp; - pud_t *src_pudp; - unsigned long next; - unsigned long addr = start; - - if (p4d_none(READ_ONCE(*dst_p4dp))) { - dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pudp) - return -ENOMEM; - p4d_populate(&init_mm, dst_p4dp, dst_pudp); - } - dst_pudp = pud_offset(dst_p4dp, start); - - src_pudp = pud_offset(src_p4dp, start); - do { - pud_t pud = READ_ONCE(*src_pudp); - - next = pud_addr_end(addr, end); - if 
(pud_none(pud)) - continue; - if (pud_table(pud)) { - if (copy_pmd(dst_pudp, src_pudp, addr, next)) - return -ENOMEM; - } else { - set_pud(dst_pudp, - __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); - } - } while (dst_pudp++, src_pudp++, addr = next, addr != end); - - return 0; -} - -static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, - unsigned long end) -{ - p4d_t *dst_p4dp; - p4d_t *src_p4dp; - unsigned long next; - unsigned long addr = start; - - dst_p4dp = p4d_offset(dst_pgdp, start); - src_p4dp = p4d_offset(src_pgdp, start); - do { - next = p4d_addr_end(addr, end); - if (p4d_none(READ_ONCE(*src_p4dp))) - continue; - if (copy_pud(dst_p4dp, src_p4dp, addr, next)) - return -ENOMEM; - } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); - - return 0; -} - -static int copy_ |
