From ead68df94d248c80fdbae220ae5425eb5af2e753 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 12 Feb 2020 20:15:29 +0100 Subject: KVM: x86: enable -Werror Avoid more embarrassing mistakes. At least those that the compiler can catch. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b19ef421084d..4654e97a05cc 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -Iarch/x86/kvm +ccflags-y += -Werror KVM := ../../../virt/kvm -- cgit v1.2.3 From 6e5cf31fbe651bed7ba1df768f2e123531132417 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Feb 2020 13:28:41 +0100 Subject: x86/mce/amd: Publish the bank pointer only after setup has succeeded threshold_create_bank() creates a bank descriptor per MCA error thresholding counter which can be controlled over sysfs. It publishes the pointer to that bank in a per-CPU variable and then goes on to create additional thresholding blocks if the bank has such. However, that creation of additional blocks in allocate_threshold_blocks() can fail, leading to a use-after-free through the per-CPU pointer. Therefore, publish that pointer only after all blocks have been setup successfully. Fixes: 019f34fccfd5 ("x86, MCE, AMD: Move shared bank to node descriptor") Reported-by: Saar Amar Reported-by: Dan Carpenter Signed-off-by: Borislav Petkov Cc: Link: http://lkml.kernel.org/r/20200128140846.phctkvx5btiexvbx@kili.mountain --- arch/x86/kernel/cpu/mce/amd.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index b3a50d962851..e7313e5c497c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1198,8 +1198,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1243,16 +1244,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1260,7 +1257,7 @@ recurse: if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1345,8 +1342,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1357,9 +1352,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); -- cgit v1.2.3 From 51dede9c05df2b78acd6dcf6a17d21f0877d2d7b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Feb 2020 19:01:34 +0100 Subject: x86/mce/amd: Fix kobject lifetime Accessing the MCA thresholding controls in sysfs concurrently with CPU hotplug can lead to a couple of KASAN-reported issues: BUG: KASAN: use-after-free in sysfs_file_ops+0x155/0x180 Read of size 8 at addr ffff888367578940 by task grep/4019 and BUG: KASAN: use-after-free in show_error_count+0x15c/0x180 Read of size 2 at addr ffff888368a05514 by task grep/4454 for example. Both result from the fact that the threshold block creation/teardown code frees the descriptor memory itself instead of defining proper ->release function and leaving it to the driver core to take care of that, after all sysfs accesses have completed. Do that and get rid of the custom freeing code, fixing the above UAFs in the process. [ bp: write commit message. ] Fixes: 95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200214082801.13836-1-bp@alien8.de --- arch/x86/kernel/cpu/mce/amd.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e7313e5c497c..52de616a8065 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1163,9 +1163,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1367,8 +1370,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1378,13 +1385,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) -- cgit v1.2.3 From d3f703c4359ff06619b2322b91f69710453e6b6d Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 11 Feb 2020 11:24:33 -0800 Subject: mips: vdso: fix 'jalr t9' crash in vdso code Observed that when kernel is built with Yocto mips64-poky-linux-gcc, and mips64-poky-linux-gnun32-gcc toolchain, resulting vdso contains 'jalr t9' instructions in its code and since in vdso case nobody sets GOT table code crashes when instruction reached. On other hand observed that when kernel is built mips-poky-linux-gcc toolchain, the same 'jalr t9' instruction are replaced with PC relative function calls using 'bal' instructions. The difference boils down to -mrelax-pic-calls and -mexplicit-relocs gcc options that gets different default values depending on gcc target triplets and corresponding binutils. -mrelax-pic-calls got enabled by default only in mips-poky-linux-gcc case. MIPS binutils ld relies on R_MIPS_JALR relocation to convert 'jalr t9' into 'bal' and such relocation is generated only if -mrelax-pic-calls option is on. Please note 'jalr t9' conversion to 'bal' can happen only to static functions. These static PIC calls use mips local GOT entries that are supposed to be filled with start of DSO value by run-time linker (missing in VDSO case) and they do not have dynamic relocations. Global mips GOT entries must have dynamic relocations and they should be prevented by cmd_vdso_check Makefile rule. Solution call out -mrelax-pic-calls and -mexplicit-relocs options explicitly while compiling MIPS vdso code. That would get correct and consistent between different toolchains behaviour. Reported-by: Bruce Ashfield Signed-off-by: Victor Kamensky Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Vincenzo Frascino Cc: richard.purdie@linuxfoundation.org --- arch/mips/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index aa89a41dc5dd..848baeaef1f8 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -33,6 +33,7 @@ endif cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \ + -mrelax-pic-calls -mexplicit-relocs \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) -- cgit v1.2.3 From 07015d7a103c4420b69a287b8ef4d2535c0f4106 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 15 Feb 2020 12:38:36 -0800 Subject: MIPS: Disable VDSO time functionality on microMIPS A check we're about to add to pick up on function calls that depend on bogus use of the GOT in the VDSO picked up on instances of such function calls in microMIPS builds. Since the code appears genuinely problematic, and given the relatively small amount of use & testing that microMIPS sees, go ahead & disable the VDSO for microMIPS builds. Signed-off-by: Paul Burton --- arch/mips/vdso/Makefile | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 848baeaef1f8..b5e0bd82d47f 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -52,6 +52,8 @@ endif CFLAGS_REMOVE_vgettimeofday.o = -pg +DISABLE_VDSO := n + # # For the pre-R6 code in arch/mips/vdso/vdso.h for locating # the base address of VDSO, the linker will emit a R_MIPS_PC32 @@ -65,11 +67,24 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) - ccflags-vdso += -DDISABLE_MIPS_VDSO + DISABLE_VDSO := y endif endif +# +# GCC (at least up to version 9.2) appears to emit function calls that make use +# of the GOT when targeting microMIPS, which we can't use in the VDSO due to +# the lack of relocations. As such, we disable the VDSO for microMIPS builds. +# +ifdef CONFIG_CPU_MICROMIPS + DISABLE_VDSO := y +endif + +ifeq ($(DISABLE_VDSO),y) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) + ccflags-vdso += -DDISABLE_MIPS_VDSO +endif + # VDSO linker flags. VDSO_LDFLAGS := \ -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \ -- cgit v1.2.3 From 976c23af3ee5bd3447a7bfb6c356ceb4acf264a6 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 11 Feb 2020 11:24:34 -0800 Subject: mips: vdso: add build time check that no 'jalr t9' calls left vdso shared object cannot have GOT based PIC 'jalr t9' calls because nobody set GOT table in vdso. Contributing into vdso .o files are compiled in PIC mode and as result for internal static functions calls compiler will generate 'jalr t9' instructions. Those are supposed to be converted into PC relative 'bal' calls by linker when relocation are processed. Mips global GOT entries do have dynamic relocations and they will be caught by cmd_vdso_check Makefile rule. Static PIC calls go through mips local GOT entries that do not have dynamic relocations. For those 'jalr t9' calls could be present but without dynamic relocations and they need to be converted to 'bal' calls by linker. Add additional build time check to make sure that no 'jalr t9' slip through because of some toolchain misconfiguration that prevents 'jalr t9' to 'bal' conversion. Signed-off-by: Victor Kamensky Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Vincenzo Frascino Cc: bruce.ashfield@gmail.com Cc: richard.purdie@linuxfoundation.org --- arch/mips/vdso/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index b5e0bd82d47f..77374c1f0c77 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -97,12 +97,18 @@ GCOV_PROFILE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n +# Check that we don't have PIC 'jalr t9' calls left +quiet_cmd_vdso_mips_check = VDSOCHK $@ + cmd_vdso_mips_check = if $(OBJDUMP) --disassemble $@ | egrep -h "jalr.*t9" > /dev/null; \ + then (echo >&2 "$@: PIC 'jalr t9' calls are not supported"; \ + rm -f $@; /bin/false); fi + # # Shared build commands. # quiet_cmd_vdsold_and_vdso_check = LD $@ - cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) + cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check) quiet_cmd_vdsold = VDSO $@ cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ -- cgit v1.2.3 From 97e914b7de3c943011779b979b8093fdc0d85722 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Wed, 12 Feb 2020 10:24:55 +1300 Subject: MIPS: cavium_octeon: Fix syncw generation. The Cavium Octeon CPU uses a special sync instruction for implementing wmb, and due to a CPU bug, the instruction must appear twice. A macro had been defined to hide this: #define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) which was intended to evaluate to 2 for __SYNC_wmb, and 1 for any other type of sync. However, this expression is evaluated by the assembler, and not the compiler, and the result of '==' in the assembler is 0 or -1, not 0 or 1 as it is in C. The net result was wmb() producing no code at all. The simple fix in this patch is to change the '+' to '-'. Fixes: bf92927251b3 ("MIPS: barrier: Add __SYNC() infrastructure") Signed-off-by: Mark Tomlinson Tested-by: Chris Packham Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/sync.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h index 7c6a1095f556..aabd097933fe 100644 --- a/arch/mips/include/asm/sync.h +++ b/arch/mips/include/asm/sync.h @@ -155,9 +155,11 @@ * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use * optimized memory barrier primitives."). Here we specify that the affected * sync instructions should be emitted twice. + * Note that this expression is evaluated by the assembler (not the compiler), + * and that the assembler evaluates '==' as 0 or -1, not 0 or 1. */ #ifdef CONFIG_CPU_CAVIUM_OCTEON -# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) +# define __SYNC_rpt(type) (1 - (type == __SYNC_wmb)) #else # define __SYNC_rpt(type) 1 #endif -- cgit v1.2.3 From bef8e2dfceed6daeb6ca3e8d33f9c9d43b926580 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Feb 2020 21:19:22 +0100 Subject: MIPS: VPE: Fix a double free and a memory leak in 'release_vpe()' Pointer on the memory allocated by 'alloc_progmem()' is stored in 'v->load_addr'. So this is this memory that should be freed by 'release_progmem()'. 'release_progmem()' is only a call to 'kfree()'. With the current code, there is both a double free and a memory leak. Fix it by passing the correct pointer to 'release_progmem()'. Fixes: e01402b115ccc ("More AP / SP bits for the 34K, the Malta bits and things. Still wants") Signed-off-by: Christophe JAILLET Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: kernel-janitors@vger.kernel.org --- arch/mips/kernel/vpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6176b9acba95..d0d832ab3d3b 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } -- cgit v1.2.3 From d4f194ed9eb9841a8f978710e4d24296f791a85b Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 7 Feb 2020 15:57:31 +1100 Subject: powerpc/eeh: Fix deadlock handling dead PHB Recovering a dead PHB can currently cause a deadlock as the PCI rescan/remove lock is taken twice. This is caused as part of an existing bug in eeh_handle_special_event(). The pe is processed while traversing the PHBs even though the pe is unrelated to the loop. This causes the pe to be, incorrectly, processed more than once. Untangling this section can move the pe processing out of the loop and also outside the locked section, correcting both problems. Fixes: 2e25505147b8 ("powerpc/eeh: Fix crash when edev->pdev changes") Cc: stable@vger.kernel.org # 5.4+ Signed-off-by: Sam Bobroff Reviewed-by: Frederic Barrat Tested-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0547e82dbf90ee0729a2979a8cac5c91665c621f.1581051445.git.sbobroff@linux.ibm.com --- arch/powerpc/kernel/eeh_driver.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a1eaffe868de..7b048cee767c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1184,6 +1184,17 @@ void eeh_handle_special_event(void) eeh_pe_state_mark(pe, EEH_PE_RECOVERING); eeh_handle_normal_event(pe); } else { + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) + edev->mode &= ~EEH_DEV_NO_HANDLER; + + /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_pe_report( + "error_detected(permanent failure)", pe, + eeh_report_failure, NULL); + pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -1192,16 +1203,6 @@ void eeh_handle_special_event(void) (phb_pe->state & EEH_PE_RECOVERING)) continue; - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) - edev->mode &= ~EEH_DEV_NO_HANDLER; - - /* Notify all devices to be down */ - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_pe_report( - "error_detected(permanent failure)", pe, - eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " -- cgit v1.2.3 From f2b67ef90b0d5eca0f2255e02cf2f620bc0ddcdb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Feb 2020 13:50:28 +0000 Subject: powerpc/hugetlb: Fix 512k hugepages on 8xx with 16k page size Commit 55c8fc3f4930 ("powerpc/8xx: reintroduce 16K pages with HW assistance") redefined pte_t as a struct of 4 pte_basic_t, because in 16K pages mode there are four identical entries in the page table. But the size of hugepage tables is calculated based of the size of (void *). Therefore, we end up with page tables of size 1k instead of 4k for 512k pages. As 512k hugepage tables are the same size as standard page tables, ie 4k, use the standard page tables instead of PGT_CACHE tables. Fixes: 3fb69c6a1a13 ("powerpc/8xx: Enable 512k hugepage support with HW assistance") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/90ec56a2315be602494619ed0223bba3b0b8d619.1580997007.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/hugetlbpage.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 73d4873fc7f8..33b3461d91e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,20 +53,24 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); + new = NULL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = PGT_CACHE(PTE_INDEX_SIZE); + cachep = NULL; num_hugepd = 1; + new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; + new = NULL; } - if (!cachep) { + if (!cachep && !new) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + if (cachep) + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -97,7 +101,10 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - kmem_cache_free(cachep, new); + if (cachep) + kmem_cache_free(cachep, new); + else + pte_free(mm, new); } else { kmemleak_ignore(new); } @@ -324,8 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, - get_hugepd_cache_index(PTE_INDEX_SIZE)); + pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); @@ -639,12 +645,13 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_cache_add(PTE_INDEX_SIZE); - else if (pdshift > shift) - pgtable_cache_add(pdshift - shift); - else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) + if (pdshift > shift) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(pdshift - shift); + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || + IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); + } configured = true; } -- cgit v1.2.3 From 50a175dd18de7a647e72aca7daf4744e3a5a81e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 9 Feb 2020 16:02:41 +0000 Subject: powerpc/hugetlb: Fix 8M hugepages on 8xx With HW assistance all page tables must be 4k aligned, the 8xx drops the last 12 bits during the walk. Redefine HUGEPD_SHIFT_MASK to mask last 12 bits out. HUGEPD_SHIFT_MASK is used to for alignment of page table cache. Fixes: 22569b881d37 ("powerpc/8xx: Enable 8M hugepage support with HW assistance") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/778b1a248c4c7ca79640eeff7740044da6a220a0.1581264115.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/page.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 86332080399a..080a0bf8e54b 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -295,8 +295,13 @@ static inline bool pfn_valid(unsigned long pfn) /* * Some number of bits at the level of the page table that points to * a hugepte are used to encode the size. This masks those bits. + * On 8xx, HW assistance requires 4k alignment for the hugepte. */ +#ifdef CONFIG_PPC_8xx +#define HUGEPD_SHIFT_MASK 0xfff +#else #define HUGEPD_SHIFT_MASK 0x3f +#endif #ifndef __ASSEMBLY__ -- cgit v1.2.3 From a4031afb9d10d97f4d0285844abbc0ab04245304 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 9 Feb 2020 18:14:42 +0000 Subject: powerpc/8xx: Fix clearing of bits 20-23 in ITLB miss In ITLB miss handled the line supposed to clear bits 20-23 on the L2 ITLB entry is buggy and does indeed nothing, leading to undefined value which could allow execution when it shouldn't. Properly do the clearing with the relevant instruction. Fixes: 74fabcadfd43 ("powerpc/8xx: don't use r12/SPRN_SPRG_SCRATCH2 in TLB Miss handlers") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Christophe Leroy Reviewed-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4f70c2778163affce8508a210f65d140e84524b4.1581272050.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9922306ae512..073a651787df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -256,7 +256,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */ + rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ -- cgit v1.2.3 From 463bfeeead97416ad2b141421f51888054dc0e18 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 14 Feb 2020 10:44:05 +0800 Subject: KVM: nVMX: Fix some obsolete comments and grammar error Fix wrong variable names and grammar error in comment. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 3589cd3c0fcc..a5757b0b80f9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3161,10 +3161,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. * * Returns: - * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode - * NVMX_ENTRY_VMFAIL: Consistency check VMFail - * NVMX_ENTRY_VMEXIT: Consistency check VMExit - * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error + * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode + * NVMX_VMENTRY_VMFAIL: Consistency check VMFail + * NVMX_VMENTRY_VMEXIT: Consistency check VMExit + * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error */ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) @@ -5330,7 +5330,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, } /* - * Return 1 if we should exit from L2 to L1 to handle an MSR access access, + * Return 1 if we should exit from L2 to L1 to handle an MSR access, * rather than handle it ourselves in L0. I.e., check whether L1 expressed * disinterest in the current event (read or write a specific MSR) by using an * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. -- cgit v1.2.3 From b16c3724dd717a354d0f0257fd647ef5518982aa Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 13 Feb 2020 11:13:13 -0500 Subject: s390/defconfig: enable CONFIG_PROTECTED_VIRTUALIZATION_GUEST The guest support for protected virtualization is default on most distributions. Also refresh defconfig and debug_defconfig. Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 28 +++++++++++++--------------- arch/s390/configs/defconfig | 11 +++++------ 2 files changed, 18 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2e60c80395ab..0c86ba19fa2b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -53,6 +53,7 @@ CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y +CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m @@ -474,7 +475,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set @@ -684,7 +684,6 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -748,7 +747,6 @@ CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_HEADERS_INSTALL=y -CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y @@ -772,9 +770,9 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_DEBUG_SHIRQ=y +CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y -CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -783,9 +781,20 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 +CONFIG_LATENCYTOP=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_STACK_TRACER=y +CONFIG_IRQSOFF_TRACER=y +CONFIG_PREEMPT_TRACER=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_HIST_TRIGGERS=y +CONFIG_S390_PTDUMP=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -796,15 +805,6 @@ CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y -CONFIG_LATENCYTOP=y -CONFIG_IRQSOFF_TRACER=y -CONFIG_PREEMPT_TRACER=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_HIST_TRIGGERS=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_TEST_SORT=y @@ -814,5 +814,3 @@ CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 25f799849582..6b27d861a9a3 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -53,6 +53,7 @@ CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y +CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m @@ -470,7 +471,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set @@ -677,7 +677,6 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -739,18 +738,18 @@ CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y +CONFIG_S390_PTDUMP=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y -- cgit v1.2.3 From 380324734956c64cd060e1db4304f3117ac15809 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 13 Feb 2020 23:42:07 -0700 Subject: s390/mm: Explicitly compare PAGE_DEFAULT_KEY against zero in storage_key_init_range Clang warns: In file included from ../arch/s390/purgatory/purgatory.c:10: In file included from ../include/linux/kexec.h:18: In file included from ../include/linux/crash_core.h:6: In file included from ../include/linux/elfcore.h:5: In file included from ../include/linux/user.h:1: In file included from ../arch/s390/include/asm/user.h:11: ../arch/s390/include/asm/page.h:45:6: warning: converting the result of '<<' to a boolean always evaluates to false [-Wtautological-constant-compare] if (PAGE_DEFAULT_KEY) ^ ../arch/s390/include/asm/page.h:23:44: note: expanded from macro 'PAGE_DEFAULT_KEY' #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) ^ 1 warning generated. Explicitly compare this against zero to silence the warning as it is intended to be used in a boolean context. Fixes: de3fa841e429 ("s390/mm: fix compile for PAGE_DEFAULT_KEY != 0") Link: https://github.com/ClangBuiltLinux/linux/issues/860 Link: https://lkml.kernel.org/r/20200214064207.10381-1-natechancellor@gmail.com Acked-by: Christian Borntraeger Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 85e944f04c70..1019efd85b9d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } -- cgit v1.2.3 From 788d671517b5c81efbed9310ccbadb8cca86a08e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 8 Feb 2020 07:10:52 -0700 Subject: s390/kaslr: Fix casts in get_random Clang warns: ../arch/s390/boot/kaslr.c:78:25: warning: passing 'char *' to parameter of type 'const u8 *' (aka 'const unsigned char *') converts between pointers to integer types with different sign [-Wpointer-sign] (char *) entropy, (char *) entropy, ^~~~~~~~~~~~~~~~ ../arch/s390/include/asm/cpacf.h:280:28: note: passing argument to parameter 'src' here u8 *dest, const u8 *src, long src_len) ^ 2 warnings generated. Fix the cast to match what else is done in this function. Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)") Link: https://github.com/ClangBuiltLinux/linux/issues/862 Link: https://lkml.kernel.org/r/20200208141052.48476-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik --- arch/s390/boot/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5d12352545c5..5591243d673e 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -75,7 +75,7 @@ static unsigned long get_random(unsigned long limit) *(unsigned long *) prng.parm_block ^= seed; for (i = 0; i < 16; i++) { cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, - (char *) entropy, (char *) entropy, + (u8 *) entropy, (u8 *) entropy, sizeof(entropy)); memcpy(prng.parm_block, entropy, sizeof(entropy)); } -- cgit v1.2.3 From 94e90f727f7424d827256023cace829cad6896f4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 16 Feb 2020 23:48:29 +0900 Subject: s390: make 'install' not depend on vmlinux For the same reason as commit 19514fc665ff ("arm, kbuild: make "make install" not depend on vmlinux"), the install targets should never trigger the rebuild of the kernel. The variable, CONFIGURE, is not set by anyone. Remove it as well. Link: https://lkml.kernel.org/r/20200216144829.27023-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Signed-off-by: Vasily Gorbik --- arch/s390/Makefile | 2 +- arch/s390/boot/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e0e3a465bbfd..8dfa2cf1f05c 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -146,7 +146,7 @@ all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage -install: vmlinux +install: $(Q)$(MAKE) $(build)=$(boot) $@ bzImage: vmlinux diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e2c47d3a1c89..0ff9261c915e 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -70,7 +70,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) -install: $(CONFIGURE) $(obj)/bzImage +install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" -- cgit v1.2.3 From 2464cc4c345699adea52c7aef75707207cb8a2f6 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 11 Feb 2020 00:38:29 -0300 Subject: powerpc/tm: Fix clearing MSR[TS] in current when reclaiming on signal delivery After a treclaim, we expect to be in non-transactional state. If we don't clear the current thread's MSR[TS] before we get preempted, then tm_recheckpoint_new_task() will recheckpoint and we get rescheduled in suspended transaction state. When handling a signal caught in transactional state, handle_rt_signal64() calls get_tm_stackpointer() that treclaims the transaction using tm_reclaim_current() but without clearing the thread's MSR[TS]. This can cause the TM Bad Thing exception below if later we pagefault and get preempted trying to access the user's sigframe, using __put_user(). Afterwards, when we are rescheduled back into do_page_fault() (but now in suspended state since the thread's MSR[TS] was not cleared), upon executing 'rfid' after completion of the page fault handling, the exception is raised because a transition from suspended to non-transactional state is invalid. Unexpected TM Bad Thing exception at c00000000000de44 (msr 0x8000000302a03031) tm_scratch=800000010280b033 Oops: Unrecoverable exception, sig: 6 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries CPU: 25 PID: 15547 Comm: a.out Not tainted 5.4.0-rc2 #32 NIP: c00000000000de44 LR: c000000000034728 CTR: 0000000000000000 REGS: c00000003fe7bd70 TRAP: 0700 Not tainted (5.4.0-rc2) MSR: 8000000302a03031 CR: 44000884 XER: 00000000 CFAR: c00000000000dda4 IRQMASK: 0 PACATMSCRATCH: 800000010280b033 GPR00: c000000000034728 c000000f65a17c80 c000000001662800 00007fffacf3fd78 GPR04: 0000000000001000 0000000000001000 0000000000000000 c000000f611f8af0 GPR08: 0000000000000000 0000000078006001 0000000000000000 000c000000000000 GPR12: c000000f611f84b0 c00000003ffcb200 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000f611f8140 GPR24: 0000000000000000 00007fffacf3fd68 c000000f65a17d90 c000000f611f7800 GPR28: c000000f65a17e90 c000000f65a17e90 c000000001685e18 00007fffacf3f000 NIP [c00000000000de44] fast_exception_return+0xf4/0x1b0 LR [c000000000034728] handle_rt_signal64+0x78/0xc50 Call Trace: [c000000f65a17c80] [c000000000034710] handle_rt_signal64+0x60/0xc50 (unreliable) [c000000f65a17d30] [c000000000023640] do_notify_resume+0x330/0x460 [c000000f65a17e20] [c00000000000dcc4] ret_from_except_lite+0x70/0x74 Instruction dump: 7c4ff120 e8410170 7c5a03a6 38400000 f8410060 e8010070 e8410080 e8610088 60000000 60000000 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed0989 ---[ end trace 93094aa44b442f87 ]--- The simplified sequence of events that triggers the above exception is: ... # userspace in NON-TRANSACTIONAL state tbegin # userspace in TRANSACTIONAL state signal delivery # kernelspace in SUSPENDED state handle_rt_signal64() get_tm_stackpointer() treclaim # kernelspace in NON-TRANSACTIONAL state __put_user() page fault happens. We will never get back here because of the TM Bad Thing exception. page fault handling kicks in and we voluntarily preempt ourselves do_page_fault() __schedule() __switch_to(other_task) our task is rescheduled and we recheckpoint because the thread's MSR[TS] was not cleared __switch_to(our_task) switch_to_tm() tm_recheckpoint_new_task() trechkpt # kernelspace in SUSPENDED state The page fault handling resumes, but now we are in suspended transaction state do_page_fault() completes rfid <----- trying to get back where the page fault happened (we were non-transactional back then) TM Bad Thing # illegal transition from suspended to non-transactional This patch fixes that issue by clearing the current thread's MSR[TS] just after treclaim in get_tm_stackpointer() so that we stay in non-transactional state in case we are preempted. In order to make treclaim and clearing the thread's MSR[TS] atomic from a preemption perspective when CONFIG_PREEMPT is set, preempt_disable/enable() is used. It's also necessary to save the previous value of the thread's MSR before get_tm_stackpointer() is called so that it can be exposed to the signal handler later in setup_tm_sigcontexts() to inform the userspace MSR at the moment of the signal delivery. Found with tm-signal-context-force-tm kernel selftest. Fixes: 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context") Cc: stable@vger.kernel.org # v3.9 Signed-off-by: Gustavo Luiz Duarte Acked-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200211033831.11165-1-gustavold@linux.ibm.com --- arch/powerpc/kernel/signal.c | 17 +++++++++++++++-- arch/powerpc/kernel/signal_32.c | 28 ++++++++++++++-------------- arch/powerpc/kernel/signal_64.c | 22 ++++++++++------------ 3 files changed, 39 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e6c30cee6abf..d215f9554553 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) * normal/non-checkpointed stack pointer. */ + unsigned long ret = tsk->thread.regs->gpr[1]; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) - return tsk->thread.ckpt_regs.gpr[1]; + ret = tsk->thread.ckpt_regs.gpr[1]; + + /* + * If we treclaim, we must clear the current thread's TM bits + * before re-enabling preemption. Otherwise we might be + * preempted and have the live MSR[TS] changed behind our back + * (tm_recheckpoint_new_task() would recheckpoint). Besides, we + * enter the signal handler in non-transactional state. + */ + tsk->thread.regs->msr &= ~MSR_TS_MASK; + preempt_enable(); } #endif - return tsk->thread.regs->gpr[1]; + return ret; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 98600b276f76..1b090a76b444 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, */ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret) + struct mcontext __user *tm_frame, int sigret, + unsigned long msr) { - unsigned long msr = regs->msr; - WARN_ON(tm_suspend_disabled); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) goto badframe; } else @@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret)) + sigret, msr)) goto badframe; } else diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 117515564ec7..84ed2e77ef9c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, struct task_struct *tsk, - int signr, sigset_t *set, unsigned long handler) + int signr, sigset_t *set, unsigned long handler, + unsigned long msr) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); - BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + BUG_ON(!MSR_TM_ACTIVE(msr)); WARN_ON(tm_suspend_disabled); @@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); @@ -824,6 +817,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -841,7 +838,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= __put_user(0, &frame->uc.uc_flags); err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ @@ -849,7 +846,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, - (unsigned long)ksig->ka.sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler, + msr); } else #endif { -- cgit v1.2.3 From 232ca1eecafed8c54491017f0612c33d8c742d74 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 15 Feb 2020 10:14:25 +0000 Subject: powerpc/32s: Fix DSI and ISI exceptions for CONFIG_VMAP_STACK hash_page() needs to read page tables from kernel memory. When entire kernel memory is mapped by BATs, which is normally the case when CONFIG_STRICT_KERNEL_RWX is not set, it works even if the page hosting the page table is not referenced in the MMU hash table. However, if the page where the page table resides is not covered by a BAT, a DSI fault can be encountered from hash_page(), and it loops forever. This can happen when CONFIG_STRICT_KERNEL_RWX is selected and the alignment of the different regions is too small to allow covering the entire memory with BATs. This also happens when CONFIG_DEBUG_PAGEALLOC is selected or when booting with 'nobats' flag. Also, if the page containing the kernel stack is not present in the MMU hash table, registers cannot be saved and a recursive DSI fault is encountered. To allow hash_page() to properly do its job at all time and load the MMU hash table whenever needed, it must run with data MMU disabled. This means it must be called before re-enabling data MMU. To allow this, registers clobbered by hash_page() and create_hpte() have to be saved in the thread struct together with SRR0, SSR1, DAR and DSISR. It is also necessary to ensure that DSI prolog doesn't overwrite regs saved by prolog of the current running exception. That means: - DSI can only use SPRN_SPRG_SCRATCH0 - Exceptions must free SPRN_SPRG_SCRATCH0 before writing to the stack. This also fixes the Oops reported by Erhard when create_hpte() is called by add_hash_page(). Due to prolog size increase, a few more exceptions had to get split in two parts. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Reported-by: Erhard F. Signed-off-by: Christophe Leroy Tested-by: Erhard F. Tested-by: Larry Finger Signed-off-by: Michael Ellerman Link: https://bugzilla.kernel.org/show_bug.cgi?id=206501 Link: https://lore.kernel.org/r/64a4aa44686e9fd4b01333401367029771d9b231.1581761633.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/processor.h | 4 + arch/powerpc/kernel/asm-offsets.c | 12 +++ arch/powerpc/kernel/head_32.S | 155 ++++++++++++++++++++++++++++++++-- arch/powerpc/kernel/head_32.h | 21 ++++- arch/powerpc/mm/book3s32/hash_low.S | 52 +++++------- arch/powerpc/mm/book3s32/mmu.c | 10 +-- arch/powerpc/mm/kasan/kasan_init_32.c | 3 +- 7 files changed, 212 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8387698bd5b6..eedcbfb9a6ff 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -168,6 +168,10 @@ struct thread_struct { unsigned long srr1; unsigned long dar; unsigned long dsisr; +#ifdef CONFIG_PPC_BOOK3S_32 + unsigned long r0, r3, r4, r5, r6, r8, r9, r11; + unsigned long lr, ctr; +#endif #endif /* Debug Registers */ struct debug_reg debug; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c25e562f1cd9..fcf24a365fc0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -132,6 +132,18 @@ int main(void) OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); OFFSET(DSISR, thread_struct, dsisr); +#ifdef CONFIG_PPC_BOOK3S_32 + OFFSET(THR0, thread_struct, r0); + OFFSET(THR3, thread_struct, r3); + OFFSET(THR4, thread_struct, r4); + OFFSET(THR5, thread_struct, r5); + OFFSET(THR6, thread_struct, r6); + OFFSET(THR8, thread_struct, r8); + OFFSET(THR9, thread_struct, r9); + OFFSET(THR11, thread_struct, r11); + OFFSET(THLR, thread_struct, lr); + OFFSET(THCTR, thread_struct, ctr); +#endif #endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0493fcac6409..97c887950c3c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -290,17 +290,55 @@ MachineCheck: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP - bne cr1,1f +#ifdef CONFIG_VMAP_STACK + mfspr r4, SPRN_SPRG_THREAD + tovirt(r4, r4) + lwz r4, RTAS_SP(r4) + cmpwi cr1, r4, 0 #endif - EXC_XFER_STD(0x200, machine_check_exception) -#ifdef CONFIG_PPC_CHRP -1: b machine_check_in_rtas + beq cr1, machine_check_tramp + b machine_check_in_rtas +#else + b machine_check_tramp #endif /* Data access exception. */ . = 0x300 DO_KVM 0x300 DataAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mfspr r10, SPRN_SPRG_THREAD +BEGIN_MMU_FTR_SECTION + stw r11, THR11(r10) + mfspr r10, SPRN_DSISR + mfcr r11 +#ifdef CONFIG_PPC_KUAP + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + mfspr r10, SPRN_SPRG_THREAD + beq hash_page_dsi +.Lhash_page_dsi_cont: + mtcr r11 + lwz r11, THR11(r10) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r11, SPRN_DAR + stw r11, DAR(r10) + mfspr r11, SPRN_DSISR + stw r11, DSISR(r10) + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + b handle_page_fault_tramp_1 +#else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION @@ -316,11 +354,32 @@ BEGIN_MMU_FTR_SECTION FTR_SECTION_ELSE b handle_page_fault_tramp_2 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ . = 0x400 DO_KVM 0x400 InstructionAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r10, SPRN_SPRG_THREAD + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 +BEGIN_MMU_FTR_SECTION + andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ + bne hash_page_isi +.Lhash_page_isi_cont: + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +#else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ beq 1f /* if so, try to put a PTE */ @@ -329,6 +388,7 @@ InstructionAccess: BEGIN_MMU_FTR_SECTION bl hash_page END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ 1: mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r4, _DAR(r11) @@ -344,7 +404,7 @@ Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x600, alignment_exception) + b alignment_exception_tramp /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -645,15 +705,100 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) . = 0x3000 +machine_check_tramp: + EXC_XFER_STD(0x200, machine_check_exception) + +alignment_exception_tramp: + EXC_XFER_STD(0x600, alignment_exception) + handle_page_fault_tramp_1: +#ifdef CONFIG_VMAP_STACK + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 +#endif lwz r4, _DAR(r11) lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: EXC_XFER_LITE(0x300, handle_page_fault) +#ifdef CONFIG_VMAP_STACK +.macro save_regs_thread thread + stw r0, THR0(\thread) + stw r3, THR3(\thread) + stw r4, THR4(\thread) + stw r5, THR5(\thread) + stw r6, THR6(\thread) + stw r8, THR8(\thread) + stw r9, THR9(\thread) + mflr r0 + stw r0, THLR(\thread) + mfctr r0 + stw r0, THCTR(\thread) +.endm + +.macro restore_regs_thread thread + lwz r0, THLR(\thread) + mtlr r0 + lwz r0, THCTR(\thread) + mtctr r0 + lwz r0, THR0(\thread) + lwz r3, THR3(\thread) + lwz r4, THR4(\thread) + lwz r5, THR5(\thread) + lwz r6, THR6(\thread) + lwz r8, THR8(\thread) + lwz r9, THR9(\thread) +.endm + +hash_page_dsi: + save_regs_thread r10 + mfdsisr r3 + mfdar r4 + mfsrr0 r5 + mfsrr1 r9 + rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + b .Lhash_page_dsi_cont + +hash_page_isi: + mr r11, r10 + mfspr r10, SPRN_SPRG_THREAD + save_regs_thread r10 + li r3, 0 + lwz r4, SRR0(r10) + lwz r9, SRR1(r10) + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + mr r10, r11 + b .Lhash_page_isi_cont + + .globl fast_hash_page_return +fast_hash_page_return: + andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */ + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + bne 1f + + /* DSI */ + mtcr r11 + lwz r11, THR11(r10) + mfspr r10, SPRN_SPRG_SCRATCH0 + SYNC + RFI + +1: /* ISI */ + mtcr r11 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + SYNC + RFI + stack_overflow: vmap_stack_overflow_exception +#endif AltiVecUnavailable: EXCEPTION_PROLOG diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a6a5fbbf8504..9db162f79fe6 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -64,11 +64,25 @@ .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) +BEGIN_MMU_FTR_SECTION + mtcr r10 +FTR_SECTION_ELSE + stw r10, _CCR(r11) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#else stw r10,_CCR(r11) /* save registers */ +#endif + mfspr r10, SPRN_SPRG_SCRATCH0 stw r12,GPR12(r11) stw r9,GPR9(r11) - mfspr r10,SPRN_SPRG_SCRATCH0 stw r10,GPR10(r11) +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) +BEGIN_MMU_FTR_SECTION + mfcr r10 + stw r10, _CCR(r11) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif mfspr r12,SPRN_SPRG_SCRATCH1 stw r12,GPR11(r11) mflr r10 @@ -83,6 +97,11 @@ stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) +BEGIN_MMU_FTR_SECTION + andi. r10, r9, MSR_PR +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index c11b0a005196..2015c4f96238 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -25,12 +25,6 @@ #include #include -#ifdef CONFIG_VMAP_STACK -#define ADDR_OFFSET 0 -#else -#define ADDR_OFFSET PAGE_OFFSET -#endif - #ifdef CONFIG_SMP .section .bss .align 2 @@ -53,8 +47,8 @@ mmu_hash_lock: .text _GLOBAL(hash_page) #ifdef CONFIG_SMP - lis r8, (mmu_hash_lock - ADDR_OFFSET)@h - ori r8, r8, (mmu_hash_lock - ADDR_OFFSET)@l + lis r8, (mmu_hash_lock - PAGE_OFFSET)@h + ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l lis r0,0x0fff b 10f 11: lwz r6,0(r8) @@ -72,12 +66,9 @@ _GLOBAL(hash_page) cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ -#ifdef CONFIG_VMAP_STACK - tovirt(r5, r5) -#endif blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - ADDR_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - ADDR_OFFSET)@l /* kernel page table */ + lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ 112: #ifndef CONFIG_PTE_64BIT @@ -89,9 +80,6 @@ _GLOBAL(hash_page) lwzx r8,r8,r5 /* Get L1 entry */ rlwinm. r8,r8,0,0,20 /* extract pt base address */ #endif -#ifdef CONFIG_VMAP_STACK - tovirt(r8, r8) -#endif #ifdef CONFIG_SMP beq- hash_page_out /* return if no mapping */ #else @@ -143,30 +131,36 @@ retry: bne- retry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ +#ifndef CONFIG_VMAP_STACK mfctr r0 stw r0,_CTR(r11) +#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP eieio - lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha + lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 - stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8) + stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif +#ifdef CONFIG_VMAP_STACK + b fast_hash_page_return +#else /* Return from the exception */ lwz r5,_CTR(r11) mtctr r5 lwz r0,GPR0(r11) lwz r8,GPR8(r11) b fast_exception_return +#endif #ifdef CONFIG_SMP hash_page_out: eieio - lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha + lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 - stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8) + stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) blr #endif /* CONFIG_SMP */ @@ -341,7 +335,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) patch_site 1f, patch__hash_page_A1 patch_site 2f, patch__hash_page_A2 /* Get the address of the primary PTE group in the hash table (r3) */ -0: lis r0, (Hash_base - ADDR_OFFSET)@h /* base address of hash table */ +0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */ 1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ 2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ xor r3,r3,r0 /* make primary hash */ @@ -355,10 +349,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - ADDR_OFFSET)@ha - lwz r6, (htab_hash_searches - ADDR_OFFSET)@l(r4) + lis r4, (htab_hash_searches - PAGE_OFFSET)@ha + lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - ADDR_OFFSET)@l(r4) + stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 @@ -390,10 +384,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ found_empty /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - ADDR_OFFSET)@ha - lwz r6, (primary_pteg_full - ADDR_OFFSET)@l(r4) + lis r4, (primary_pteg_full - PAG