From 094d00f8ca58c5d29b25e23b4daaed1ff1f13b41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jan 2022 08:57:58 +0000 Subject: KVM: arm64: pkvm: Use the mm_ops indirection for cache maintenance CMOs issued from EL2 cannot directly use the kernel helpers, as EL2 doesn't have a mapping of the guest pages. Oops. Instead, use the mm_ops indirection to use helpers that will perform a mapping at EL2 and allow the CMO to be effective. Fixes: 25aa28691bb9 ("KVM: arm64: Move guest CMOs to the fault handlers") Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220114125038.1336965-1-maz@kernel.org --- arch/arm64/kvm/hyp/pgtable.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 844a6f003fd5..2cb3867eb7c2 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ stage2_put_pte(ptep, mmu, addr, level, mm_ops); - if (need_flush) { - kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); - } + if (need_flush && mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); if (childp) mm_ops->put_page(childp); @@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; - kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - pte_follow = kvm_pte_follow(pte, mm_ops); - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); + if (mm_ops->dcache_clean_inval_poc) + 
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); return 0; } -- cgit v1.2.3 From d11a327ed95dbec756b99cbfef2a7fd85c9eeb09 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jan 2022 21:07:47 +0000 Subject: KVM: arm64: vgic-v3: Restrict SEIS workaround to known broken systems Contrary to what df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors") was asserting, there is at least one other system out there (Cavium ThunderX2) implementing SEIS, and not in an obviously broken way. So instead of imposing the M1 workaround on an innocent bystander, let's limit it to the two known broken Apple implementations. Fixes: df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors") Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220122103912.795026-1-maz@kernel.org --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 3 +++ arch/arm64/kvm/vgic/vgic-v3.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 20db2f281cf2..4fb419f7b8b6 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) + val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT); /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 78cf674c1230..221489c29354 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char 
*buf) } early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + is_midr_in_range_list(read_cpuid_id(), broken_seis)); +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } - if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { - kvm_info("GICv3 with locally generated SEI\n"); + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; group0_trap = true; group1_trap = true; if (ich_vtr_el2 & ICH_VTR_TDS_MASK) -- cgit v1.2.3 From 1ea1d6a847d2b1d17fefd9196664b95f052a0775 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 13 Jan 2022 11:44:19 +0100 Subject: s390/nmi: handle guarded storage validity failures for KVM guests machine check validity bits reflect the state of the machine check. If a guest does not make use of guarded storage, the validity bit might be off. We can not use the host CR bit to decide if the validity bit must be on. So ignore "invalid" guarded storage controls for KVM guests in the host and rely on the machine check being forwarded to the guest. If no other errors happen from a host perspective everything is fine and no process must be killed and the host can continue to run. 
Cc: stable@vger.kernel.org Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Reported-by: Carsten Otte Signed-off-by: Christian Borntraeger Tested-by: Carsten Otte Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/nmi.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0c9e894913dc..147c0d5fd9b4 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -307,11 +307,21 @@ static int notrace s390_validate_registers(union mci mci, int umode) if (cr2.gse) { if (!mci.gs) { /* - * Guarded storage register can't be restored and - * the current processes uses guarded storage. - * It has to be terminated. + * 2 cases: + * - machine check in kernel or userspace + * - machine check while running SIE (KVM guest) + * For kernel or userspace the userspace values of + * guarded storage control can not be recreated, the + * process must be terminated. + * For SIE the guest values of guarded storage can not + * be recreated. This is either due to a bug or due to + * GS being disabled in the guest. The guest will be + * notified by KVM code and the guests machine check + * handling must take care of this. The host values + * are saved by KVM and are not affected. */ - kill_task = 1; + if (!test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; } else { load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); } -- cgit v1.2.3 From f094a39c6ba168f2df1edfd1731cca377af5f442 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 17 Jan 2022 18:40:32 +0100 Subject: s390/nmi: handle vector validity failures for KVM guests The machine check validity bit tells about the context. If a KVM guest was running the bit tells about the guest validity and the host state is not affected. As a guest can disable the guest validity this might result in unwanted host errors on machine checks. 
Cc: stable@vger.kernel.org Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/nmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 147c0d5fd9b4..651a51914e34 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode) /* Validate vector registers */ union ctlreg0 cr0; - if (!mci.vr) { + /* + * The vector validity must only be checked if not running a + * KVM guest. For KVM guests the machine check is forwarded by + * KVM and it is the responsibility of the guest to take + * appropriate actions. The host vector or FPU values have been + * saved by KVM and will be restored by KVM. + */ + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) { /* * Vector registers can't be restored. If the kernel * currently uses vector registers the system is -- cgit v1.2.3 From 3d787b392d169d4a2e3aee6ac6dfd6ec39722cf2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 22 Jan 2022 10:24:31 +0100 Subject: s390/uaccess: fix compile error Compiling with e.g. MARCH=z900 results in compile errors: arch/s390/lib/uaccess.c: In function 'copy_from_user_mvcos': >> arch/s390/lib/uaccess.c:65:15: error: variable 'spec' has initializer but incomplete type 65 | union oac spec = { Therefore make the definition of union oac visible for all MARCHs. 
Reported-by: kernel test robot Cc: Nico Boehr Cc: Janis Schoetterl-Glausch Fixes: 012a224e1fa3 ("s390/uaccess: introduce bit field for OAC specifier") Signed-off-by: Heiko Carstens --- arch/s390/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 147cb3534ce4..d74e26b48604 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - union oac { unsigned int val; struct { @@ -71,6 +69,8 @@ union oac { }; }; +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ -- cgit v1.2.3 From 1f52b0aba6fd37653416375cb8a1ca673acf8d5f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 17 Jan 2022 16:13:28 +0000 Subject: x86/MCE/AMD: Allow thresholding interface updates after init Changes to the AMD Thresholding sysfs code prevent sysfs writes from updating the underlying registers once CPU init is completed, i.e. "threshold_banks" is set. Allow the registers to be updated if the thresholding interface is already initialized or if in the init path. Use the "set_lvt_off" value to indicate if running in the init path, since this value is only set during init. 
Fixes: a037f3ca0ea0 ("x86/mce/amd: Make threshold bank setting hotplug robust") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220117161328.19148-1-yazen.ghannam@amd.com --- arch/x86/kernel/cpu/mce/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index a1e2f41796dc..9f4b508886dd 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr) u32 hi, lo; /* sysfs write might race against an offline operation */ - if (this_cpu_read(threshold_banks)) + if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off) return; rdmsr(tr->b->address, lo, hi); -- cgit v1.2.3 From aec982603aa8cc0a21143681feb5f60ecc69d718 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 6 Dec 2021 11:11:51 +0000 Subject: powerpc/fixmap: Fix VM debug warning on unmap Unmapping a fixmap entry is done by calling __set_fixmap() with FIXMAP_PAGE_CLEAR as flags. Today, powerpc __set_fixmap() calls map_kernel_page(). map_kernel_page() is not happy when called a second time for the same page. 
WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/pgtable.c:194 set_pte_at+0xc/0x1e8 CPU: 0 PID: 1 Comm: swapper Not tainted 5.16.0-rc3-s3k-dev-01993-g350ff07feb7d-dirty #682 NIP: c0017cd4 LR: c00187f0 CTR: 00000010 REGS: e1011d50 TRAP: 0700 Not tainted (5.16.0-rc3-s3k-dev-01993-g350ff07feb7d-dirty) MSR: 00029032 CR: 42000208 XER: 00000000 GPR00: c0165fec e1011e10 c14c0000 c0ee2550 ff800000 c0f3d000 00000000 c001686c GPR08: 00001000 b00045a9 00000001 c0f58460 c0f50000 00000000 c0007e10 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 GPR24: 00000000 00000000 c0ee2550 00000000 c0f57000 00000ff8 00000000 ff800000 NIP [c0017cd4] set_pte_at+0xc/0x1e8 LR [c00187f0] map_kernel_page+0x9c/0x100 Call Trace: [e1011e10] [c0736c68] vsnprintf+0x358/0x6c8 (unreliable) [e1011e30] [c0165fec] __set_fixmap+0x30/0x44 [e1011e40] [c0c13bdc] early_iounmap+0x11c/0x170 [e1011e70] [c0c06cb0] ioremap_legacy_serial_console+0x88/0xc0 [e1011e90] [c0c03634] do_one_initcall+0x80/0x178 [e1011ef0] [c0c0385c] kernel_init_freeable+0xb4/0x250 [e1011f20] [c0007e34] kernel_init+0x24/0x140 [e1011f30] [c0016268] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 7fe3fb78 48019689 80010014 7c630034 83e1000c 5463d97e 7c0803a6 38210010 4e800020 81250000 712a0001 41820008 <0fe00000> 9421ffe0 93e1001c 48000030 Implement unmap_kernel_page() which clears an existing pte. 
Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Tested-by: Maxime Bizon Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b0b752f6f6ecc60653e873f385c6f0dce4e9ab6a.1638789098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 2 ++ arch/powerpc/include/asm/fixmap.h | 6 ++++-- arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/nohash/64/pgtable.h | 1 + arch/powerpc/mm/pgtable.c | 9 +++++++++ 6 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 609c80f67194..f8b94f78403f 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte) #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 33e073d6b0c4..875730d5af40 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p return hash__map_kernel_page(ea, pa, prot); } +void unmap_kernel_page(unsigned long va); + static inline int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 947b5b9c4424..a832aeafe560 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx, BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) 
return; - - map_kernel_page(__fix_to_virt(idx), phys, flags); + if (pgprot_val(flags)) + map_kernel_page(__fix_to_virt(idx), phys, flags); + else + unmap_kernel_page(__fix_to_virt(idx)); } #define __early_set_fixmap __set_fixmap diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b67742e2a9b2..d959c2a73fbf 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,6 +64,7 @@ extern int icache_44x_need_flush; #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index a3313e853e5e..2816d158280a 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -308,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __swp_entry_to_pte(x) __pte((x).val) int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index abb3198bd277..6ec5a7dd7913 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, __set_pte_at(mm, addr, ptep, pte, 0); } +void unmap_kernel_page(unsigned long va) +{ + pmd_t *pmdp = pmd_off_k(va); + pte_t *ptep = pte_offset_kernel(pmdp, va); + + pte_clear(&init_mm, va, ptep); + flush_tlb_kernel_range(va, va + PAGE_SIZE); +} + /* * This is called when relaxing access to a PTE. 
It's also called in the page * fault path when we don't hit any of the major fault cases, ie, a minor -- cgit v1.2.3 From fb6433b48a178d4672cb26632454ee0b21056eaa Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sat, 22 Jan 2022 09:04:29 +0530 Subject: powerpc/perf: Fix power_pmu_disable to call clear_pmi_irq_pending only if PMI is pending Running selftest with CONFIG_PPC_IRQ_SOFT_MASK_DEBUG enabled in kernel triggered below warning: [ 172.851380] ------------[ cut here ]------------ [ 172.851391] WARNING: CPU: 8 PID: 2901 at arch/powerpc/include/asm/hw_irq.h:246 power_pmu_disable+0x270/0x280 [ 172.851402] Modules linked in: dm_mod bonding nft_ct nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables rfkill nfnetlink sunrpc xfs libcrc32c pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel ip_tables ext4 mbcache jbd2 sd_mod t10_pi sg ibmvscsi ibmveth scsi_transport_srp fuse [ 172.851442] CPU: 8 PID: 2901 Comm: lost_exception_ Not tainted 5.16.0-rc5-03218-g798527287598 #2 [ 172.851451] NIP: c00000000013d600 LR: c00000000013d5a4 CTR: c00000000013b180 [ 172.851458] REGS: c000000017687860 TRAP: 0700 Not tainted (5.16.0-rc5-03218-g798527287598) [ 172.851465] MSR: 8000000000029033 CR: 48004884 XER: 20040000 [ 172.851482] CFAR: c00000000013d5b4 IRQMASK: 1 [ 172.851482] GPR00: c00000000013d5a4 c000000017687b00 c000000002a10600 0000000000000004 [ 172.851482] GPR04: 0000000082004000 c0000008ba08f0a8 0000000000000000 00000008b7ed0000 [ 172.851482] GPR08: 00000000446194f6 0000000000008000 c00000000013b118 c000000000d58e68 [ 172.851482] GPR12: c00000000013d390 c00000001ec54a80 0000000000000000 0000000000000000 [ 172.851482] GPR16: 0000000000000000 0000000000000000 c000000015d5c708 c0000000025396d0 [ 172.851482] GPR20: 0000000000000000 0000000000000000 c00000000a3bbf40 0000000000000003 [ 172.851482] GPR24: 0000000000000000 c0000008ba097400 c0000000161e0d00 c00000000a3bb600 [ 172.851482] GPR28: c000000015d5c700 0000000000000001 0000000082384090 c0000008ba0020d8 [ 
172.851549] NIP [c00000000013d600] power_pmu_disable+0x270/0x280 [ 172.851557] LR [c00000000013d5a4] power_pmu_disable+0x214/0x280 [ 172.851565] Call Trace: [ 172.851568] [c000000017687b00] [c00000000013d5a4] power_pmu_disable+0x214/0x280 (unreliable) [ 172.851579] [c000000017687b40] [c0000000003403ac] perf_pmu_disable+0x4c/0x60 [ 172.851588] [c000000017687b60] [c0000000003445e4] __perf_event_task_sched_out+0x1d4/0x660 [ 172.851596] [c000000017687c50] [c000000000d1175c] __schedule+0xbcc/0x12a0 [ 172.851602] [c000000017687d60] [c000000000d11ea8] schedule+0x78/0x140 [ 172.851608] [c000000017687d90] [c0000000001a8080] sys_sched_yield+0x20/0x40 [ 172.851615] [c000000017687db0] [c0000000000334dc] system_call_exception+0x18c/0x380 [ 172.851622] [c000000017687e10] [c00000000000c74c] system_call_common+0xec/0x268 The warning indicates that MSR_EE was set (interrupts enabled) when an overflown PMC was detected. This could happen in power_pmu_disable since it runs under interrupt soft disable condition (local_irq_save) and not with interrupts hard disabled. Commit 2c9ac51b850d ("powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC") intended to clear PMI pending bit in Paca when disabling the PMU. It could happen that PMC gets overflown while code is in power_pmu_disable callback function. Hence add a check to see if PMI pending bit is set in Paca before clearing it via clear_pmi_irq_pending(). 
Fixes: 2c9ac51b850d ("powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC") Reported-by: Sachin Sant Signed-off-by: Athira Rajeev Tested-by: Sachin Sant Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220122033429.25395-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 32b98b7a1f86..b5b42cf0a703 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1355,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu) * Otherwise provide a warning if there is PMI pending, but * no counter is found overflown. */ - if (any_pmc_overflown(cpuhw)) - clear_pmi_irq_pending(); - else + if (any_pmc_overflown(cpuhw)) { + /* + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). + */ + if (pmi_irq_pending()) + clear_pmi_irq_pending(); + } else WARN_ON(pmi_irq_pending()); val = mmcra = cpuhw->mmcr.mmcra; -- cgit v1.2.3 From f3b7e73b2c6619884351a3a0a7468642f852b8a2 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 19 Jan 2022 19:26:37 +0100 Subject: s390/module: fix loading modules with a lot of relocations If the size of the PLT entries generated by apply_rela() exceeds 64KiB, the first ones can no longer reach __jump_r1 with brc. Fix by using brcl. An alternative solution is to add a __jump_r1 copy after every 64KiB, however, the space savings are quite small and do not justify the additional complexity. 
Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") Cc: stable@vger.kernel.org Reported-by: Andrea Righi Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/kernel/module.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index d52d85367bf7..b032e556eeb7 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -33,7 +33,7 @@ #define DEBUGP(fmt , ...) #endif -#define PLT_ENTRY_SIZE 20 +#define PLT_ENTRY_SIZE 22 void *module_alloc(unsigned long size) { @@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int insn[5]; - unsigned int *ip = me->core_layout.base + - me->arch.plt_offset + - info->plt_offset; - - insn[0] = 0x0d10e310; /* basr 1,0 */ - insn[1] = 0x100a0004; /* lg 1,10(1) */ + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; + + plt_base = me->core_layout.base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { - unsigned int *ij; - ij = me->core_layout.base + - me->arch.plt_offset + - me->arch.plt_size - PLT_ENTRY_SIZE; - insn[2] = 0xa7f40000 + /* j __jump_r1 */ - (unsigned int)(u16) - (((unsigned long) ij - 8 - - (unsigned long) ip) / 2); + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; } else { - insn[2] = 0x07f10000; /* br %r1 */ + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ } - insn[3] = 
(unsigned int) (val >> 32); - insn[4] = (unsigned int) val; + *(long *)&insn[14] = val; write(ip, insn, sizeof(insn)); info->plt_initialized = 1; -- cgit v1.2.3 From 90c5318795eefa09a9f9aef8d18a904e24962b5c Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 19 Jan 2022 19:26:38 +0100 Subject: s390/module: test loading modules with a lot of relocations Add a test in order to prevent regressions. Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 15 +++++++++++ arch/s390/lib/Makefile | 3 +++ arch/s390/lib/test_modules.c | 35 +++++++++++++++++++++++++ arch/s390/lib/test_modules.h | 50 ++++++++++++++++++++++++++++++++++++ arch/s390/lib/test_modules_helpers.c | 13 ++++++++++ 5 files changed, 116 insertions(+) create mode 100644 arch/s390/lib/test_modules.c create mode 100644 arch/s390/lib/test_modules.h create mode 100644 arch/s390/lib/test_modules_helpers.c (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9750f92380f5..be9f39fd06df 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -945,6 +945,9 @@ config S390_GUEST endmenu +config S390_MODULES_SANITY_TEST_HELPERS + def_bool n + menu "Selftests" config S390_UNWIND_SELFTEST @@ -971,4 +974,16 @@ config S390_KPROBES_SANITY_TEST Say N if you are unsure. +config S390_MODULES_SANITY_TEST + def_tristate n + depends on KUNIT + default KUNIT_ALL_TESTS + prompt "Enable s390 specific modules tests" + select S390_MODULES_SANITY_TEST_HELPERS + help + This option enables an s390 specific modules test. This option is + not useful for distributions or general kernels, but only for + kernel developers working on architecture code. + + Say N if you are unsure. 
endmenu diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 707cd4622c13..69feb8ed3312 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls +obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o +obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c new file mode 100644 index 000000000000..d056baa8fbb0 --- /dev/null +++ b/arch/s390/lib/test_modules.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <kunit/test.h> +#include <linux/module.h> + +#include "test_modules.h" + +#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) +REPEAT_10000(DECLARE_RETURN); + +/* + * Test that modules with many relocations are loaded properly. + */ +static void test_modules_many_vmlinux_relocs(struct kunit *test) +{ + int result = 0; + +#define CALL_RETURN(i) result += test_modules_return_ ## i() + REPEAT_10000(CALL_RETURN); + KUNIT_ASSERT_EQ(test, result, 49995000); +} + +static struct kunit_case modules_testcases[] = { + KUNIT_CASE(test_modules_many_vmlinux_relocs), + {} +}; + +static struct kunit_suite modules_test_suite = { + .name = "modules_test_s390", + .test_cases = modules_testcases, +}; + +kunit_test_suites(&modules_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h new file mode 100644 index 000000000000..43b5e4b4af3e --- /dev/null +++ b/arch/s390/lib/test_modules.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_MODULES_H +#define TEST_MODULES_H + +#define __REPEAT_10000_3(f, x) \ + f(x ## 0); \ + f(x ## 1); \ + f(x ## 2); \ + f(x ## 3); \ + f(x ## 4); \ + f(x ## 5); \ + f(x ## 6); \ + f(x ## 7); \ + f(x ## 8); \ + f(x ## 9) +#define __REPEAT_10000_2(f, x) \ + 
__REPEAT_10000_3(f, x ## 0); \ + __REPEAT_10000_3(f, x ## 1); \ + __REPEAT_10000_3(f, x ## 2); \ + __REPEAT_10000_3(f, x ## 3); \ + __REPEAT_10000_3(f, x ## 4); \ + __REPEAT_10000_3(f, x ## 5); \ + __REPEAT_10000_3(f, x ## 6); \ + __REPEAT_10000_3(f, x ## 7); \ + __REPEAT_10000_3(f, x ## 8); \ + __REPEAT_10000_3(f, x ## 9) +#define __REPEAT_10000_1(f, x) \ + __REPEAT_10000_2(f, x ## 0); \ + __REPEAT_10000_2(f, x ## 1); \ + __REPEAT_10000_2(f, x ## 2); \ + __REPEAT_10000_2(f, x ## 3); \ + __REPEAT_10000_2(f, x ## 4); \ + __REPEAT_10000_2(f, x ## 5); \ + __REPEAT_10000_2(f, x ## 6); \ + __REPEAT_10000_2(f, x ## 7); \ + __REPEAT_10000_2(f, x ## 8); \ + __REPEAT_10000_2(f, x ## 9) +#define REPEAT_10000(f) \ + __REPEAT_10000_1(f, 0); \ + __REPEAT_10000_1(f, 1); \ + __REPEAT_10000_1(f, 2); \ + __REPEAT_10000_1(f, 3); \ + __REPEAT_10000_1(f, 4); \ + __REPEAT_10000_1(f, 5); \ + __REPEAT_10000_1(f, 6); \ + __REPEAT_10000_1(f, 7); \ + __REPEAT_10000_1(f, 8); \ + __REPEAT_10000_1(f, 9) + +#endif diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c new file mode 100644 index 000000000000..1670349a03eb --- /dev/null +++ b/arch/s390/lib/test_modules_helpers.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/export.h> + +#include "test_modules.h" + +#define DEFINE_RETURN(i) \ + int test_modules_return_ ## i(void) \ + { \ + return 1 ## i - 10000; \ + } \ + EXPORT_SYMBOL_GPL(test_modules_return_ ## i) +REPEAT_10000(DEFINE_RETURN); -- cgit v1.2.3 From c9bb19368b3ab111aedf3297e65bf84c9d3aa005 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Dec 2021 14:58:49 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 20 ++++++++++---------- arch/s390/configs/defconfig | 16 +++++++++------- arch/s390/configs/zfcpdump_defconfig | 3 +++ 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig 
b/arch/s390/configs/debug_defconfig index 7fe8975b49ec..498bed9b261b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -96,7 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -109,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -116,7 +117,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -185,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -391,6 +390,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -400,6 +400,7 @@ CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -501,6 +502,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -511,7 +513,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI 
is not set @@ -542,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -592,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -756,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -774,6 +774,8 @@ CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m CONFIG_CRC7=m @@ -807,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_VM_VMACACHE=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m @@ -819,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_TEST_LOCKUP=m -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 466780c465f5..61e36b999f67 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m 
CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_GCC_PLUGINS is not set @@ -91,7 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y @@ -101,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -108,7 +109,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -177,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -382,6 +381,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -391,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -492,6 +493,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -502,7 +504,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -533,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # 
CONFIG_NET_VENDOR_XILINX is not set @@ -582,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -743,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -762,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index eed3b9acfa71..c55c668dc3c7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,6 +1,7 @@ # CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set # CONFIG_TIME_NS is not set @@ -34,6 +35,7 @@ CONFIG_NET=y # CONFIG_PCPU_DEV_REFCNT is not set # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set @@ -58,6 +60,7 @@ CONFIG_ZFCP=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -- cgit v1.2.3 From 278583055a237270fac70518275ba877bf9e4013 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jan 2022 18:42:07 +0000 Subject: KVM: arm64: Use shadow SPSR_EL1 when injecting exceptions on !VHE Injecting an exception into a guest with non-VHE is risky business. 
Instead of writing to the shadow register for the switch code to restore it, we override the CPU register, which then gets overridden a few instructions later by said restore code. The result is that although the guest correctly gets the exception, it will return to the original context in some random state, depending on what was there in the first place... Boo. Fix the issue by writing to the shadow register. The original code is absolutely fine on VHE, as the state is already loaded, and writing to the shadow register in that case would actually be a bug. Fixes: bb666c472ca2 ("KVM: arm64: Inject AArch64 exceptions from HYP") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Link: https://lore.kernel.org/r/20220121184207.423426-1-maz@kernel.org --- arch/arm64/kvm/hyp/exception.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 0418399e0a20..c5d009715402 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { - write_sysreg_el1(val, SYS_SPSR); + if (has_vhe()) + write_sysreg_el1(val, SYS_SPSR); + else + __vcpu_sys_reg(vcpu, SPSR_EL1) = val; } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) -- cgit v1.2.3 From 58cd4a088e8917b4092c7011d499e277e04a6644 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jan 2022 12:12:34 +0000 Subject: arm64: vdso: Fix "no previous prototype" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If compiling the arm64 kernel with W=1 the following warning is produced: | arch/arm64/kernel/vdso/vgettimeofday.c:9:5: error: no previous prototype for ‘__kernel_clock_gettime’ [-Werror=missing-prototypes] | 9 | int __kernel_clock_gettime(clockid_t 
clock, | | ^~~~~~~~~~~~~~~~~~~~~~ | arch/arm64/kernel/vdso/vgettimeofday.c:15:5: error: no previous prototype for ‘__kernel_gettimeofday’ [-Werror=missing-prototypes] | 15 | int __kernel_gettimeofday(struct __kernel_old_timeval *tv, | | ^~~~~~~~~~~~~~~~~~~~~ | arch/arm64/kernel/vdso/vgettimeofday.c:21:5: error: no previous prototype for ‘__kernel_clock_getres’ [-Werror=missing-prototypes] | 21 | int __kernel_clock_getres(clockid_t clock_id, | | ^~~~~~~~~~~~~~~~~~~~~ This patch removes the "-Wmissing-prototypes" and "-Wmissing-declarations" compiler flags from the compilation of vgettimeofday.c to make it possible to build the kernel with CONFIG_WERROR enabled. Cc: Will Deacon Reported-by: Marc Kleine-Budde Signed-off-by: Vincenzo Frascino Tested-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20220121121234.47273-1-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 60813497a381..172452f79e46 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO +# -Wmissing-prototypes and -Wmissing-declarations are removed from +# the CFLAGS of vgettimeofday.c to make possible to build the +# kernel with CONFIG_WERROR enabled. 
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) + $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations KASAN_SANITIZE := n KCSAN_SANITIZE := n UBSAN_SANITIZE := n -- cgit v1.2.3 From 94fea1d8a30eadc3ef07afc0f53dc06799bb300b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 22 Jan 2022 01:52:11 +0000 Subject: KVM: VMX: Zero host's SYSENTER_ESP iff SYSENTER is NOT used Zero vmcs.HOST_IA32_SYSENTER_ESP when initializing *constant* host state if and only if SYSENTER cannot be used, i.e. the kernel is a 64-bit kernel and is not emulating 32-bit syscalls. As the name suggests, vmx_set_constant_host_state() is intended for state that is *constant*. When SYSENTER is used, SYSENTER_ESP isn't constant because stacks are per-CPU, and the VMCS must be updated whenever the vCPU is migrated to a new CPU. The logic in vmx_vcpu_load_vmcs() doesn't differentiate between "never loaded" and "loaded on a different CPU", i.e. setting SYSENTER_ESP on VMCS load also handles setting correct host state when the VMCS is first loaded. Because a VMCS must be loaded before it is initialized during vCPU RESET, zeroing the field in vmx_set_constant_host_state() obliterates the value that was written when the VMCS was loaded. If the vCPU is run before it is migrated, the subsequent VM-Exit will zero out MSR_IA32_SYSENTER_ESP, leading to a #DF on the next 32-bit syscall. 
double fault: 0000 [#1] SMP CPU: 0 PID: 990 Comm: stable Not tainted 5.16.0+ #97 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 EIP: entry_SYSENTER_32+0x0/0xe7 Code: <9c> 50 eb 17 0f 20 d8 a9 00 10 00 00 74 0d 25 ff ef ff ff 0f 22 d8 EAX: 000000a2 EBX: a8d1300c ECX: a8d13014 EDX: 00000000 ESI: a8f87000 EDI: a8d13014 EBP: a8d12fc0 ESP: 00000000 DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00210093 CR0: 80050033 CR2: fffffffc CR3: 02c3b000 CR4: 00152e90 Fixes: 6ab8a4053f71 ("KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)") Cc: Lai Jiangshan Signed-off-by: Sean Christopherson Message-Id: <20220122015211.1468758-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a02a28ce7cc3..705e5c082738 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4094,10 +4094,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_write32(HOST_IA32_SYSENTER_CS, low32); /* - * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites - * HOST_IA32_SYSENTER_ESP. + * SYSENTER is used for 32-bit system calls on either 32-bit or + * 64-bit kernels. It is always zero If neither is allowed, otherwise + * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may + * have already done so!). 
*/ - vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32)) + vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ -- cgit v1.2.3 From adb759e599990416e42e659c024a654b76c84617 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 23 Jan 2022 13:42:19 +0100 Subject: x86,kvm/xen: Remove superfluous .fixup usage Commit 14243b387137 ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery") adds superfluous .fixup usage after the whole .fixup section was removed in commit e5eefda5aa51 ("x86: Remove .fixup section"). Fixes: 14243b387137 ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery") Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Message-Id: <20220123124219.GH20638@worktop.programming.kicks-ass.net> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 0e3f7d6e9fd7..bad57535fad0 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -316,10 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotq %0\n" "\t" LOCK_PREFIX "andq %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) @@ -335,10 +332,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotl %0\n" "\t" LOCK_PREFIX "andl %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel32), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) -- cgit v1.2.3 From 72bb9dcb6c33cfac80282713c2b4f2b254cd24d1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 24 Jan 
2022 08:45:37 +0530 Subject: arm64: Add Cortex-X2 CPU part definition Add the CPU part numbers for the new Arm designs. Cc: Will Deacon Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1642994138-25887-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 19b8441aa8f2..657eeb06c784 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -74,6 +74,7 @@ #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -116,6 +117,7 @@ #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) -- cgit v1.2.3 From eb30d838a44c9e59a2a106884f536119859c7257 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 24 Jan 2022 08:45:38 +0530 Subject: arm64: errata: Update ARM64_ERRATUM_[2119858|2224489] with Cortex-X2 ranges Errata ARM64_ERRATUM_[2119858|2224489] also affect some Cortex-X2 ranges. Let's update these errata definitions and their detection to accommodate all new Cortex-X2 based CPU MIDR ranges. 
Cc: Will Deacon Cc: Mathieu Poirier Cc: Suzuki Poulose Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1642994138-25887-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 12 ++++++------ arch/arm64/kernel/cpu_errata.c | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6978140edfa4..77b8f653f4bc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -671,14 +671,14 @@ config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE bool config ARM64_ERRATUM_2119858 - bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y depends on CORESIGHT_TRBE select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE help - This option adds the workaround for ARM Cortex-A710 erratum 2119858. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858. - Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in the event of a WRAP event. @@ -761,14 +761,14 @@ config ARM64_ERRATUM_2253138 If unsure, say Y. config ARM64_ERRATUM_2224489 - bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range" depends on CORESIGHT_TRBE default y select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE help - This option adds the workaround for ARM Cortex-A710 erratum 2224489. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489. 
- Affected Cortex-A710 cores might write to an out-of-range address, not reserved + Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved for TRBE. Under some conditions, the TRBE might generate a write to the next virtually addressed page following the last page of the TRBE address space (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e1c1aef9ebd..29cc062a4153 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -347,6 +347,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; @@ -371,6 +372,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; -- cgit v1.2.3 From 1e0924bd09916fab795fc2a21ec1d148f24299fd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 24 Jan 2022 17:17:54 +0900 Subject: arm64: Mark start_backtrace() notrace and NOKPROBE_SYMBOL Mark start_backtrace() as notrace and NOKPROBE_SYMBOL because this function is called from ftrace and lockdep to get the caller address via return_address(). Since lockdep is used in kprobes, it should also be NOKPROBE_SYMBOL. 
Fixes: b07f3499661c ("arm64: stacktrace: Move start_backtrace() out of the header") Cc: # 5.13.x Signed-off-by: Masami Hiramatsu Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/164301227374.1433152.12808232644267107415.stgit@devnote2 Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0fb58fed54cb..e4103e085681 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -33,8 +33,8 @@ */ -static void start_backtrace(struct stackframe *frame, unsigned long fp, - unsigned long pc) +static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) { frame->fp = fp; frame->pc = pc; @@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp, frame->prev_fp = 0; frame->prev_type = STACK_TYPE_UNKNOWN; } +NOKPROBE_SYMBOL(start_backtrace); /* * Unwind from one frame record (A) to the next frame record (B). -- cgit v1.2.3 From 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 22 Jan 2022 20:55:30 +1000 Subject: KVM: PPC: Book3S HV Nested: Fix nested HFSCR being clobbered with multiple vCPUs The L0 is storing HFSCR requested by the L1 for the L2 in struct kvm_nested_guest when the L1 requests a vCPU enter L2. kvm_nested_guest is not a per-vCPU structure. Hilarity ensues. Fix it by moving the nested hfscr into the vCPU structure together with the other per-vCPU nested fields. 
Fixes: 8b210a880b35 ("KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220122105530.3477250-1-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 1 - arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/book3s_hv.c | 3 +-- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fe07558173ef..827038a33064 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -39,7 +39,6 @@ struct kvm_nested_guest { pgd_t *shadow_pgtable; /* our page table for this guest */ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ u64 process_table; /* process table entry for this guest */ - u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ long refcnt; /* number of pointers to this struct */ struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a770443cd6e0..d9bf60bf0816 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -818,6 +818,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; + u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ u32 nested_vcpu_id; gpa_t nested_io_gpr; #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d1817cd9a691..84c89f08ae9a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { - struct kvm_nested_guest *nested = 
vcpu->arch.nested; int r; int srcu_idx; @@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * it into a HEAI. */ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || - (nested->hfscr & (1UL << cause))) { + (vcpu->arch.nested_hfscr & (1UL << cause))) { vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8f8daaeeb3b7..9d373f8963ee 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; - l2->hfscr = l2_hv.hfscr; + vcpu->arch.nested_hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ -- cgit v1.2.3 From 8defc2a5dd8f4c0cb19ecbaca8d3e89ab98524da Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Jan 2022 00:39:28 +1000 Subject: powerpc/64s/interrupt: Fix decrementer storm The decrementer exception can fail to be cleared when the interrupt returns in the case where the decrementer wraps with the next timer still beyond decrementer_max. This results in a decrementer interrupt storm. This is triggerable on a system with a small decrementer and with the hard and soft watchdogs disabled. Fix this by always programming the decrementer if there was no timer. 
Fixes: 0faf20a1ad16 ("powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless perf is in use") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220124143930.3923442-1-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 62361cc7281c..cd0b8b71ecdd 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } -- cgit v1.2.3 From 5c89be1dd5cfb697614bc13626ba3bd0781aa160 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 24 Jan 2022 11:36:05 +0100 Subject: KVM: x86: Move CPUID.(EAX=0x12,ECX=1) mangling to __kvm_update_cpuid_runtime() Full equality check of CPUID data on update (kvm_cpuid_check_equal()) may fail for SGX enabled CPUs as CPUID.(EAX=0x12,ECX=1) is currently being mangled in kvm_vcpu_after_set_cpuid(). Move it to __kvm_update_cpuid_runtime() and split off cpuid_get_supported_xcr0() helper as 'vcpu->arch.guest_supported_xcr0' update needs (logically) to stay in kvm_vcpu_after_set_cpuid(). 
Cc: stable@vger.kernel.org Fixes: feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN") Signed-off-by: Vitaly Kuznetsov Message-Id: <20220124103606.2630588-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 54 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 3902c28fb6cb..89d7822a8f5b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -196,10 +196,26 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) vcpu->arch.pv_cpuid.features = best->eax; } +/* + * Calculate guest's supported XCR0 taking into account guest CPUID data and + * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0). + */ +static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) +{ + struct kvm_cpuid_entry2 *best; + + best = cpuid_entry2_find(entries, nent, 0xd, 0); + if (!best) + return 0; + + return (best->eax | ((u64)best->edx << 32)) & supported_xcr0; +} + static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, int nent) { struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); best = cpuid_entry2_find(entries, nent, 1, 0); if (best) { @@ -238,6 +254,21 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } + + /* + * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate + * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's + * requested XCR0 value. The enclave's XFRM must be a subset of XCRO + * at the time of EENTER, thus adjust the allowed XFRM by the guest's + * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to + * '1' even on CPUs that don't support XSAVE. 
+ */ + best = cpuid_entry2_find(entries, nent, 0x12, 0x1); + if (best) { + best->ecx &= guest_supported_xcr0 & 0xffffffff; + best->edx &= guest_supported_xcr0 >> 32; + best->ecx |= XFEATURE_MASK_FPSSE; + } } void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) @@ -261,27 +292,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - best = kvm_find_cpuid_entry(vcpu, 0xD, 0); - if (!best) - vcpu->arch.guest_supported_xcr0 = 0; - else - vcpu->arch.guest_supported_xcr0 = - (best->eax | ((u64)best->edx << 32)) & supported_xcr0; - - /* - * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate - * the supported XSAVE