diff options
68 files changed, 1023 insertions, 1006 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4c74a2f4ddfc..6d380890e075 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1390,6 +1390,11 @@ hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs. If specified, z/VM IUCV HVC accepts connections from listed z/VM user IDs only. + + hv_nopvspin [X86,HYPER_V] Disables the paravirt spinlock optimizations + which allow the hypervisor to 'idle' the + guest on lock contention. + keep_bootcon [KNL] Do not unregister boot console at start. This is only useful for debugging when something happens in the window diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h index d51e5cd31d01..cdbf02d9c1d4 100644 --- a/arch/arm/include/asm/paravirt.h +++ b/arch/arm/include/asm/paravirt.h @@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled; struct pv_time_ops { unsigned long long (*steal_clock)(int cpu); }; -extern struct pv_time_ops pv_time_ops; + +struct paravirt_patch_template { + struct pv_time_ops time; +}; + +extern struct paravirt_patch_template pv_ops; static inline u64 paravirt_steal_clock(int cpu) { - return pv_time_ops.steal_clock(cpu); + return pv_ops.time.steal_clock(cpu); } #endif diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c index 53f371ed4568..75c158b0353f 100644 --- a/arch/arm/kernel/paravirt.c +++ b/arch/arm/kernel/paravirt.c @@ -21,5 +21,5 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops pv_time_ops; -EXPORT_SYMBOL_GPL(pv_time_ops); +struct paravirt_patch_template pv_ops; +EXPORT_SYMBOL_GPL(pv_ops); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 07060e5b5864..17e478928276 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -62,29 +62,6 @@ static __read_mostly unsigned int xen_events_irq; uint32_t xen_start_flags; EXPORT_SYMBOL(xen_start_flags); -int xen_remap_domain_gfn_array(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t *gfn, int nr, - int *err_ptr, pgprot_t prot, - unsigned domid, - struct page **pages) -{ - return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr, - prot, domid, pages); -} -EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); - -/* Not used by XENFEAT_auto_translated guests. */ -int xen_remap_domain_gfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t gfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) -{ - return -ENOSYS; -} -EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); - int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages) { @@ -92,17 +69,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -/* Not used by XENFEAT_auto_translated guests. */ -int xen_remap_domain_mfn_array(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t *mfn, int nr, - int *err_ptr, pgprot_t prot, - unsigned int domid, struct page **pages) -{ - return -ENOSYS; -} -EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index bb5dcea42003..799d9dd6f7cc 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled; struct pv_time_ops { unsigned long long (*steal_clock)(int cpu); }; -extern struct pv_time_ops pv_time_ops; + +struct paravirt_patch_template { + struct pv_time_ops time; +}; + +extern struct paravirt_patch_template pv_ops; static inline u64 paravirt_steal_clock(int cpu) { - return pv_time_ops.steal_clock(cpu); + return pv_ops.time.steal_clock(cpu); } #endif diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 53f371ed4568..75c158b0353f 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -21,5 +21,5 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops pv_time_ops; -EXPORT_SYMBOL_GPL(pv_time_ops); +struct paravirt_patch_template pv_ops; +EXPORT_SYMBOL_GPL(pv_ops); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ff425a2d286c..46d42af4501a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -523,6 +523,7 @@ config X86_VSMP bool "ScaleMP vSMP" select HYPERVISOR_GUEST select PARAVIRT + select PARAVIRT_XXL depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM depends on SMP @@ -753,6 +754,9 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_XXL + bool + config PARAVIRT_DEBUG bool "paravirt-ops debugging" depends on PARAVIRT && DEBUG_KERNEL diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index a423bdb42686..a1d5918765f3 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -9,6 +9,7 @@ * paravirt and debugging variants are added.) */ #undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index fbbf1ba57ec6..687e47f8a796 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -783,7 +783,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region) * will ignore all of the single-step traps generated in this range. */ -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV /* * Xen doesn't set %esp to be precisely what the normal SYSENTER * entry point expects, so fix it up before using the normal path. @@ -1241,7 +1241,7 @@ ENTRY(spurious_interrupt_bug) jmp common_exception END(spurious_interrupt_bug) -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV ENTRY(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL @@ -1322,11 +1322,13 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(3b, 8b) _ASM_EXTABLE(4b, 9b) ENDPROC(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ +#ifdef CONFIG_XEN_PVHVM BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) +#endif -#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f95dcb209fdf..7c5ce0a6c4d2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack) ret ENDPROC(do_softirq_own_stack) -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 /* @@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback) ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ +#ifdef CONFIG_XEN_PVHVM apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall +#endif -#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ @@ -1151,7 +1153,7 @@ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV idtentry xennmi do_nmi has_error_code=0 idtentry xendebug do_debug has_error_code=0 idtentry xenint3 do_int3 has_error_code=0 diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index b21ee65c4101..1c11f9420a82 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,2 +1,6 @@ obj-y := hv_init.o mmu.o nested.o obj-$(CONFIG_X86_64) += hv_apic.o + +ifdef CONFIG_X86_64 +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o +endif diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c new file mode 100644 index 000000000000..a861b0456b1a --- /dev/null +++ b/arch/x86/hyperv/hv_spinlock.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific spinlock code. + * + * Copyright (C) 2018, Intel, Inc. + * + * Author : Yi Sun <yi.y.sun@intel.com> + */ + +#define pr_fmt(fmt) "Hyper-V: " fmt + +#include <linux/spinlock.h> + +#include <asm/mshyperv.h> +#include <asm/paravirt.h> +#include <asm/apic.h> + +static bool __initdata hv_pvspin = true; + +static void hv_qlock_kick(int cpu) +{ + apic->send_IPI(cpu, X86_PLATFORM_IPI_VECTOR); +} + +static void hv_qlock_wait(u8 *byte, u8 val) +{ + unsigned long msr_val; + unsigned long flags; + + if (in_nmi()) + return; + + /* + * Reading HV_X64_MSR_GUEST_IDLE MSR tells the hypervisor that the + * vCPU can be put into 'idle' state. This 'idle' state is + * terminated by an IPI, usually from hv_qlock_kick(), even if + * interrupts are disabled on the vCPU. + * + * To prevent a race against the unlock path it is required to + * disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE + * MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between + * the lock value check and the rdmsrl() then the vCPU might be put + * into 'idle' state by the hypervisor and kept in that state for + * an unspecified amount of time. + */ + local_irq_save(flags); + /* + * Only issue the rdmsrl() when the lock state has not changed. + */ + if (READ_ONCE(*byte) == val) + rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val); + local_irq_restore(flags); +} + +/* + * Hyper-V does not support this so far. + */ +bool hv_vcpu_is_preempted(int vcpu) +{ + return false; +} +PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted); + +void __init hv_init_spinlocks(void) +{ + if (!hv_pvspin || !apic || + !(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) || + !(ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)) { + pr_info("PV spinlocks disabled\n"); + return; + } + pr_info("PV spinlocks enabled\n"); + + __pv_init_lock_hash(); + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops.lock.wait = hv_qlock_wait; + pv_ops.lock.kick = hv_qlock_kick; + pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted); +} + +static __init int hv_parse_nopvspin(char *arg) +{ + hv_pvspin = false; + return 0; +} +early_param("hv_nopvspin", hv_parse_nopvspin); diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index ef5f29f913d7..e65d7fe6489f 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -231,6 +231,6 @@ void hyperv_setup_mmu_ops(void) return; pr_info("Using hypercall for remote TLB flush\n"); - pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; - pv_mmu_ops.tlb_remove_table = tlb_remove_table; + pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others; + pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4505ac2735ad..9e5ca30738e5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -8,7 +8,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL /* * These special macros can be used to get or set a debugging register */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 13c5ee878a47..68a99d2a5f33 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr) return !(desc[0] | desc[1]); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define load_TR_desc() native_load_tr_desc() @@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { } -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6390bd8c141b..50ba74a34a37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -162,7 +162,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 00e01d215f74..4139f7650fe5 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -38,6 +38,8 @@ #define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) /* Partition reference TSC MSR is available */ #define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) +/* Partition Guest IDLE MSR is available */ +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10) /* A partition's reference time stamp counter (TSC) page */ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 @@ -246,6 +248,9 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest idle MSR */ +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 + /* Hyper-V guest crash notification MSR's */ #define HV_X64_MSR_CRASH_P0 0x40000100 #define HV_X64_MSR_CRASH_P1 0x40000101 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 15450a675031..058e40fed167 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -64,7 +64,7 @@ static inline __cpuidle void native_halt(void) #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ @@ -123,6 +123,10 @@ static inline notrace unsigned long arch_local_irq_save(void) #define DISABLE_INTERRUPTS(x) cli #ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif + #define SWAPGS swapgs /* * Currently paravirt can't handle swapgs nicely when we @@ -135,8 +139,6 @@ static inline notrace unsigned long arch_local_irq_save(void) */ #define SWAPGS_UNSAFE_STACK swapgs -#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ - #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ @@ -145,18 +147,12 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl -#ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax -#endif #else #define INTERRUPT_RETURN iret -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define GET_CR0_INTO_EAX movl %cr0, %eax #endif - #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static inline int arch_irqs_disabled_flags(unsigned long flags) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index eeeb9289c764..0ca50611e8ce 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -16,12 +16,12 @@ extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { } -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ #ifdef CONFIG_PERF_EVENTS diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f37704497d8f..0d6271cce198 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -351,6 +351,8 @@ int hyperv_flush_guest_mapping(u64 as); #ifdef CONFIG_X86_64 void hv_apic_init(void); +void __init hv_init_spinlocks(void); +bool hv_vcpu_is_preempted(int vcpu); #else static inline void hv_apic_init(void) {} #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 04addd6e0a4a..91e4cf189914 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -242,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter) return EAX_EDX_VAL(val, low, high); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> @@ -305,7 +305,7 @@ do { \ #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ /* * 64-bit version of wrmsr_safe(): |