Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                                                     |   1
-rw-r--r--  arch/x86/include/asm/hardirq.h                                       |   4
-rw-r--r--  arch/x86/include/asm/idtentry.h                                      |   4
-rw-r--r--  arch/x86/include/asm/kvm_host.h                                      |  95
-rw-r--r--  arch/x86/include/asm/kvm_para.h                                      |   3
-rw-r--r--  arch/x86/include/asm/kvm_types.h                                     |   7
-rw-r--r--  arch/x86/include/asm/qspinlock.h                                     |   1
-rw-r--r--  arch/x86/kernel/kvm.c                                                | 118
-rw-r--r--  arch/x86/kvm/cpuid.c                                                 | 115
-rw-r--r--  arch/x86/kvm/cpuid.h                                                 |   2
-rw-r--r--  arch/x86/kvm/lapic.c                                                 |  11
-rw-r--r--  arch/x86/kvm/mmu.h                                                   |  34
-rw-r--r--  arch/x86/kvm/mmu/mmu.c                                               | 461
-rw-r--r--  arch/x86/kvm/mmu/mmu_audit.c (renamed from arch/x86/kvm/mmu_audit.c) |  12
-rw-r--r--  arch/x86/kvm/mmu/mmu_internal.h                                      |  63
-rw-r--r--  arch/x86/kvm/mmu/mmutrace.h (renamed from arch/x86/kvm/mmutrace.h)   |   2
-rw-r--r--  arch/x86/kvm/mmu/page_track.c                                        |   2
-rw-r--r--  arch/x86/kvm/mmu/paging_tmpl.h                                       |  19
-rw-r--r--  arch/x86/kvm/pmu.c                                                   |   5
-rw-r--r--  arch/x86/kvm/pmu.h                                                   |   2
-rw-r--r--  arch/x86/kvm/svm/avic.c                                              |   2
-rw-r--r--  arch/x86/kvm/svm/nested.c                                            | 142
-rw-r--r--  arch/x86/kvm/svm/sev.c                                               |  47
-rw-r--r--  arch/x86/kvm/svm/svm.c                                               | 262
-rw-r--r--  arch/x86/kvm/svm/svm.h                                               |  32
-rw-r--r--  arch/x86/kvm/svm/vmenter.S                                           |   2
-rw-r--r--  arch/x86/kvm/vmx/nested.c                                            | 149
-rw-r--r--  arch/x86/kvm/vmx/ops.h                                               |   4
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c                                         |  17
-rw-r--r--  arch/x86/kvm/vmx/vmenter.S                                           |   5
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                                               | 209
-rw-r--r--  arch/x86/kvm/vmx/vmx.h                                               |  12
-rw-r--r--  arch/x86/kvm/x86.c                                                   | 231
-rw-r--r--  arch/x86/kvm/x86.h                                                   |  34
-rw-r--r--  arch/x86/xen/spinlock.c                                              |   4
35 files changed, 1282 insertions, 831 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fd03cefabd34..9a2849527dd7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -803,6 +803,7 @@ config KVM_GUEST
depends on PARAVIRT
select PARAVIRT_CLOCK
select ARCH_CPUIDLE_HALTPOLL
+ select X86_HV_CALLBACK_VECTOR
default y
help
This option enables various optimizations for running under the KVM
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 07533795b8d2..275e7fd20310 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -67,12 +67,12 @@ static inline void kvm_set_cpu_l1tf_flush_l1d(void)
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
-static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
+static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
}
-static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
+static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
{
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index ff198fc2495e..a43366191212 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -632,6 +632,10 @@ DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback);
#endif
+#ifdef CONFIG_KVM_GUEST
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt);
+#endif
+
#undef X86_TRAP_OTHER
#endif
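
The declaration above only exposes the handler prototype to the IDT machinery; the handler body and the gate allocation appear later in this diff (arch/x86/kernel/kvm.c). For orientation, the three pieces pair up roughly as sketched below -- condensed from those hunks, not additional kernel code:

    /* include/asm/idtentry.h: emits the asm stub asm_sysvec_kvm_asyncpf_interrupt */
    DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt);

    /* kernel/kvm.c: the C body reached through that stub */
    DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
    {
            /* ...wake the task waiting on apf_reason.token, then ack the host... */
    }

    /* kvm_guest_init(): install the stub on the hypervisor callback vector */
    alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
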
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index be5363b21540..5ab3af7275d8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -193,8 +193,6 @@ struct x86_exception;
enum x86_intercept;
enum x86_intercept_stage;
-#define KVM_NR_MEM_OBJS 40
-
#define KVM_NR_DB_REGS 4
#define DR6_BD (1 << 13)
@@ -246,15 +244,6 @@ enum x86_intercept_stage;
struct kvm_kernel_irq_routing_entry;
/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
* the pages used as guest page table on soft mmu are tracked by
* kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
* by indirect shadow page can not be more than 15 bits.
@@ -322,43 +311,6 @@ struct kvm_rmap_head {
unsigned long val;
};
-struct kvm_mmu_page {
- struct list_head link;
- struct hlist_node hash_link;
- struct list_head lpage_disallowed_link;
-
- bool unsync;
- u8 mmu_valid_gen;
- bool mmio_cached;
- bool lpage_disallowed; /* Can't be replaced by an equiv large page */
-
- /*
- * The following two entries are used to key the shadow page in the
- * hash table.
- */
- union kvm_mmu_page_role role;
- gfn_t gfn;
-
- u64 *spt;
- /* hold the gfn of each spte inside spt */
- gfn_t *gfns;
- int root_count; /* Currently serving as active root */
- unsigned int unsync_children;
- struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
- DECLARE_BITMAP(unsync_child_bitmap, 512);
-
-#ifdef CONFIG_X86_32
- /*
- * Used out of the mmu-lock to avoid reading spte values while an
- * update is in progress; see the comments in __get_spte_lockless().
- */
- int clear_spte_count;
-#endif
-
- /* Number of writes since the last time traversal visited this page. */
- atomic_t write_flooding_count;
-};
-
struct kvm_pio_request {
unsigned long linear_rip;
unsigned long count;
@@ -384,6 +336,8 @@ struct kvm_mmu_root_info {
#define KVM_MMU_NUM_PREV_ROOTS 3
+struct kvm_mmu_page;
+
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
@@ -580,6 +534,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
+ unsigned long cr4_guest_rsvd_bits;
unsigned long cr8;
u32 host_pkru;
u32 pkru;
@@ -635,7 +590,8 @@ struct kvm_vcpu_arch {
struct kvm_mmu *walk_mmu;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
- struct kvm_mmu_memory_cache mmu_page_cache;
+ struct kvm_mmu_memory_cache mmu_shadow_page_cache;
+ struct kvm_mmu_memory_cache mmu_gfn_array_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
/*
@@ -683,7 +639,7 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
int maxphyaddr;
- int tdp_level;
+ int max_tdp_level;
/* emulate context */
@@ -827,6 +783,9 @@ struct kvm_vcpu_arch {
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
+ /* Host CPU on which VM-entry was most recently attempted */
+ unsigned int last_vmentry_cpu;
+
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
};
@@ -1083,7 +1042,7 @@ struct kvm_x86_ops {
void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
bool (*has_emulated_msr)(u32 index);
- void (*cpuid_update)(struct kvm_vcpu *vcpu);
+ void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
@@ -1098,7 +1057,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
+ void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -1174,10 +1133,10 @@ struct kvm_x86_ops {
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
- int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3);
+ void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
+ int pgd_level);
bool (*has_wbinvd_exit)(void);
@@ -1220,7 +1179,6 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
- int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
@@ -1281,6 +1239,7 @@ struct kvm_x86_nested_ops {
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int (*enable_evmcs)(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
@@ -1304,7 +1263,7 @@ struct kvm_arch_async_pf {
};
extern u64 __read_mostly host_efer;
-
+extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
#define __KVM_HAVE_ARCH_VM_ALLOC
@@ -1549,20 +1508,8 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
bool skip_mmu_sync);
-void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
-
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception)
-{
- return gpa;
-}
-
-static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
-{
- struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
-
- return (struct kvm_mmu_page *)page_private(page);
-}
+void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
+ int tdp_huge_page_level);
static inline u16 kvm_read_ldt(void)
{
@@ -1636,7 +1583,15 @@ asmlinkage void kvm_spurious_fault(void);
insn "\n\t" \
"jmp 668f \n\t" \
"667: \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_begin \n\t" \
+ ".long 1b - . \n\t" \
+ ".popsection \n\t" \
"call kvm_spurious_fault \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_end \n\t" \
+ ".long 1b - . \n\t" \
+ ".popsection \n\t" \
"668: \n\t" \
_ASM_EXTABLE(666b, 667b)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 49d3a9edb06f..338119852512 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
+#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -18,7 +19,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
#endif /* CONFIG_KVM_GUEST */
#define KVM_HYPERCALL \
- ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
+ ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h
new file mode 100644
index 000000000000..08f1b57d3b62
--- /dev/null
+++ b/arch/x86/include/asm/kvm_types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVM_TYPES_H
+#define _ASM_X86_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_X86_KVM_TYPES_H */
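
This new header only supplies the per-cache capacity; the struct kvm_mmu_memory_cache definition deleted from kvm_host.h earlier in this diff now lives in arch-neutral KVM code, sized by KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE. A minimal sketch of the cache's shape, based on the removed x86 definition -- the gfp_zero and kmem_cache members are assumptions about the common-code version, not something visible in these hunks:

    /*
     * Illustrative sketch, not part of the patch: a fixed pool of
     * preallocated objects so MMU page-fault handling never has to
     * allocate while holding the mmu lock.
     */
    struct kvm_mmu_memory_cache {
            int nobjs;                     /* objects currently cached */
            gfp_t gfp_zero;                /* assumed: e.g. __GFP_ZERO for zeroed objects */
            struct kmem_cache *kmem_cache; /* assumed: optional dedicated slab */
            void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
    };
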
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 444d6fd9a6d8..d86ab942219c 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -32,6 +32,7 @@ extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+extern bool nopvspin;
#define queued_spin_unlock queued_spin_unlock
/**
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 233c77d056c9..08320b0b2b27 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -7,8 +7,11 @@
* Authors: Anthony Liguori <aliguori@us.ibm.com>
*/
+#define pr_fmt(fmt) "kvm-guest: " fmt
+
#include <linux/context_tracking.h>
#include <linux/init.h>
+#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
@@ -232,16 +235,11 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
- u32 reason = kvm_read_and_reset_apf_flags();
+ u32 flags = kvm_read_and_reset_apf_flags();
irqentry_state_t state;
- switch (reason) {
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
- case KVM_PV_REASON_PAGE_READY:
- break;
- default:
+ if (!flags)
return false;
- }
state = irqentry_enter(regs);
instrumentation_begin();
@@ -254,13 +252,13 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
panic("Host injected async #PF in interrupt disabled region\n");
- if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
+ if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
if (unlikely(!(user_mode(regs))))
panic("Host injected async #PF in kernel mode\n");
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
- kvm_async_pf_task_wake(token);
+ WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
}
instrumentation_end();
@@ -268,6 +266,27 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
return true;
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ u32 token;
+ irqentry_state_t state;
+
+ state = irqentry_enter(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (__this_cpu_read(apf_reason.enabled)) {
+ token = __this_cpu_read(apf_reason.token);
+ kvm_async_pf_task_wake(token);
+ __this_cpu_write(apf_reason.token, 0);
+ wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ }
+
+ irqentry_exit(regs, state);
+ set_irq_regs(old_regs);
+}
+
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@@ -289,8 +308,8 @@ static void kvm_register_steal_time(void)
return;
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
- pr_info("kvm-stealtime: cpu %d, msr %llx\n",
- cpu, (unsigned long long) slow_virt_to_phys(st));
+ pr_info("stealtime: cpu %d, msr %llx\n", cpu,
+ (unsigned long long) slow_virt_to_phys(st));
}
static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -311,17 +330,19 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
- if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
- u64 pa;
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
+ u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
- pa |= KVM_ASYNC_PF_ENABLED;
+ pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+ wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
@@ -493,7 +514,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
- WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+ WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+ ret);
min = max = apic_id;
ipi_bitmap = 0;
}
@@ -503,7 +525,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
if (ipi_bitmap) {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
- WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+ WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+ ret);
}
local_irq_restore(flags);
@@ -533,7 +556,7 @@ static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
- pr_info("KVM setup pv IPIs\n");
+ pr_info("setup PV IPIs\n");
}
static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
@@ -551,13 +574,6 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
}
}
-static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
-{
- native_smp_prepare_cpus(max_cpus);
- if (kvm_para_has_hint(KVM_HINTS_REALTIME))
- static_branch_disable(&virt_spin_lock_key);
-}
-
static void __init kvm_smp_prepare_boot_cpu(void)
{
/*
@@ -646,19 +662,20 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
- if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
static_branch_enable(&kvm_async_pf_enabled);
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
+ }
#ifdef CONFIG_SMP
- smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (pv_sched_yield_supported()) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
- pr_info("KVM setup pv sched yield\n");
+ pr_info("setup PV sched yield\n");
}
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
- pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
+ pr_err("failed to install cpu hotplug callbacks\n");
#else
sev_map_percpu_data();
kvm_guest_cpu_init();
@@ -854,16 +871,36 @@ asm(
*/
void __init kvm_spinlock_init(void)
{
- /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
- if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+ /*
+ * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
+ * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
+ * preferred over native qspinlock when vCPU is preempted.
+ */
+ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
+ pr_info("PV spinlocks disabled, no host support\n");
return;
+ }
- if (kvm_para_has_hint(KVM_HINTS_REALTIME))
- return;
+ /*
+ * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
+ * are available.
+ */
+ if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
+ pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
+ goto out;
+ }
- /* Don't use the pvqspinlock code if there is only 1 vCPU. */
- if (num_possible_cpus() == 1)
- return;
+ if (num_possible_cpus() == 1) {
+ pr_info("PV spinlocks disabled, single CPU\n");
+ goto out;
+ }
+
+ if (nopvspin) {
+ pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
+ goto out;
+ }
+
+ pr_info("PV spinlocks enabled\n");
__pv_init_lock_hash();
pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
@@ -876,6 +913,13 @@ void __init kvm_spinlock_init(void)
pv_ops.lock.vcpu_is_preempted =
PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
}
+ /*
+ * When PV spinlock is enabled which is preferred over
+ * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
+ * Just disable it anyway.
+ */
+out:
+ static_branch_disable(&virt_spin_lock_key);
}
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
@@ -895,8 +939,8 @@ static void kvm_enable_host_haltpoll(void *i)
void arch_haltpoll_enable(unsigned int cpu)
{
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
- pr_err_once("kvm: host does not support poll control\n");
- pr_err_once("kvm: host upgrade recommended\n");
+ pr_err_once("host does not support poll control\n");
+ pr_err_once("host upgrade recommended\n");
return;
}
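
Taken together, the kvm.c hunks above move the "page ready" half of async page faults from a synthetic #PF to a dedicated interrupt. Condensed from kvm_guest_cpu_init() and the sysvec handler above -- a paraphrase for orientation, not additional kernel code:

    /* Per-CPU enable: tell the host which vector to raise and where the
     * shared apf_reason structure lives. */
    wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
    wrmsrl(MSR_KVM_ASYNC_PF_EN,
           slow_virt_to_phys(this_cpu_ptr(&apf_reason)) |
           KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT);

    /* Interrupt handler: consume the token, wake the stalled task, ack. */
    token = __this_cpu_read(apf_reason.token);
    kvm_async_pf_task_wake(token);
    __this_cpu_write(apf_reason.token, 0);
    wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);

"Page not present" notifications still arrive via #PF and are handled in __kvm_handle_async_pf(); a leftover KVM_PV_REASON_PAGE_READY flag there is now a warning rather than a valid path.
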
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8a294f9747aa..fa873e3e6e90 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -54,28 +54,38 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
#define F feature_bit
-int kvm_update_cpuid(struct kvm_vcpu *vcpu)
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- struct kvm_lapic *apic = vcpu->arch.apic;
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
- if (!best)
- return 0;
+ /*
+ * The existing code assumes virtual address is 48-bit or 57-bit in the
+ * canonical address checks; exit if it is ever changed.
+ */
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best) {
+ int vaddr_bits = (best->eax & 0xff00) >> 8;
+
+ if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
+ return -EINVAL;
+ }
- /* Update OSXSAVE bit */
- if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
- cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
+ return 0;
+}
+
+void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ if (best) {
+ /* Update OSXSAVE bit */
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
+ cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
- cpuid_entry_change(best, X86_FEATURE_APIC,
+ cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
-
- if (apic) {
- if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
- apic->lapic_timer.timer_mode_mask = 3 << 17;
- else
- apic->lapic_timer.timer_mode_mask = 1 << 17;
}
best = kvm_find_cpuid_entry(vcpu, 7, 0);
@@ -84,31 +94,14 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
- if (!best) {
- vcpu->arch.guest_supported_xcr0 = 0;
- } else {
- vcpu->arch.guest_supported_xcr0 =
- (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+ if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
- }
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- /*
- * The existing code assumes virtual address is 48-bit or 57-bit in the
- * canonical address checks; exit if it is ever changed.
- */
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best) {
- int vaddr_bits = (best->eax & 0xff00) >> 8;
-
- if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
- return -EINVAL;
- }
-
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
@@ -121,14 +114,39 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
+}
+
+static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ struct kvm_cpuid_entry2 *best;
+
+ kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ if (best && apic) {
+ if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
+ apic->lapic_timer.timer_mode_mask = 3 << 17;
+ else
+ apic->lapic_timer.timer_mode_mask = 1 << 17;
+
+ kvm_apic_set_version(vcpu);
+ }
+
+ best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+ if (!best)
+ vcpu->arch.guest_supported_xcr0 = 0;
+ else
+ vcpu->arch.guest_supported_xcr0 =
+ (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
- /* Note, maxphyaddr must be updated before tdp_level. */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
- vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
kvm_mmu_reset_context(vcpu);
kvm_pmu_refresh(vcpu);
- return 0;
+ vcpu->arch.cr4_guest_rsvd_bits =
+ __cr4_reserved_bits(guest_cpuid_has, vcpu);
+ kvm_x86_ops.update_exception_bitmap(vcpu);
}
static int is_efer_nx(void)
@@ -203,10 +221,16 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_entries[i].padding[2] = 0;
}
vcpu->arch.cpuid_nent = cpuid->nent;
+ r = kvm_check_cpuid(vcpu);
+ if (r) {
+ vcpu->arch.cpuid_nent = 0;
+ kvfree(cpuid_entries);
+ goto out;
+ }
+
cpuid_fix_nx_cap(vcpu);
- kvm_apic_set_version(vcpu);
- kvm_x86_ops.cpuid_update(vcpu);
- r = kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
kvfree(cpuid_entries);
out:
@@ -227,9 +251,14 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
vcpu->arch.cpuid_nent = cpuid->nent;
- kvm_apic_set_version(vcpu);
- kvm_x86_ops.cpuid_update(vcpu);
- r = kvm_update_cpuid(vcpu);
+ r = kvm_check_cpuid(vcpu);
+ if (r) {
+ vcpu->arch.cpuid_nent = 0;
+ goto out;
+ }
+
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
out:
return r;
}
@@ -604,7 +633,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
eax.split.bit_width = cap.bit_width_gp;
eax.split.mask_length = cap.events_mask_len;
- edx.split.num_counters_fixed = cap.num_counters_fixed;
+ edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
edx.split.reserved = 0;
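
The cpuid.c changes split the old kvm_update_cpuid() into three steps with distinct jobs, and both KVM_SET_CPUID paths now follow the same ordering. Distilled from the two kvm_vcpu_ioctl_set_cpuid* hunks above (illustration only, not additional kernel code):

    /*
     * 1. kvm_check_cpuid()          - pure validation (e.g. guest virtual
     *                                 address width must be 0, 48 or 57);
     *                                 on failure the entries are dropped.
     * 2. kvm_update_cpuid_runtime() - refresh bits that mirror runtime
     *                                 state (OSXSAVE, OSPKE, APIC enable,
     *                                 MWAIT, xsave area sizes).
     * 3. kvm_vcpu_after_set_cpuid() - heavier reconfiguration: vendor
     *                                 vcpu_after_set_cpuid() hook, APIC
     *                                 version, guest_supported_xcr0,
     *                                 maxphyaddr, MMU reset, PMU refresh,
     *                                 CR4 reserved bits, exception bitmap.
     */
    r = kvm_check_cpuid(vcpu);
    if (r) {
            vcpu->arch.cpuid_nent = 0;
            goto out;
    }
    kvm_update_cpuid_runtime(vcpu);
    kvm_vcpu_after_set_cpuid(vcpu);
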
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 05434cd9342f..3a923ae15f2f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -9,7 +9,7 @@
extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
-int kvm_update_cpuid(struct kvm_vcpu *vcpu);
+void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4ce2ddd26c0b..5ccbee7165a2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -354,7 +354,6 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- struct kvm_cpuid_entry2 *feat;
u32 v = APIC_VERSION;
if (!lapic_in_kernel(vcpu))
@@ -367,8 +366,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* version first and level-triggered interrupts never get EOIed in
* IOAPIC.
*/
- feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+ if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
@@ -2068,7 +2066,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
- kvm_lapic_set_reg(apic, APIC_TDCR, val);
+ kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
apic->lapic_timer.period) {
@@ -2085,7 +2083,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_SELF_IPI:
if (apic_x2apic_mode(apic)) {
- kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
+ kvm_lapic_reg_write(apic, APIC_ICR,
+ APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
} else
ret = 1;
break;
@@ -2232,7 +2231,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
- kvm_update_cpuid(vcpu