summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/api.rst4
-rw-r--r--arch/x86/include/asm/kvm_host.h15
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/include/uapi/asm/kvm.h2
-rw-r--r--arch/x86/kernel/kvm.c129
-rw-r--r--arch/x86/kernel/kvmclock.c26
-rw-r--r--arch/x86/kvm/cpuid.c20
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/kvm_emulate.h1
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c20
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c17
-rw-r--r--arch/x86/kvm/svm/nested.c23
-rw-r--r--arch/x86/kvm/svm/sev.c32
-rw-r--r--arch/x86/kvm/svm/svm.c62
-rw-r--r--arch/x86/kvm/svm/svm.h1
-rw-r--r--arch/x86/kvm/vmx/capabilities.h3
-rw-r--r--arch/x86/kvm/vmx/nested.c29
-rw-r--r--arch/x86/kvm/vmx/vmx.c220
-rw-r--r--arch/x86/kvm/vmx/vmx.h12
-rw-r--r--arch/x86/kvm/x86.c153
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/qgroup.c16
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/tree-log.c3
-rw-r--r--fs/btrfs/zoned.c5
-rw-r--r--tools/arch/powerpc/include/uapi/asm/errno.h1
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h9
-rw-r--r--tools/arch/x86/include/asm/msr-index.h4
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--tools/arch/x86/lib/memcpy_64.S2
-rw-r--r--tools/arch/x86/lib/memset_64.S2
-rw-r--r--tools/include/asm/alternative.h (renamed from tools/include/asm/alternative-asm.h)0
-rw-r--r--tools/include/uapi/asm-generic/unistd.h11
-rw-r--r--tools/include/uapi/drm/drm.h125
-rw-r--r--tools/include/uapi/drm/i915_drm.h1
-rw-r--r--tools/include/uapi/linux/kvm.h45
-rw-r--r--tools/include/uapi/linux/perf_event.h26
-rw-r--r--tools/include/uapi/linux/prctl.h4
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt2
-rw-r--r--tools/perf/Makefile.config1
-rw-r--r--tools/perf/arch/arm64/util/kvm-stat.c2
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl5
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl4
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl4
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl4
-rw-r--r--tools/perf/pmu-events/jevents.c6
-rw-r--r--tools/perf/tests/attr/base-record2
-rw-r--r--tools/perf/tests/attr/base-stat2
-rw-r--r--tools/perf/tests/attr/system-wide-dummy2
-rw-r--r--tools/perf/util/Build7
-rw-r--r--tools/perf/util/record.c8
-rw-r--r--tools/perf/util/session.c4
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/handlers.S4
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c88
-rw-r--r--virt/kvm/kvm_main.c7
62 files changed, 859 insertions, 390 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 22d077562149..7fcb2fd38f42 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
4.126 KVM_X86_SET_MSR_FILTER
----------------------------
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
KVM_EXIT_X86_WRMSR exit notifications.
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
---------------------------
:Architectures: x86
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cbbcee0a84f9..55efbacfc244 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
#define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
#define KVM_NR_DB_REGS 4
+#define DR6_BUS_LOCK (1 << 11)
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
#define DR6_BT (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
* DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
*/
#define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE 0x0001e00f
+#define DR6_VOLATILE 0x0001e80f
#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
#define DR7_BP_EN_MASK 0x000000ff
@@ -407,7 +409,7 @@ struct kvm_mmu {
u32 pkru_mask;
u64 *pae_root;
- u64 *lm_root;
+ u64 *pml4_root;
/*
* check zero bits on shadow page table entries, these
@@ -1417,6 +1419,7 @@ struct kvm_arch_async_pf {
bool direct_map;
};
+extern u32 __read_mostly kvm_nr_uret_msrs;
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
@@ -1775,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+ return kvm_find_user_return_msr(msr) >= 0;
+}
+
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 338119852512..69299878b200 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,8 +7,6 @@
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
-extern void kvmclock_init(void);
-
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
}
#ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
return 0;
}
-static inline void kvm_disable_steal_time(void)
-{
- return;
-}
-
static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
return false;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..0662f644aad9 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags;
} smm;
+ __u16 pad;
+
__u32 flags;
__u64 preemption_timer_deadline;
};
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d307c22e5c18..a26643dc6bd6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,7 @@
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
+#include <linux/syscore_ops.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -37,6 +38,7 @@
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
+#include <asm/reboot.h>
#include <asm/svm.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
- pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+ pr_info("setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
- pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+ pr_info("disable async PF for cpu %d\n", smp_processor_id());
}
-static void kvm_pv_guest_cpu_reboot(void *unused)
+static void kvm_disable_steal_time(void)
{
- /*
- * We disable PV EOI before we load a new kernel by kexec,
- * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
- * New kernel can re-enable when it boots.
- */
- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
- wrmsrl(MSR_KVM_PV_EOI_EN, 0);
- kvm_pv_disable_apf();
- kvm_disable_steal_time();
-}
+ if (!has_steal_clock)
+ return;
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
- unsigned long code, void *unused)
-{
- if (code == SYS_RESTART)
- on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
- return NOTIFY_DONE;
+ wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}
-static struct notifier_block kvm_pv_reboot_nb = {
- .notifier_call = kvm_pv_reboot_notify,
-};
-
static u64 kvm_steal_clock(int cpu)
{
u64 steal;
@@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
return steal;
}
-void kvm_disable_steal_time(void)
-{
- if (!has_steal_clock)
- return;
-
- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
early_set_memory_decrypted((unsigned long) ptr, size);
@@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
}
}
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+ kvm_disable_steal_time();
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
+ if (!shutdown)
+ apf_task_wake_all();
+ kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kvm_guest_cpu_init();
+ local_irq_restore(flags);
+ return 0;
+}
+
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void)
kvm_spinlock_init();
}
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
{
- kvm_disable_steal_time();
- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
- wrmsrl(MSR_KVM_PV_EOI_EN, 0);
- kvm_pv_disable_apf();
- apf_task_wake_all();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kvm_guest_cpu_offline(false);
+ local_irq_restore(flags);
+ return 0;
}
-static int kvm_cpu_online(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
{
- local_irq_disable();
- kvm_guest_cpu_init();
- local_irq_enable();
+ kvm_guest_cpu_offline(false);
+
return 0;
}
-static int kvm_cpu_down_prepare(unsigned int cpu)
+static void kvm_resume(void)
{
- local_irq_disable();
- kvm_guest_cpu_offline();
- local_irq_enable();
- return 0;
+ kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+ .suspend = kvm_suspend,
+ .resume = kvm_resume,
+};
+
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+ kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+ unsigned long code, void *unused)
+{
+ if (code == SYS_RESTART)
+ on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+ return NOTIFY_DONE;
}
+static struct notifier_block kvm_pv_reboot_nb = {
+ .notifier_call = kvm_pv_reboot_notify,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ kvm_guest_cpu_offline(true);
+ native_machine_crash_shutdown(regs);
+}
#endif
static void __init kvm_guest_init(void)
@@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
kvm_guest_cpu_init();
#endif
+#ifdef CONFIG_KEXEC_CORE
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+ register_syscore_ops(&kvm_syscore_ops);
+
/*
* Hard lockup detection is enabled by default. Disable it, as guests
* can get false positives too easily, for example if the host is
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d37ed4e1d033..ad273e5861c1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -20,7 +20,6 @@
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
-#include <asm/reboot.h>
#include <asm/kvmclock.h>
static int kvmclock __initdata = 1;
@@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
}
#endif
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
- native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_crash_shutdown(regs);
-}
-#endif
-
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
{
native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_shutdown();
}
static void __init kvmclock_init_mem(void)
@@ -351,10 +331,6 @@ void __init kvmclock_init(void)
#endif
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
- machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC_CORE
- machine_ops.crash_shutdown = kvm_crash_shutdown;
-#endif
kvm_get_preset_lpj();
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 19606a341888..9a48f138832d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC)
+ F(SGX_LC) | F(BUS_LOCK_DETECT)
);
/* Set LA57 based on hardware capability. */
if (cpuid_ecx(7) & F(LA57))
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
F(PMM) | F(PMM_EN)
);
+
+ /*
+ * Hide RDTSCP and RDPID if either feature is reported as supported but
+ * probing MSR_TSC_AUX failed. This is purely a sanity check and
+ * should never happen, but the guest will likely crash if RDTSCP or
+ * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+ * the past. For example, the sanity check may fire if this instance of
+ * KVM is running as L1 on top of an older, broken KVM.
+ */
+ if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+ kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+ !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+ kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+ kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+ }
}
EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
@@ -637,7 +652,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
entry->eax = 0;
- entry->ecx = F(RDPID);
+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+ entry->ecx = F(RDPID);
++array->nent;
default:
break;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 77e1c89a95a7..8a0ccdb56076 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
* from the register case of group9.
*/
static const struct gprefix pfx_0f_c7_7 = {
- N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
};
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0d359115429a..f016838faedd 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -468,6 +468,7 @@ enum x86_intercept {
x86_intercept_clgi,
x86_intercept_skinit,
x86_intercept_rdtscp,
+ x86_intercept_rdpid,
x86_intercept_icebp,
x86_intercept_wbinvd,
x86_intercept_monitor,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 152591f9243a..c0ebef560bd1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
WARN_ON(rcuwait_active(&vcpu->wait));
- cancel_hv_timer(apic);
apic_timer_expired(apic, false);
+ cancel_hv_timer(apic);
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
advance_periodic_target_expiration(apic);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4b3ee244ebe0..0144c40d09c7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
- if (WARN_ON_ONCE(!mmu->lm_root)) {
+ if (WARN_ON_ONCE(!mmu->pml4_root)) {
r = -EIO;
goto out_unlock;
}
- mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+ mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
}
for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
}
if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
- mmu->root_hpa = __pa(mmu->lm_root);
+ mmu->root_hpa = __pa(mmu->pml4_root);
else
mmu->root_hpa = __pa(mmu->pae_root);
@@ -3350,7 +3350,7 @@ out_unlock:
static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
- u64 *lm_root, *pae_root;
+ u64 *pml4_root, *pae_root;
/*
* When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
return -EIO;
- if (mmu->pae_root && mmu->lm_root)
+ if (mmu->pae_root && mmu->pml4_root)
return 0;
/*
* The special roots should always be allocated in concert. Yell and
* bail if KVM ends up in a state where only one of the roots is valid.
*/
- if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
return -EIO;
/*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
if (!pae_root)
return -ENOMEM;
- lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
- if (!lm_root) {
+ pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+ if (!pml4_root) {
free_page((unsigned long)pae_root);
return -ENOMEM;
}
mmu->pae_root = pae_root;
- mmu->lm_root = lm_root;
+ mmu->pml4_root = pml4_root;
return 0;
}
@@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
if (!tdp_enabled && mmu->pae_root)
set_memory_encrypted((unsigned long)mmu->pae_root, 1);
free_page((unsigned long)mmu->pae_root);
- free_page((unsigned long)mmu->lm_root);
+ free_page((unsigned long)mmu->pml4_root);
}
static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 88f69a6cc492..95eeb5ac6a8a 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
}
/**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
* @kvm: kvm instance
* @as_id: the address space of the paging structure the SPTE was a part of
* @gfn: the base GFN that was mapped by the SPTE
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+ if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+ if (is_large_pte(old_spte))
+ atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+ else
+ atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+ }
+
/*
* The only times a SPTE should be changed from a non-present to
* non-present state is when an MMIO entry is installed/modified/
@@ -1009,6 +1016,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
}
if (!is_shadow_present_pte(iter.old_spte)) {
+ /*
+ * If SPTE has been forzen by another thread, just
+ * give up and retry, avoiding unnecessary page table
+ * allocation and free.
+ */
+ if (is_removed_spte(iter.old_spte))
+ break;
+
sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
child_pt = sp->spt;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 540d43ba2cf4..5e8d8443154e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/