summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-15 10:18:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-15 10:18:16 -0700
commite37a07e0c29cd2cef4633b1e6db5579cc99ba4cd (patch)
treed968ca38ebb196c1cf55aa83554623b326592cf1
parenta80099a152d0719e2d8d750e07f4ffa991553d30 (diff)
parentd3457c877b14aaee8c52923eedf05a3b78af0476 (diff)
downloadlinux-e37a07e0c29cd2cef4633b1e6db5579cc99ba4cd.tar.gz
linux-e37a07e0c29cd2cef4633b1e6db5579cc99ba4cd.tar.bz2
linux-e37a07e0c29cd2cef4633b1e6db5579cc99ba4cd.zip
Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Radim Krčmář: "Second batch of KVM updates for v4.13 Common: - add uevents for VM creation/destruction - annotate and properly access RCU-protected objects s390: - rename IOCTL added in the first v4.13 merge x86: - emulate VMLOAD VMSAVE feature in SVM - support paravirtual asynchronous page fault while nested - add Hyper-V userspace interfaces for better migration - improve master clock corner cases - extend internal error reporting after EPT misconfig - correct single-stepping of emulated instructions in SVM - handle MCE during VM entry - fix nVMX VM entry checks and nVMX VMCS shadowing" * tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits) kvm: x86: hyperv: make VP_INDEX managed by userspace KVM: async_pf: Let guest support delivery of async_pf from guest mode KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf KVM: async_pf: Add L1 guest async_pf #PF vmexit handler KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2 KVM: x86: make backwards_tsc_observed a per-VM variable KVM: trigger uevents when creating or destroying a VM KVM: SVM: Enable Virtual VMLOAD VMSAVE feature KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition KVM: SVM: Rename lbr_ctl field in the vmcb control area KVM: SVM: Prepare for new bit definition in lbr_ctl KVM: SVM: handle singlestep exception when skipping emulated instructions KVM: x86: take slots_lock in kvm_free_pit KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition kvm: vmx: Properly handle machine check during VM-entry KVM: x86: update master clock before computing kvmclock_offset kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls kvm: nVMX: Validate the I/O bitmaps on nested VM-entry ...
-rw-r--r--Documentation/virtual/kvm/api.txt18
-rw-r--r--Documentation/virtual/kvm/msr.txt5
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h11
-rw-r--r--arch/x86/include/asm/svm.h5
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/kvm.c7
-rw-r--r--arch/x86/kvm/hyperv.c67
-rw-r--r--arch/x86/kvm/hyperv.h3
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/mmu.c35
-rw-r--r--arch/x86/kvm/mmu.h3
-rw-r--r--arch/x86/kvm/svm.c151
-rw-r--r--arch/x86/kvm/vmx.c162
-rw-r--r--arch/x86/kvm/x86.c43
-rw-r--r--include/linux/kvm_host.h17
-rw-r--r--include/uapi/linux/kvm.h4
-rw-r--r--virt/kvm/eventfd.c8
-rw-r--r--virt/kvm/irqchip.c2
-rw-r--r--virt/kvm/kvm_main.c131
21 files changed, 470 insertions, 207 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3a9831b72945..e63a35fafef0 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is
available.
+
+8.11 KVM_CAP_HYPERV_SYNIC2
+
+Architectures: x86
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+8.12 KVM_CAP_HYPERV_VP_INDEX
+
+Architectures: x86
+
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
+value is used to denote the target vcpu for a SynIC interrupt. For
+compatibilty, KVM initializes this msr to KVM's internal vcpu index. When this
+capability is absent, userspace can still query this msr's value.
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 0a9ea515512a..1ebecc115dc6 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
data: Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be
- zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+ zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu 0 when
disabled. Bit 1 is 1 if asynchronous page faults can be injected
- when vcpu is in cpl == 0.
+ when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+ are delivered to L1 as #PF vmexits.
First 4 byte of 64 byte memory location will be written to by
the hypervisor at the time of asynchronous page fault (APF)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2701e5f8145b..ca3c48c0872f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -286,6 +286,7 @@
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 722d0e568863..fde36f189836 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@ struct x86_exception {
u16 error_code;
bool nested_page_fault;
u64 address; /* cr2 or nested page fault gpa */
+ u8 async_page_fault;
};
/*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1588e9e3dc01..87ac4fba6d8e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
DECLARE_BITMAP(auto_eoi_bitmap, 256);
DECLARE_BITMAP(vec_bitmap, 256);
bool active;
+ bool dont_zero_synic_pages;
};
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
+ u32 vp_index;
u64 hv_vapic;
s64 runtime_offset;
struct kvm_vcpu_hv_synic synic;
@@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
bool reinject;
u8 nr;
u32 error_code;
+ u8 nested_apf;
} exception;
struct kvm_queued_interrupt {
@@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
u64 msr_val;
u32 id;
bool send_user_only;
+ u32 host_apf_reason;
+ unsigned long nested_apf_token;
+ bool delivery_as_pf_vmexit;
} apf;
/* OSVW MSRs (AMD only) */
@@ -803,6 +809,7 @@ struct kvm_arch {
int audit_point;
#endif
+ bool backwards_tsc_observed;
bool boot_vcpu_runs_old_kvmclock;
u32 bsp_vcpu_id;
@@ -952,9 +959,7 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu);
- void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
- bool has_error_code, u32 error_code,
- bool reinject);
+ void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 14824fc78f7e..58fffe79e417 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
- u64 lbr_ctl;
+ u64 virt_ext;
u32 clean;
u32 reserved_5;
u64 next_rip;
@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
#define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index cff0bb6556f8..a965e5b0d328 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 43e10d6fdbed..71c17a5be983 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
#ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
- wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+ pa |= KVM_ASYNC_PF_ENABLED;
+
+ /* Async page fault support for L1 hypervisor is optional */
+ if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+ (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
+ wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ebae57ac5902..2695a34fa1c5 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
return 0;
}
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
+static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
+{
+ struct kvm_vcpu *vcpu = NULL;
+ int i;
+
+ if (vpidx < KVM_MAX_VCPUS)
+ vcpu = kvm_get_vcpu(kvm, vpidx);
+ if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+ return vcpu;
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+ return vcpu;
+ return NULL;
+}
+
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
struct kvm_vcpu *vcpu;
struct kvm_vcpu_hv_synic *synic;
- if (vcpu_id >= atomic_read(&kvm->online_vcpus))
- return NULL;
- vcpu = kvm_get_vcpu(kvm, vcpu_id);
+ vcpu = get_vcpu_by_vpidx(kvm, vpidx);
if (!vcpu)
return NULL;
synic = vcpu_to_synic(vcpu);
@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
synic->version = data;
break;
case HV_X64_MSR_SIEFP:
- if (data & HV_SYNIC_SIEFP_ENABLE)
+ if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
+ !synic->dont_zero_synic_pages)
if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) {
ret = 1;
@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
synic_exit(synic, msr);
break;
case HV_X64_MSR_SIMP:
- if (data & HV_SYNIC_SIMP_ENABLE)
+ if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
+ !synic->dont_zero_synic_pages)
if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) {
ret = 1;
@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
return ret;
}
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
struct kvm_vcpu_hv_synic *synic;
- synic = synic_get(kvm, vcpu_id);
+ synic = synic_get(kvm, vpidx);
if (!synic)
return -EINVAL;
@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
kvm_hv_notify_acked_sint(vcpu, i);
}
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
struct kvm_vcpu_hv_synic *synic;
- synic = synic_get(kvm, vcpu_id);
+ synic = synic_get(kvm, vpidx);
if (!synic)
return -EINVAL;
@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
stimer_init(&hv_vcpu->stimer[i], i);
}
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+
+ hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+}
+
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
+ struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
/*
* Hyper-V SynIC auto EOI SINT's are
* not compatible with APICV, so deactivate APICV
*/
kvm_vcpu_deactivate_apicv(vcpu);
- vcpu_to_synic(vcpu)->active = true;
+ synic->active = true;
+ synic->dont_zero_synic_pages = dont_zero_synic_pages;
return 0;
}
@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
switch (msr) {
+ case HV_X64_MSR_VP_INDEX:
+ if (!host)
+ return 1;
+ hv->vp_index = (u32)data;
+ break;
case HV_X64_MSR_APIC_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
switch (msr) {
- case HV_X64_MSR_VP_INDEX: {
- int r;
- struct kvm_vcpu *v;
-
- kvm_for_each_vcpu(r, v, vcpu->kvm) {
- if (v == vcpu) {
- data = r;
- break;
- }
- }
+ case HV_X64_MSR_VP_INDEX:
+ data = hv->vp_index;
break;
- }
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
case HV_X64_MSR_ICR:
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index cd1119538add..e637631a9574 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
void kvm_hv_irq_routing_update(struct kvm *kvm);
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a78b445ce411..af192895b1fc 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
struct kvm_pit *pit = kvm->arch.vpit;
if (pit) {
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
+ mutex_unlock(&kvm->slots_lock);
kvm_pit_set_reinject(pit, false);
hrtimer_cancel(&pit->pit_state.timer);
kthread_destroy_worker(pit->worker);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aafd399cf8c6..9b1dd114956a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -46,6 +46,7 @@
#include <asm/io.h>
#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
+#include "trace.h"
/*
* When setting this variable to true it enables Two-Dimensional-Paging
@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
kvm_event_needs_reinjection(vcpu)))
return false;
- if (is_guest_mode(vcpu))
+ if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
return kvm_x86_ops->interrupt_allowed(vcpu);
@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false;
}
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+ u64 fault_address, char *insn, int insn_len,
+ bool need_unprotect)
+{
+ int r = 1;
+
+ switch (vcpu->arch.apf.host_apf_reason) {
+ default:
+ trace_kvm_page_fault(fault_address, error_code);
+
+ if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+ kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+ r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
+ insn_len);
+ break;
+ case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ vcpu->arch.apf.host_apf_reason = 0;
+ local_irq_disable();
+ kvm_async_pf_task_wait(fault_address);
+ local_irq_enable();
+ break;
+ case KVM_PV_REASON_PAGE_READY:
+ vcpu->arch.apf.host_apf_reason = 0;
+ local_irq_disable();
+ kvm_async_pf_task_wake(fault_address);
+ local_irq_enable();
+ break;
+ }
+ return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
static bool
check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
{
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a276834950c1..d7d248a000dd 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+ u64 fault_address, char *insn, int insn_len,
+ bool need_unprotect);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 905ea6052517..4d8141e533c3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -194,7 +194,6 @@ struct vcpu_svm {
unsigned int3_injected;
unsigned long int3_rip;
- u32 apf_reason;
/* cached guest cpuid flags for faster access */
bool nrips_enabled : 1;
@@ -277,6 +276,10 @@ static int avic;
module_param(avic, int, S_IRUGO);
#endif
+/* enable/disable Virtual VMLOAD VMSAVE */
+static int vls = true;
+module_param(vls, int, 0444);
+
/* AVIC VM ID bit masks and lock */
static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
static DEFINE_SPINLOCK(avic_vm_id_lock);
@@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
svm_set_interrupt_shadow(vcpu, 0);
}
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
- bool has_error_code, u32 error_code,
- bool reinject)
+static void svm_queue_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned nr = vcpu->arch.exception.nr;
+ bool has_error_code = vcpu->arch.exception.has_error_code;
+ bool reinject = vcpu->arch.exception.reinject;
+ u32 error_code = vcpu->arch.exception.error_code;
/*
* If we are within a nested VM we'd better #VMEXIT and let the guest
@@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
{
u32 *msrpm = svm->msrpm;
- svm->vmcb->control.lbr_ctl = 1;
+ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
@@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
{
u32 *msrpm = svm->msrpm;
- svm->vmcb->control.lbr_ctl = 0;
+ svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
@@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void)
}
}
+ if (vls) {
+ if (!npt_enabled ||
+ !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+ !IS_ENABLED(CONFIG_X86_64)) {
+ vls = false;
+ } else {
+ pr_info("Virtual VMLOAD VMSAVE supported\n");
+ }
+ }
+
return 0;
err:
@@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm)
if (avic)
avic_init_vmcb(svm);
+ /*
+ * If hardware supports Virtual VMLOAD VMSAVE then enable it
+ * in VMCB and clear intercepts to avoid #VMEXIT.
+ */
+ if (vls) {
+ clr_intercept(svm, INTERCEPT_VMLOAD);
+ clr_intercept(svm, INTERCEPT_VMSAVE);
+ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ }
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -2096,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
static int pf_interception(struct vcpu_svm *svm)
{
u64 fault_address = svm->vmcb->control.exit_info_2;
- u64 error_code;
- int r = 1;
+ u64 error_code = svm->vmcb->control.exit_info_1;
- switch (svm->apf_reason) {
- default:
- error_code = svm->vmcb->control.exit_info_1;
-
- trace_kvm_page_fault(fault_address, error_code);
- if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
- kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
- r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+ return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
svm->vmcb->control.insn_bytes,
- svm->vmcb->control.insn_len);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
- svm->apf_reason = 0;
- local_irq_disable();
- kvm_async_pf_task_wait(fault_address);
- local_irq_enable();
- break;
- case KVM_PV_REASON_PAGE_READY:
- svm->apf_reason = 0;
- local_irq_disable();
- kvm_async_pf_task_wake(fault_address);
- local_irq_enable();
- break;
- }
- return r;
+ svm->vmcb->control.insn_len, !npt_enabled);
}
static int db_interception(struct vcpu_svm *svm)
@@ -2267,7 +2269,7 @@ static int io_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
- int size, in, string;
+ int size, in, string, ret;
unsigned port;
++svm->vcpu.stat.io_exits;
@@ -2279,10 +2281,16 @@ static int io_interception(struct vcpu_svm *svm)
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
svm->next_rip = svm->vmcb->control.exit_info_2;
- skip_emulated_instruction(&svm->vcpu);
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
- return in ? kvm_fast_pio_in(vcpu, size, port)
- : kvm_fast_pio_out(vcpu, size, port);
+ /*
+ * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ if (in)
+ return kvm_fast_pio_in(vcpu, size, port) && ret;
+ else
+ return kvm_fast_pio_out(vcpu, size, port) && ret;
}
static int nmi_interception(struct vcpu_svm *svm)
@@ -2415,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
if (!is_guest_mode(&svm->vcpu))
return 0;
+ vmexit = nested_svm_intercept(svm);
+ if (vmexit != NESTED_EXIT_DONE)
+ return 0;
+
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = error_code;
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
- vmexit = nested_svm_intercept(svm);
- if (vmexit == NESTED_EXIT_DONE)
- svm->nested.exit_required = true;
+ if (svm->vcpu.arch.exception.nested_apf)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ else
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ svm->nested.exit_required = true;
return vmexit;
}
@@ -2598,7 +2610,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
break;
case SVM_EXIT_EXCP_BASE + PF_VECTOR:
/* When we're shadowing, trap PFs, but not async PF */
- if (!npt_enabled && svm->apf_reason == 0)
+ if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
return NESTED_EXIT_HOST;
break;
default:
@@ -2645,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
}
/* async page fault always cause vmexit */
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
- svm->apf_reason != 0)
+ svm->vcpu.arch.exception.nested_apf != 0)
vmexit = NESTED_EXIT_DONE;
break;
}
@@ -2702,7 +2714,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
dst->event_inj = from->event_inj;
dst->event_inj_err = from->event_inj_err;
dst->nested_cr3 = from->nested_cr3;
- dst->lbr_ctl = from->lbr_ctl;
+ dst->virt_ext = from->virt_ext;
}
static int nested_svm_vmexit(struct vcpu_svm *svm)
@@ -3008,7 +3020,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
/* We don't want to see VMMCALLs from a nested guest */
clr_intercept(svm, INTERCEPT_VMMCALL);
- svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+ svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
@@ -3055,6 +3067,7 @@ static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
struct page *page;
+ int ret;
if (nested_svm_check_permissions(svm))
return 1;
@@ -3064,18 +3077,19 @@ static int vmload_interception(struct vcpu_svm *svm)
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
nested_svm_unmap(page);
- return 1;
+ return ret;
}
static int vmsave_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
struct page *page;
+ int ret;
if (nested_svm_check_permissions(svm))
return 1;
@@ -3085,12 +3099,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
nested_svm_unmap(page);
- return 1;
+ return ret;
}
static int vmrun_interception(struct vcpu_svm *svm)
@@ -3123,25 +3137,29 @@ failed:
static int stgi_interception(struct vcpu_svm *svm)
{
+ int ret;
+
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
enable_gif(svm);
- return 1;
+ return ret;
}
static int clgi_interception(struct vcpu_svm *svm)
{
+ int ret;
+
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
disable_gif(svm);
@@ -3152,7 +3170,7 @@ static int clgi_interception(struct vcpu_svm *svm)
mark_dirty(svm->vmcb, VMCB_INTR);
}
- return 1;
+ return ret;
}
static int invlpga_interception(struct vcpu_svm *svm)
@@ -3166,8 +3184,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
static int skinit_interception(struct vcpu_svm *svm)
@@ -3190,7 +3207,7 @@ static int xsetbv_interception(struct vcpu_svm *svm)
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
return 1;
@@ -3286,8 +3303,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
- skip_emulated_instruction(&svm->vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
static int emulate_on_interception(struct vcpu_svm *svm)
@@ -3437,9 +3453,7 @@ static int dr_interception(struct vcpu_svm *svm)
kvm_register_write(&svm->vcpu, reg, val);
}
- skip_emulated_instruction(&svm->vcpu);
-
- return 1;
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
static int cr8_write_interception(struct vcpu_svm *svm)
@@ -3562,6 +3576,7 @@ static int rdmsr_interception(struct vcpu_svm *svm)
if (svm_get_msr(&svm->vcpu, &msr_info)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
} else {
trace_kvm_msr_read(ecx, msr_info.data);
@@ -3570,9 +3585,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- skip_emulated_instruction(&svm->vcpu);
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
- return 1;
}
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -3698,11 +3712,11 @@ static int wrmsr_interception(struct vcpu_svm *svm)
if (kvm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
} else {
trace_kvm_msr_write(ecx, data);
- skip_emulated_instruction(&svm->vcpu);
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
- return 1;
}
static int msr_interception(struct vcpu_svm *svm)
@@ -3731,8 +3745,7 @@ static int pause_interception(struct vcpu_svm *svm)
static int nop_interception(struct vcpu_svm *svm)
{
- skip_emulated_instruction(&(svm->vcpu));
- return 1;
+ return kvm_skip_emulated_instruction(&(svm->vcpu));
}
static int monitor_interception(struct vcpu_svm *svm)
@@ -4117,7 +4130,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);