summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-09 09:39:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-09 09:39:55 -0700
commitef688f8b8cd3eb20547a6543f03e3d8952b87769 (patch)
treed44f43e19e69bc7828a16a259dd7f42afca02e99
parent0e470763d84dcad27284067647dfb4b1a94dfce0 (diff)
parentc59fb127583869350256656b7ed848c398bef879 (diff)
downloadlinux-ef688f8b8cd3eb20547a6543f03e3d8952b87769.tar.gz
linux-ef688f8b8cd3eb20547a6543f03e3d8952b87769.tar.bz2
linux-ef688f8b8cd3eb20547a6543f03e3d8952b87769.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "The first batch of KVM patches, mostly covering x86. ARM: - Account stage2 page table allocations in memory stats x86: - Account EPT/NPT arm64 page table allocations in memory stats - Tracepoint cleanups/fixes for nested VM-Enter and emulated MSR accesses - Drop eVMCS controls filtering for KVM on Hyper-V, all known versions of Hyper-V now support eVMCS fields associated with features that are enumerated to the guest - Use KVM's sanitized VMCS config as the basis for the values of nested VMX capabilities MSRs - A myriad event/exception fixes and cleanups. Most notably, pending exceptions morph into VM-Exits earlier, as soon as the exception is queued, instead of waiting until the next vmentry. This fixed a longstanding issue where the exceptions would incorrecly become double-faults instead of triggering a vmexit; the common case of page-fault vmexits had a special workaround, but now it's fixed for good - A handful of fixes for memory leaks in error paths - Cleanups for VMREAD trampoline and VMX's VM-Exit assembly flow - Never write to memory from non-sleepable kvm_vcpu_check_block() - Selftests refinements and cleanups - Misc typo cleanups Generic: - remove KVM_REQ_UNHALT" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (94 commits) KVM: remove KVM_REQ_UNHALT KVM: mips, x86: do not rely on KVM_REQ_UNHALT KVM: x86: never write to memory from kvm_vcpu_check_block() KVM: x86: Don't snapshot pending INIT/SIPI prior to checking nested events KVM: nVMX: Make event request on VMXOFF iff INIT/SIPI is pending KVM: nVMX: Make an event request if INIT or SIPI is pending on VM-Enter KVM: SVM: Make an event request if INIT or SIPI is pending when GIF is set KVM: x86: lapic does not have to process INIT if it is blocked KVM: x86: Rename kvm_apic_has_events() to make it INIT/SIPI specific KVM: x86: Rename and expose helper to detect if INIT/SIPI are allowed KVM: nVMX: Make an event request when pending an MTF nested VM-Exit KVM: x86: make vendor code check for all nested events mailmap: Update Oliver's email address KVM: x86: Allow force_emulation_prefix to be written without a reload KVM: selftests: Add an x86-only test to verify nested exception queueing KVM: selftests: Use uapi header to get VMX and SVM exit reasons/codes KVM: x86: Rename inject_pending_events() to kvm_check_and_inject_events() KVM: VMX: Update MTF and ICEBP comments to document KVM's subtle behavior KVM: x86: Treat pending TRIPLE_FAULT requests as pending exceptions KVM: x86: Morph pending exceptions to pending VM-Exits at queue time ...
-rw-r--r--.mailmap1
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst5
-rw-r--r--Documentation/filesystems/proc.rst4
-rw-r--r--Documentation/virt/kvm/api.rst113
-rw-r--r--Documentation/virt/kvm/vcpu-requests.rst28
-rw-r--r--arch/arm64/kvm/arm.c1
-rw-r--r--arch/arm64/kvm/mmu.c36
-rw-r--r--arch/mips/kvm/emulate.c6
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c1
-rw-r--r--arch/powerpc/kvm/booke.c1
-rw-r--r--arch/powerpc/kvm/powerpc.c1
-rw-r--r--arch/riscv/kvm/vcpu_insn.c1
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h22
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h39
-rw-r--r--arch/x86/kvm/cpuid.c18
-rw-r--r--arch/x86/kvm/emulate.c31
-rw-r--r--arch/x86/kvm/hyperv.c70
-rw-r--r--arch/x86/kvm/hyperv.h6
-rw-r--r--arch/x86/kvm/lapic.c38
-rw-r--r--arch/x86/kvm/lapic.h9
-rw-r--r--arch/x86/kvm/mmu/mmu.c22
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c12
-rw-r--r--arch/x86/kvm/svm/nested.c124
-rw-r--r--arch/x86/kvm/svm/svm.c35
-rw-r--r--arch/x86/kvm/trace.h53
-rw-r--r--arch/x86/kvm/vmx/capabilities.h14
-rw-r--r--arch/x86/kvm/vmx/evmcs.c192
-rw-r--r--arch/x86/kvm/vmx/evmcs.h10
-rw-r--r--arch/x86/kvm/vmx/nested.c468
-rw-r--r--arch/x86/kvm/vmx/nested.h2
-rw-r--r--arch/x86/kvm/vmx/sgx.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S24
-rw-r--r--arch/x86/kvm/vmx/vmx.c321
-rw-r--r--arch/x86/kvm/vmx/vmx.h172
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h2
-rw-r--r--arch/x86/kvm/x86.c576
-rw-r--r--arch/x86/kvm/x86.h16
-rw-r--r--arch/x86/kvm/xen.c1
-rw-r--r--drivers/base/node.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--include/linux/kvm_host.h16
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--mm/memcontrol.c1
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/vmstat.c1
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h45
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h53
-rw-r--r--tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c295
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c24
-rw-r--r--virt/kvm/kvm_main.c9
57 files changed, 1871 insertions, 1077 deletions
diff --git a/.mailmap b/.mailmap
index 191778125ef1..3e63fb0b1088 100644
--- a/.mailmap
+++ b/.mailmap
@@ -336,6 +336,7 @@ Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
+Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>
Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Patrick Mochel <mochel@digitalimplant.org>
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index be4a77baf784..7ce8130a8924 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1355,6 +1355,11 @@ PAGE_SIZE multiple when read back.
pagetables
Amount of memory allocated for page tables.
+ sec_pagetables
+ Amount of memory allocated for secondary page tables,
+ this currently includes KVM mmu allocations on x86
+ and arm64.
+
percpu (npn)
Amount of memory used for storing per-cpu kernel
data structures.
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index e7aafc82be99..898c99eae8e4 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -982,6 +982,7 @@ Example output. You may not have all of these fields.
SUnreclaim: 142336 kB
KernelStack: 11168 kB
PageTables: 20540 kB
+ SecPageTables: 0 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
@@ -1090,6 +1091,9 @@ KernelStack
Memory consumed by the kernel stacks of all tasks
PageTables
Memory consumed by userspace page tables
+SecPageTables
+ Memory consumed by secondary page tables, this currently
+ currently includes KVM mmu allocations on x86 and arm64.
NFS_Unstable
Always zero. Previous counted pages which had been written to
the server, but has not been committed to stable storage.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index abd7c32126ce..236a797be71c 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4074,7 +4074,7 @@ Queues an SMI on the thread's vcpu.
4.97 KVM_X86_SET_MSR_FILTER
----------------------------
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_msr_filter
@@ -4173,8 +4173,10 @@ If an MSR access is not permitted through the filtering, it generates a
allows user space to deflect and potentially handle various MSR accesses
into user space.
-If a vCPU is in running state while this ioctl is invoked, the vCPU may
-experience inconsistent filtering behavior on MSR accesses.
+Note, invoking this ioctl while a vCPU is running is inherently racy. However,
+KVM does guarantee that vCPUs will see either the previous filter or the new
+filter, e.g. MSRs with identical settings in both the old and new filter will
+have deterministic behavior.
4.98 KVM_CREATE_SPAPR_TCE_64
----------------------------
@@ -5287,110 +5289,7 @@ KVM_PV_DUMP
authentication tag all of which are needed to decrypt the dump at a
later time.
-
-4.126 KVM_X86_SET_MSR_FILTER
-----------------------------
-
-:Capability: KVM_CAP_X86_MSR_FILTER
-:Architectures: x86
-:Type: vm ioctl
-:Parameters: struct kvm_msr_filter
-:Returns: 0 on success, < 0 on error
-
-::
-
- struct kvm_msr_filter_range {
- #define KVM_MSR_FILTER_READ (1 << 0)
- #define KVM_MSR_FILTER_WRITE (1 << 1)
- __u32 flags;
- __u32 nmsrs; /* number of msrs in bitmap */
- __u32 base; /* MSR index the bitmap starts at */
- __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
- };
-
- #define KVM_MSR_FILTER_MAX_RANGES 16
- struct kvm_msr_filter {
- #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
- #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
- __u32 flags;
- struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
- };
-
-flags values for ``struct kvm_msr_filter_range``:
-
-``KVM_MSR_FILTER_READ``
-
- Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
- indicates that a read should immediately fail, while a 1 indicates that
- a read for a particular MSR should be handled regardless of the default
- filter action.
-
-``KVM_MSR_FILTER_WRITE``
-
- Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
- indicates that a write should immediately fail, while a 1 indicates that
- a write for a particular MSR should be handled regardless of the default
- filter action.
-
-``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
-
- Filter both read and write accesses to MSRs using the given bitmap. A 0
- in the bitmap indicates that both reads and writes should immediately fail,
- while a 1 indicates that reads and writes for a particular MSR are not
- filtered by this range.
-
-flags values for ``struct kvm_msr_filter``:
-
-``KVM_MSR_FILTER_DEFAULT_ALLOW``
-
- If no filter range matches an MSR index that is getting accessed, KVM will
- fall back to allowing access to the MSR.
-
-``KVM_MSR_FILTER_DEFAULT_DENY``
-
- If no filter range matches an MSR index that is getting accessed, KVM will
- fall back to rejecting access to the MSR. In this mode, all MSRs that should
- be processed by KVM need to explicitly be marked as allowed in the bitmaps.
-
-This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
-specify whether a certain MSR access should be explicitly filtered for or not.
-
-If this ioctl has never been invoked, MSR accesses are not guarded and the
-default KVM in-kernel emulation behavior is fully preserved.
-
-Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
-filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
-an error.
-
-As soon as the filtering is in place, every MSR access is processed through
-the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
-x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
-and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
-register.
-
-If a bit is within one of the defined ranges, read and write accesses are
-guarded by the bitmap's value for the MSR index if the kind of access
-is included in the ``struct kvm_msr_filter_range`` flags. If no range
-cover this particular access, the behavior is determined by the flags
-field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
-and ``KVM_MSR_FILTER_DEFAULT_DENY``.
-
-Each bitmap range specifies a range of MSRs to potentially allow access on.
-The range goes from MSR index [base .. base+nmsrs]. The flags field
-indicates whether reads, writes or both reads and writes are filtered
-by setting a 1 bit in the bitmap for the corresponding MSR index.
-
-If an MSR access is not permitted through the filtering, it generates a
-#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
-allows user space to deflect and potentially handle various MSR accesses
-into user space.
-
-Note, invoking this ioctl with a vCPU is running is inherently racy. However,
-KVM does guarantee that vCPUs will see either the previous filter or the new
-filter, e.g. MSRs with identical settings in both the old and new filter will
-have deterministic behavior.
-
-4.127 KVM_XEN_HVM_SET_ATTR
+4.126 KVM_XEN_HVM_SET_ATTR
--------------------------
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst
index 31f62b64e07b..87f04c1fa53d 100644
--- a/Documentation/virt/kvm/vcpu-requests.rst
+++ b/Documentation/virt/kvm/vcpu-requests.rst
@@ -97,7 +97,7 @@ VCPU requests are simply bit indices of the ``vcpu->requests`` bitmap.
This means general bitops, like those documented in [atomic-ops]_ could
also be used, e.g. ::
- clear_bit(KVM_REQ_UNHALT & KVM_REQUEST_MASK, &vcpu->requests);
+ clear_bit(KVM_REQ_UNBLOCK & KVM_REQUEST_MASK, &vcpu->requests);
However, VCPU request users should refrain from doing so, as it would
break the abstraction. The first 8 bits are reserved for architecture
@@ -126,17 +126,6 @@ KVM_REQ_UNBLOCK
or in order to update the interrupt routing and ensure that assigned
devices will wake up the vCPU.
-KVM_REQ_UNHALT
-
- This request may be made from the KVM common function kvm_vcpu_block(),
- which is used to emulate an instruction that causes a CPU to halt until
- one of an architectural specific set of events and/or interrupts is
- received (determined by checking kvm_arch_vcpu_runnable()). When that
- event or interrupt arrives kvm_vcpu_block() makes the request. This is
- in contrast to when kvm_vcpu_block() returns due to any other reason,
- such as a pending signal, which does not indicate the VCPU's halt
- emulation should stop, and therefore does not make the request.
-
KVM_REQ_OUTSIDE_GUEST_MODE
This "request" ensures the target vCPU has exited guest mode prior to the
@@ -297,21 +286,6 @@ architecture dependent. kvm_vcpu_block() calls kvm_arch_vcpu_runnable()
to check if it should awaken. One reason to do so is to provide
architectures a function where requests may be checked if necessary.
-Clearing Requests
------------------
-
-Generally it only makes sense for the receiving VCPU thread to clear a
-request. However, in some circumstances, such as when the requesting
-thread and the receiving VCPU thread are executed serially, such as when
-they are the same thread, or when they are using some form of concurrency
-control to temporarily execute synchronously, then it's possible to know
-that the request may be cleared immediately, rather than waiting for the
-receiving VCPU thread to handle the request in VCPU RUN. The only current
-examples of this are kvm_vcpu_block() calls made by VCPUs to block
-themselves. A possible side-effect of that call is to make the
-KVM_REQ_UNHALT request, which may then be cleared immediately when the
-VCPU returns from the call.
-
References
==========
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 917086be5c6b..446f628a9de1 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -666,7 +666,6 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
vgic_v4_load(vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c9a13e487187..34c5feed9dc1 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -92,9 +92,13 @@ static bool kvm_is_device_pfn(unsigned long pfn)
static void *stage2_memcache_zalloc_page(void *arg)
{
struct kvm_mmu_memory_cache *mc = arg;
+ void *virt;
/* Allocated with __GFP_ZERO, so no need to zero */
- return kvm_mmu_memory_cache_alloc(mc);
+ virt = kvm_mmu_memory_cache_alloc(mc);
+ if (virt)
+ kvm_account_pgtable_pages(virt, 1);
+ return virt;
}
static void *kvm_host_zalloc_pages_exact(size_t size)
@@ -102,6 +106,21 @@ static void *kvm_host_zalloc_pages_exact(size_t size)
return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
+static void *kvm_s2_zalloc_pages_exact(size_t size)
+{
+ void *virt = kvm_host_zalloc_pages_exact(size);
+
+ if (virt)
+ kvm_account_pgtable_pages(virt, (size >> PAGE_SHIFT));
+ return virt;
+}
+
+static void kvm_s2_free_pages_exact(void *virt, size_t size)
+{
+ kvm_account_pgtable_pages(virt, -(size >> PAGE_SHIFT));
+ free_pages_exact(virt, size);
+}
+
static void kvm_host_get_page(void *addr)
{
get_page(virt_to_page(addr));
@@ -112,6 +131,15 @@ static void kvm_host_put_page(void *addr)
put_page(virt_to_page(addr));
}
+static void kvm_s2_put_page(void *addr)
+{
+ struct page *p = virt_to_page(addr);
+ /* Dropping last refcount, the page will be freed */
+ if (page_count(p) == 1)
+ kvm_account_pgtable_pages(addr, -1);
+ put_page(p);
+}
+
static int kvm_host_page_count(void *addr)
{
return page_count(virt_to_page(addr));
@@ -625,10 +653,10 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
- .zalloc_pages_exact = kvm_host_zalloc_pages_exact,
- .free_pages_exact = free_pages_exact,
+ .zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
+ .free_pages_exact = kvm_s2_free_pages_exact,
.get_page = kvm_host_get_page,
- .put_page = kvm_host_put_page,
+ .put_page = kvm_s2_put_page,
.page_count = kvm_host_page_count,
.phys_to_virt = kvm_host_va,
.virt_to_phys = kvm_host_pa,
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index b494d8d39290..edaec93a1a1f 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -955,13 +955,11 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
kvm_vcpu_halt(vcpu);
/*
- * We we are runnable, then definitely go off to user space to
+ * We are runnable, then definitely go off to user space to
* check if any I/O interrupts are pending.
*/
- if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ if (kvm_arch_vcpu_runnable(vcpu))
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
- }
}
return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d6abed6e51e6..9fc4dd8f66eb 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -499,7 +499,6 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.generic.halt_wakeup++;
/* Unset POW bit after we woke up */
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index a1f2978b2a86..b2c89e850d7a 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -393,7 +393,6 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.generic.halt_wakeup++;
return EMULATE_DONE;
case H_LOGICAL_CI_LOAD:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 06c5830a93f9..7b4920e9fd26 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -719,7 +719,6 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
hard_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index fb1490761c87..ec9c1e3c2ff4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -239,7 +239,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
case EV_HCALL_TOKEN(EV_IDLE):
r = EV_SUCCESS;
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
break;
default:
r = EV_UNIMPLEMENTED;
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
index 7eb90a47b571..0bb52761a3f7 100644
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -191,7 +191,6 @@ void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_halt(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b7ef0b71014d..45d4b8182b07 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4343,8 +4343,6 @@ retry:
goto retry;
}
- /* nothing to do, just clear the request */
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 0a9407dc0859..3089ec352743 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -138,6 +138,9 @@
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
+/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0)
+
/*
* This is specific to AMD and specifies that enlightened TLB flush is
* supported. If guest opts in to this feature, ASID invalidations only
@@ -546,7 +549,7 @@ struct hv_enlightened_vmcs {
u64 guest_rip;
u32 hv_clean_fields;
- u32 hv_padding_32;
+ u32 padding32_1;
u32 hv_synthetic_controls;
struct {
u32 nested_flush_hypercall:1;
@@ -554,14 +557,25 @@ struct hv_enlightened_vmcs {
u32 reserved:30;
} __packed hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
u64 guest_bndcfgs;
- u64 padding64_5[7];
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
u64 xss_exit_bitmap;
- u64 padding64_6[7];
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
} __packed;
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 51f777071584..82ba4a564e58 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -67,7 +67,7 @@ KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall)
KVM_X86_OP(inject_irq)
KVM_X86_OP(inject_nmi)
-KVM_X86_OP(queue_exception)
+KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)
KVM_X86_OP(nmi_allowed)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa381ab69a19..61b9dd34d333 100644
--- a/