From 653ea4489e6989f14a87abf8653f77c089097326 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jul 2024 16:59:22 -0700 Subject: KVM: nVMX: Honor userspace MSR filter lists for nested VM-Enter/VM-Exit Synthesize a consistency check VM-Exit (VM-Enter) or VM-Abort (VM-Exit) if L1 attempts to load/store an MSR via the VMCS MSR lists that userspace has disallowed access to via an MSR filter. Intel already disallows including a handful of "special" MSRs in the VMCS lists, so denying access isn't completely without precedent. More importantly, the behavior is well-defined _and_ can be communicated the end user, e.g. to the customer that owns a VM running as L1 on top of KVM. On the other hand, ignoring userspace MSR filters is all but guaranteed to result in unexpected behavior as the access will hit KVM's internal state, which is likely not up-to-date. Unlike KVM-internal accesses, instruction emulation, and dedicated VMCS fields, the MSRs in the VMCS load/store lists are 100% guest controlled, thus making it all but impossible to reason about the correctness of ignoring the MSR filter. And if userspace *really* wants to deny access to MSRs via the aforementioned scenarios, userspace can hide the associated feature from the guest, e.g. by disabling the PMU to prevent accessing PERF_GLOBAL_CTRL via its VMCS field. But for the MSR lists, KVM is blindly processing MSRs; the MSR filters are the _only_ way for userspace to deny access. This partially reverts commit ac8d6cad3c7b ("KVM: x86: Only do MSR filtering when access MSR by rdmsr/wrmsr"). Cc: Hou Wenlong Cc: Jim Mattson Link: https://lore.kernel.org/r/20240722235922.3351122-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/vmx/nested.c | 12 ++++++------ arch/x86/kvm/x86.c | 6 ++++-- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..4a93ac1b9be9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2060,6 +2060,8 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2392a7ef254d..674f7089cc44 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -981,7 +981,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - if (kvm_set_msr(vcpu, e.index, e.value)) { + if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -1017,7 +1017,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, } } - if (kvm_get_msr(vcpu, msr_index, data)) { + if (kvm_get_msr_with_filter(vcpu, msr_index, data)) { pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, msr_index); return false; @@ -1112,9 +1112,9 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, /* * Emulated VMEntry does not fail here. Instead a less * accurate value will be returned by - * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() - * instead of reading the value from the vmcs02 VMExit - * MSR-store area. + * nested_vmx_get_vmexit_msr_value() by reading KVM's + * internal MSR state instead of reading the value from + * the vmcs02 VMExit MSR-store area. */ pr_warn_ratelimited( "Not enough msr entries in msr_autostore. Can't add msr %x\n", @@ -4806,7 +4806,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - if (kvm_set_msr(vcpu, h.index, h.value)) { + if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..34b52b49f5e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1940,19 +1940,21 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, return ret; } -static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; return kvm_get_msr_ignored_check(vcpu, index, data, false); } +EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); -static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) return KVM_MSR_RET_FILTERED; return kvm_set_msr_ignored_check(vcpu, index, data, false); } +EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { -- cgit v1.2.3