From a807b78ad04b2eaa348f52f5cc7702385b6de1ee Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 1 Feb 2023 08:29:03 -0500 Subject: kvm: vmx: Add IA32_FLUSH_CMD guest support Expose IA32_FLUSH_CMD to the guest if the guest CPUID enumerates support for this MSR. As with IA32_PRED_CMD, permission for unintercepted writes to this MSR will be granted to the guest after the first non-zero write. Co-developed-by: Jim Mattson Signed-off-by: Jim Mattson Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20230201132905.549148-2-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kvm/vmx/nested.c') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1bc2b80273c9..f63b28f46a71 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -654,6 +654,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, MSR_IA32_PRED_CMD, MSR_TYPE_W); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_FLUSH_CMD, MSR_TYPE_W); + kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); vmx->nested.force_msr_bitmap_recalc = false; -- cgit v1.2.3 From 2c86c444e2751a867bfa6a059a80ba67ef9d441a Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 17 Feb 2023 07:53:17 +0800 Subject: KVM: x86/mmu: Use kvm_mmu_invalidate_addr() in nested_ept_invalidate_addr() Use kvm_mmu_invalidate_addr() instead open calls to mmu->invlpg(). No functional change intended. Signed-off-by: Lai Jiangshan Link: https://lore.kernel.org/r/20230216235321.735214-1-jiangshanlai@gmail.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx/nested.c') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f63b28f46a71..c4ede229ed82 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -358,6 +358,7 @@ static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, gpa_t addr) { + unsigned long roots = 0; uint i; struct kvm_mmu_root_info *cached_root; @@ -368,8 +369,10 @@ static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd, eptp)) - vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa); + roots |= KVM_MMU_ROOT_PREVIOUS(i); } + if (roots) + kvm_mmu_invalidate_addr(vcpu, vcpu->arch.mmu, addr, roots); } static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From ad36aab37ae4f2ca898a809454020c5acc23b47c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 19 Jan 2023 22:19:45 +0800 Subject: KVM: nVMX: Remove outdated comments in nested_vmx_setup_ctls_msrs() nested_vmx_setup_ctls_msrs() initializes the vmcs_conf.nested, which stores the global VMX MSR configurations when nested is supported, regardless of any particular CPUID settings for one VM. Commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm KVM_GET_MSRS") added the some feature flags for secondary proc-based controls, so that those features can be available in KVM_GET_MSRS. Yet this commit did not remove the obsolete comments in nested_vmx_setup_ctls_msrs(). Just fix the comments, and no functional change intended. Fixes: 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm KVM_GET_MSRS") Signed-off-by: Yu Zhang Link: https://lore.kernel.org/r/20230119141946.585610-1-yu.c.zhang@linux.intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/kvm/vmx/nested.c') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f63b28f46a71..d778fa304f51 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6866,11 +6866,6 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) msrs->procbased_ctls_low &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); - /* - * secondary cpu-based controls. Do not include those that - * depend on CPUID bits, they are added later by - * vmx_vcpu_after_set_cpuid. - */ msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl; -- cgit v1.2.3 From f6cde92083dec5cf424504d7029acdffbe5beed8 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 19 Jan 2023 22:19:46 +0800 Subject: KVM: nVMX: Add helpers to setup VMX control msr configs nested_vmx_setup_ctls_msrs() is used to set up the various VMX MSR controls for nested VMX. But it is a bit lengthy, just add helpers to setup the configuration of VMX MSRs. Suggested-by: Sean Christopherson Signed-off-by: Yu Zhang Link: https://lore.kernel.org/r/20230119141946.585610-2-yu.c.zhang@linux.intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 107 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 74 insertions(+), 33 deletions(-) (limited to 'arch/x86/kvm/vmx/nested.c') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d778fa304f51..dab529cd799c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6753,36 +6753,9 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT; } -/* - * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be - * returned for the various VMX controls MSRs when nested VMX is enabled. - * The same values should also be used to verify that vmcs12 control fields are - * valid during nested entry from L1 to L2. - * Each of these control msrs has a low and high 32-bit half: A low bit is on - * if the corresponding bit in the (32-bit) control field *must* be on, and a - * bit in the high half is on if the corresponding bit in the control field - * may be on. See also vmx_control_verify(). - */ -void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) +static void nested_vmx_setup_pinbased_ctls(struct vmcs_config *vmcs_conf, + struct nested_vmx_msrs *msrs) { - struct nested_vmx_msrs *msrs = &vmcs_conf->nested; - - /* - * Note that as a general rule, the high half of the MSRs (bits in - * the control fields which may be 1) should be initialized by the - * intersection of the underlying hardware's MSR (i.e., features which - * can be supported) and the list of features we want to expose - - * because they are known to be properly supported in our code. - * Also, usually, the low half of the MSRs (bits which must be 1) can - * be set to 0, meaning that L1 may turn off any of these bits. The - * reason is that if one of these bits is necessary, it will appear - * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control - * fields of vmcs01 and vmcs02, will turn these bits off - and - * nested_vmx_l1_wants_exit() will not pass related exits to L1. - * These rules have exceptions below. - */ - - /* pin-based controls */ msrs->pinbased_ctls_low = PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; @@ -6795,8 +6768,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) msrs->pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; +} - /* exit controls */ +static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, + struct nested_vmx_msrs *msrs) +{ msrs->exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; @@ -6815,8 +6791,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; +} - /* entry controls */ +static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, + struct nested_vmx_msrs *msrs) +{ msrs->entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; @@ -6832,8 +6811,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) /* We support free control of debug control loading. */ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; +} - /* cpu-based controls */ +static void nested_vmx_setup_cpubased_ctls(struct vmcs_config *vmcs_conf, + struct nested_vmx_msrs *msrs) +{ msrs->procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; @@ -6865,7 +6847,12 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) /* We support free control of CR3 access interception. */ msrs->procbased_ctls_low &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); +} +static void nested_vmx_setup_secondary_ctls(u32 ept_caps, + struct vmcs_config *vmcs_conf, + struct nested_vmx_msrs *msrs) +{ msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl; @@ -6943,8 +6930,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) if (enable_sgx) msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING; +} - /* miscellaneous data */ +static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf, + struct nested_vmx_msrs *msrs) +{ msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | @@ -6952,7 +6942,10 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) VMX_MISC_ACTIVITY_HLT | VMX_MISC_ACTIVITY_WAIT_SIPI; msrs->misc_high = 0; +} +static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) +{ /* * This MSR reports some information about VMX support. We * should return information about the VMX we emulate for the @@ -6967,7 +6960,10 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; +} +static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) +{ /* * These MSRs specify bits which the guest must keep fixed on * while L1 is in VMXON mode (in L1's root mode, or running an L2). @@ -6984,6 +6980,51 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) if (vmx_umip_emulated()) msrs->cr4_fixed1 |= X86_CR4_UMIP; +} + +/* + * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be + * returned for the various VMX controls MSRs when nested VMX is enabled. + * The same values should also be used to verify that vmcs12 control fields are + * valid during nested entry from L1 to L2. + * Each of these control msrs has a low and high 32-bit half: A low bit is on + * if the corresponding bit in the (32-bit) control field *must* be on, and a + * bit in the high half is on if the corresponding bit in the control field + * may be on. See also vmx_control_verify(). + */ +void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) +{ + struct nested_vmx_msrs *msrs = &vmcs_conf->nested; + + /* + * Note that as a general rule, the high half of the MSRs (bits in + * the control fields which may be 1) should be initialized by the + * intersection of the underlying hardware's MSR (i.e., features which + * can be supported) and the list of features we want to expose - + * because they are known to be properly supported in our code. + * Also, usually, the low half of the MSRs (bits which must be 1) can + * be set to 0, meaning that L1 may turn off any of these bits. The + * reason is that if one of these bits is necessary, it will appear + * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control + * fields of vmcs01 and vmcs02, will turn these bits off - and + * nested_vmx_l1_wants_exit() will not pass related exits to L1. + * These rules have exceptions below. + */ + nested_vmx_setup_pinbased_ctls(vmcs_conf, msrs); + + nested_vmx_setup_exit_ctls(vmcs_conf, msrs); + + nested_vmx_setup_entry_ctls(vmcs_conf, msrs); + + nested_vmx_setup_cpubased_ctls(vmcs_conf, msrs); + + nested_vmx_setup_secondary_ctls(ept_caps, vmcs_conf, msrs); + + nested_vmx_setup_misc_data(vmcs_conf, msrs); + + nested_vmx_setup_basic(msrs); + + nested_vmx_setup_cr_fixed(msrs); msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr(); } -- cgit v1.2.3 From fb509f76acc8d42bed11bca308404f81c2be856a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 22 Mar 2023 02:37:31 +0100 Subject: KVM: VMX: Make CR0.WP a guest owned bit Guests like grsecurity that make heavy use of CR0.WP to implement kernel level W^X will suffer from the implied VMEXITs. With EPT there is no need to intercept a guest change of CR0.WP, so simply make it a guest owned bit if we can do so. This implies that a read of a guest's CR0.WP bit might need a VMREAD. However, the only potentially affected user seems to be kvm_init_mmu() which is a heavy operation to begin with. But also most callers already cache the full value of CR0 anyway, so no additional VMREAD is needed. The only exception is nested_vmx_load_cr3(). This change is VMX-specific, as SVM has no such fine grained control register intercept control. Suggested-by: Sean Christopherson Signed-off-by: Mathias Krause Link: https://lore.kernel.org/r/20230322013731.102955-7-minipli@grsecurity.net Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx/nested.c') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f63b28f46a71..61d940fc91ba 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4481,7 +4481,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * CR0_GUEST_HOST_MASK is already set in the original vmcs01 * (KVM doesn't change it); */ - vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; + vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); vmx_set_cr0(vcpu, vmcs12->host_cr0); /* Same as above - no reason to call set_cr4_guest_host_mask(). */ @@ -4632,7 +4632,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) */ vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); - vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; + vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); -- cgit v1.2.3 From 607475cfa0f753862c0030de836875fe4ac5cf8a Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Wed, 22 Mar 2023 12:58:21 +0800 Subject: KVM: x86: Add helpers to query individual CR0/CR4 bits Add helpers to check if a specific CR0/CR4 bit is set to avoid a plethora of implicit casts from the "unsigned long" return of kvm_read_cr*_bits(), and to make each caller's intent more obvious. Defer converting helpers that do truly ugly casts from "unsigned long" to "int", e.g. is_pse(), to a future commit so that their conversion is more isolated. Opportunistically drop the superfluous pcid_enabled from kvm_set_cr3(); the local variable is used only once, immediately after its declaration. Suggested-by: Sean Christopherson Signed-off-by: Binbin Wu Link: https://lore.kernel.org/r/20230322045824.22970-2-binbin.wu@linux.intel.com [sean: move "obvious" conversions to this commit, massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx/nested.c') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 61d940fc91ba..06b2c24d5a74 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5154,7 +5154,7 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) * does force CR0.PE=1, but only to also force VM86 in order to emulate * Real Mode, and so there's no need to check CR0.PE manually. */ - if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { + if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -- cgit v1.2.3