diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-04-26 15:53:36 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-04-26 15:53:36 -0400 |
commit | 48b1893ae38bd6d46a9dcfc7b85c70a143fb8cab (patch) | |
tree | d69a46c054bf8a8558b17cdb9510ae5285296ba6 | |
parent | 807b758496e42ada4ba3f3defcfbac88afcd64f8 (diff) | |
parent | 457bd7af1a17182e7f1f97eeb5d9107f8699e99d (diff) | |
download | linux-48b1893ae38bd6d46a9dcfc7b85c70a143fb8cab.tar.gz linux-48b1893ae38bd6d46a9dcfc7b85c70a143fb8cab.tar.bz2 linux-48b1893ae38bd6d46a9dcfc7b85c70a143fb8cab.zip |
Merge tag 'kvm-x86-pmu-6.4' of https://github.com/kvm-x86/linux into HEAD
KVM x86 PMU changes for 6.4:
- Disallow virtualizing legacy LBRs if architectural LBRs are available,
the two are mutually exclusive in hardware
- Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES)
after KVM_RUN, and overhaul the vmx_pmu_caps selftest to better
validate PERF_CAPABILITIES
- Apply PMU filters to emulated events and add test coverage to the
pmu_event_filter selftest
- Misc cleanups and fixes
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.c | 21 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/pmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 135 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 102 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 13 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/include/x86_64/processor.h | 45 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c | 252 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c | 230 |
14 files changed, 558 insertions, 268 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ffdb0c3010e7..d197c54dcaa6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_pmc { #define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1) #define KVM_AMD_PMC_MAX_GENERIC 6 struct kvm_pmu { + u8 version; unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; unsigned available_event_types; @@ -525,7 +526,6 @@ struct kvm_pmu { u64 global_ovf_ctrl_mask; u64 reserved_bits; u64 raw_event_mask; - u8 version; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6972e0be60fa..b944492faefa 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -414,7 +414,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, * KVM_SET_CPUID{,2} again. To support this legacy behavior, check * whether the supplied CPUID data is equal to what's already set. */ - if (vcpu->arch.last_vmentry_cpu != -1) { + if (kvm_vcpu_has_run(vcpu)) { r = kvm_cpuid_check_equal(vcpu, e2, nent); if (r) return r; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e3d02f059437..c8961f45e3b1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5476,7 +5476,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * Changing guest CPUID after KVM_RUN is forbidden, see the comment in * kvm_arch_vcpu_ioctl(). */ - KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm); + KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm); } void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index bb1733bb5bf9..1690d41c1830 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static inline bool pmc_is_enabled(struct kvm_pmc *pmc) +static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc) { return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc); } @@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc) return is_fixed_event_allowed(filter, pmc->idx); } +static bool pmc_event_is_allowed(struct kvm_pmc *pmc) +{ + return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) && + check_pmu_event_filter(pmc); +} + static void reprogram_counter(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc) pmc_pause_counter(pmc); - if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc)) - goto reprogram_complete; - - if (!check_pmu_event_filter(pmc)) + if (!pmc_event_is_allowed(pmc)) goto reprogram_complete; if (pmc->counter < pmc->prev_counter) @@ -589,6 +592,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm)) + return; + + bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX); static_call(kvm_x86_pmu_refresh)(vcpu); } @@ -646,7 +653,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc) { pmc->prev_counter = pmc->counter; pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc); - kvm_pmu_request_counter_reprogam(pmc); + kvm_pmu_request_counter_reprogram(pmc); } static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, @@ -684,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id) for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) { pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i); - if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc)) + if (!pmc || !pmc_event_is_allowed(pmc)) continue; /* Ignore checks for edge detect, pin control, invert and CMASK bits */ diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index be62c16f2265..5c7bbf03b599 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) KVM_PMC_MAX_FIXED); } -static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc) +static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc) { set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index cc77a0681800..5fa939e411d8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~pmu->reserved_bits; if (data != pmc->eventsel) { pmc->eventsel = data; - kvm_pmu_request_counter_reprogam(pmc); + kvm_pmu_request_counter_reprogram(pmc); } return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0827e0c25309..a64ede4f1d8a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4093,7 +4093,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: return false; case MSR_IA32_SMBASE: if (!IS_ENABLED(CONFIG_KVM_SMM)) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index e8a3be0b9df9..741efe2c497b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i); __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); - kvm_pmu_request_counter_reprogam(pmc); + kvm_pmu_request_counter_reprogram(pmc); } } @@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) { int bit; - struct kvm_pmc *pmc; - for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { - pmc = intel_pmc_idx_to_pmc(pmu, bit); - if (pmc) - kvm_pmu_request_counter_reprogam(pmc); - } + if (!diff) + return; + + for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) + set_bit(bit, pmu->reprogram_pmi); + kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu)); } static bool intel_hw_event_available(struct kvm_pmc *pmc) @@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: msr_info->data = pmu->fixed_ctr_ctrl; - return 0; + break; case MSR_CORE_PERF_GLOBAL_STATUS: msr_info->data = pmu->global_status; - return 0; + break; case MSR_CORE_PERF_GLOBAL_CTRL: msr_info->data = pmu->global_ctrl; - return 0; + break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: msr_info->data = 0; - return 0; + break; case MSR_IA32_PEBS_ENABLE: msr_info->data = pmu->pebs_enable; - return 0; + break; case MSR_IA32_DS_AREA: msr_info->data = pmu->ds_area; - return 0; + break; case MSR_PEBS_DATA_CFG: msr_info->data = pmu->pebs_data_cfg; - return 0; + break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { u64 val = pmc_read_counter(pmc); msr_info->data = val & pmu->counter_bitmask[KVM_PMC_GP]; - return 0; + break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { u64 val = pmc_read_counter(pmc); msr_info->data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; - return 0; + break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { msr_info->data = pmc->eventsel; - return 0; - } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) - return 0; + break; + } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) { + break; + } + return 1; } - return 1; + return 0; } static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: - if (pmu->fixed_ctr_ctrl == data) - return 0; - if (!(data & pmu->fixed_ctr_ctrl_mask)) { + if (data & pmu->fixed_ctr_ctrl_mask) + return 1; + + if (pmu->fixed_ctr_ctrl != data) reprogram_fixed_counters(pmu, data); - return 0; - } break; case MSR_CORE_PERF_GLOBAL_STATUS: - if (msr_info->host_initiated) { - pmu->global_status = data; - return 0; - } - break; /* RO MSR */ + if (!msr_info->host_initiated) + return 1; /* RO MSR */ + + pmu->global_status = data; + break; case MSR_CORE_PERF_GLOBAL_CTRL: - if (pmu->global_ctrl == data) - return 0; - if (kvm_valid_perf_global_ctrl(pmu, data)) { + if (!kvm_valid_perf_global_ctrl(pmu, data)) + return 1; + + if (pmu->global_ctrl != data) { diff = pmu->global_ctrl ^ data; pmu->global_ctrl = data; reprogram_counters(pmu, diff); - return 0; } break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - if (!(data & pmu->global_ovf_ctrl_mask)) { - if (!msr_info->host_initiated) - pmu->global_status &= ~data; - return 0; - } + if (data & pmu->global_ovf_ctrl_mask) + return 1; + + if (!msr_info->host_initiated) + pmu->global_status &= ~data; break; case MSR_IA32_PEBS_ENABLE: - if (pmu->pebs_enable == data) - return 0; - if (!(data & pmu->pebs_enable_mask)) { + if (data & pmu->pebs_enable_mask) + return 1; + + if (pmu->pebs_enable != data) { diff = pmu->pebs_enable ^ data; pmu->pebs_enable = data; reprogram_counters(pmu, diff); - return 0; } break; case MSR_IA32_DS_AREA: @@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (is_noncanonical_address(data, vcpu)) return 1; + pmu->ds_area = data; - return 0; + break; case MSR_PEBS_DATA_CFG: - if (pmu->pebs_data_cfg == data) - return 0; - if (!(data & pmu->pebs_data_cfg_mask)) { - pmu->pebs_data_cfg = data; - return 0; - } + if (data & pmu->pebs_data_cfg_mask) + return 1; + + pmu->pebs_data_cfg = data; break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || @@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if ((msr & MSR_PMC_FULL_WIDTH_BIT) && (data & ~pmu->counter_bitmask[KVM_PMC_GP])) return 1; + if (!msr_info->host_initiated && !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); pmc_update_sample_period(pmc); - return 0; + break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { pmc->counter += data - pmc_read_counter(pmc); pmc_update_sample_period(pmc); - return 0; + break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { - if (data == pmc->eventsel) - return 0; reserved_bits = pmu->reserved_bits; if ((pmc->idx == 2) && (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED)) reserved_bits ^= HSW_IN_TX_CHECKPOINTED; - if (!(data & reserved_bits)) { + if (data & reserved_bits) + return 1; + + if (data != pmc->eventsel) { pmc->eventsel = data; - kvm_pmu_request_counter_reprogam(pmc); - return 0; + kvm_pmu_request_counter_reprogram(pmc); } - } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) - return 0; + break; + } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) { + break; + } + /* Not a known PMU MSR. */ + return 1; } - return 1; + return 0; } static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu) @@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->pebs_enable_mask = ~0ull; pmu->pebs_data_cfg_mask = ~0ull; + memset(&lbr_desc->records, 0, sizeof(lbr_desc->records)); + + /* + * Setting passthrough of LBR MSRs is done only in the VM-Entry loop, + * and PMU refresh is disallowed after the vCPU has run, i.e. this code + * should never be reached while KVM is passing through MSRs. + */ + if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm)) + return; + entry = kvm_find_cpuid_entry(vcpu, 0xa); if (!entry || !vcpu->kvm->arch.enable_pmu) return; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 536b7d688851..599dd62eeb0e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1946,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, static int vmx_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!nested) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); @@ -2031,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0]; break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!nested_vmx_allowed(vcpu)) return 1; if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, @@ -2340,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx->msr_ia32_sgxlepubkeyhash [msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data; break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!msr_info->host_initiated) return 1; /* they are read-only */ if (!nested_vmx_allowed(vcpu)) @@ -6930,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) * real mode. */ return enable_unrestricted_guest || emulate_invalid_guest_state; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: return nested; case MSR_AMD64_VIRT_SPEC_CTRL: case MSR_AMD64_TSC_RATIO: @@ -7756,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void) if (boot_cpu_has(X86_FEATURE_PDCM)) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); - x86_perf_get_lbr(&lbr); - if (lbr.nr) - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) { + x86_perf_get_lbr(&lbr); + if (lbr.nr) + perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + } if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ad4a45d8975b..095a41c6f346 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1556,39 +1556,41 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; static unsigned num_emulated_msrs; /* - * List of msr numbers which are used to expose MSR-based features that - * can be used by a hypervisor to validate requested CPU features. + * List of MSRs that control the existence of MSR-based features, i.e. MSRs + * that are effectively CPUID leafs. VMX MSRs are also included in the set of + * feature MSRs, but are handled separately to allow expedited lookups. */ -static const u32 msr_based_features_all[] = { - MSR_IA32_VMX_BASIC, - MSR_IA32_VMX_TRUE_PINBASED_CTLS, - MSR_IA32_VMX_PINBASED_CTLS, - MSR_IA32_VMX_TRUE_PROCBASED_CTLS, - MSR_IA32_VMX_PROCBASED_CTLS, - MSR_IA32_VMX_TRUE_EXIT_CTLS, - MSR_IA32_VMX_EXIT_CTLS, - MSR_IA32_VMX_TRUE_ENTRY_CTLS, - MSR_IA32_VMX_ENTRY_CTLS, - MSR_IA32_VMX_MISC, - MSR_IA32_VMX_CR0_FIXED0, - MSR_IA32_VMX_CR0_FIXED1, - MSR_IA32_VMX_CR4_FIXED0, - MSR_IA32_VMX_CR4_FIXED1, - MSR_IA32_VMX_VMCS_ENUM, - MSR_IA32_VMX_PROCBASED_CTLS2, - MSR_IA32_VMX_EPT_VPID_CAP, - MSR_IA32_VMX_VMFUNC, - +static const u32 msr_based_features_all_except_vmx[] = { MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, }; -static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + + (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; static unsigned int num_msr_based_features; /* + * All feature MSRs except uCode revID, which tracks the currently loaded uCode + * patch, are immutable once the vCPU model is defined. + */ +static bool kvm_is_immutable_feature_msr(u32 msr) +{ + int i; + + if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) + return true; + + for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { + if (msr == msr_based_features_all_except_vmx[i]) + return msr != MSR_IA32_UCODE_REV; + } + + return false; +} + +/* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM * does not yet virtualize. These include: * 10 - MISC_PACKAGE_CTRLS @@ -2205,6 +2207,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { + u64 val; + + /* + * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does + * not support modifying the guest vCPU model on the fly, e.g. changing + * the nVMX capabilities while L2 is running is nonsensical. Ignore + * writes of the same value, e.g. to allow userspace to blindly stuff + * all MSRs when emulating RESET. + */ + if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) { + if (do_get_msr(vcpu, index, &val) || *data != val) + return -EINVAL; + + return 0; + } + return kvm_set_msr_ignored_check(vcpu, index, *data, true); } @@ -3627,9 +3645,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~kvm_caps.supported_perf_cap) return 1; + /* + * Note, this is not just a performance optimization! KVM + * disallows changing feature MSRs after the vCPU has run; PMU + * refresh will bug the VM if called after the vCPU has run. + */ + if (vcpu->arch.perf_capabilities == data) + break; + vcpu->arch.perf_capabilities = data; kvm_pmu_refresh(vcpu); - return 0; + break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu)) return 1; @@ -7045,6 +7071,18 @@ out: return r; } +static void kvm_probe_feature_msr(u32 msr_index) +{ + struct kvm_msr_entry msr = { + .index = msr_index, + }; + + if (kvm_get_msr_feature(&msr)) + return; + + msr_based_features[num_msr_based_features++] = msr_index; +} + static void kvm_probe_msr_to_save(u32 msr_index) { u32 dummy[2]; @@ -7120,7 +7158,7 @@ static void kvm_probe_msr_to_save(u32 msr_index) msrs_to_save[num_msrs_to_save++] = msr_index; } -static void kvm_init_msr_list(void) +static void kvm_init_msr_lists(void) { unsigned i; @@ -7146,15 +7184,11 @@ static void kvm_init_msr_list(void) emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; } - for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) { - struct kvm_msr_entry msr; + for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++) + kvm_probe_feature_msr(i); - msr.index = msr_based_features_all[i]; - if (kvm_get_msr_feature(&msr)) - continue; - - msr_based_features[num_msr_based_features++] = msr_based_features_all[i]; - } + for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) + kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]); } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -9488,7 +9522,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_caps.max_guest_tsc_khz = max; } kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; - kvm_init_msr_list(); + kvm_init_msr_lists(); return 0; out_unwind_ops: diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 203fb6640b5b..fbef05c0bdeb 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -40,6 +40,14 @@ void kvm_spurious_fault(void); failed; \ }) +/* + * The first...last VMX feature MSRs that are emulated by KVM. This may or may + * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an + * associated feature that KVM supports for nested virtualization. + */ +#define KVM_FIRST_EMULATED_VMX_MSR MSR_IA32_VMX_BASIC +#define KVM_LAST_EMULATED_VMX_MSR MSR_IA32_VMX_VMFUNC + #define KVM_DEFAULT_PLE_GAP 128 #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 #define KVM_DEFAULT_PLE_WINDOW_GROW 2 @@ -83,6 +91,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val, void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); +static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.last_vmentry_cpu != -1; +} + static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu) { return vcpu->arch.exception.pending || diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 90387ddcb2a9..e1d65d933310 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -928,14 +928,45 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); -static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, - uint64_t msr_value) -{ - int r = _vcpu_set_msr(vcpu, msr_index, msr_value); - - TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); -} +/* + * Assert on an MSR access(es) and pretty print the MSR name when possible. + * Note, the caller provides the stringified name so that the name of macro is + * printed, not the value the macro resolves to (due to macro expansion). + */ +#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \ +do { \ + if (__builtin_constant_p(msr)) { \ + TEST_ASSERT(cond, fmt, str, args); \ + } else if (!(cond)) { \ + char buf[16]; \ + \ + snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \ + TEST_ASSERT(cond, fmt, buf, args); \ + } \ +} while (0) +/* + * Returns true if KVM should return the last written value when reading an MSR + * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that + * is changing, etc. This is NOT an exhaustive list! The intent is to filter + * out MSRs that are not durable _and_ that a selftest wants to write. + */ +static inline bool is_durable_msr(uint32_t msr) +{ + return msr != MSR_IA32_TSC; +} + +#define vcpu_set_msr(vcpu, msr, val) \ +do { \ + uint64_t r, v = val; \ + \ + TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \ + "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \ + if (!is_durable_msr(msr)) \ + break; \ + r = vcpu_get_msr(vcpu, msr); \ + TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\ +} while (0) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 2feef25ba691..8cec5c8aca8a 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -54,6 +54,21 @@ #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) + +/* + * "Retired instructions", from Processor Programming Reference + * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, + * Preliminary Processor Programming Reference (PPR) for AMD Family + * 17h Model 31h, Revision B0 Processors, and Preliminary Processor + * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision + * B1 Processors Volume 1 of 2. + * --- and --- + * "Instructions retired", from the Intel SDM, volume 3, + * "Pre-defined Architectural Performance Events." + */ + +#define INST_RETIRED EVENT(0xc0, 0) + /* * This event list comprises Intel's eight architectural events plus * AMD's "retired branch instructions" for Zen[123] (and possibly @@ -61,7 +76,7 @@ */ static const uint64_t event_list[] = { EVENT(0x3c, 0), - EVENT(0xc0, 0), + INST_RETIRED, EVENT(0x3c, 1), EVENT(0x2e, 0x4f), EVENT(0x2e, 0x41), @@ -71,13 +86,21 @@ static const uint64_t event_list[] = { AMD_ZEN_BR_RETIRED, }; +struct { + uint64_t loads; + uint64_t stores; + uint64_t loads_stores; + uint64_t branches_retired; + uint64_t instructions_retired; +} pmc_results; + /* * If we encounter a #GP during the guest PMU sanity check, then the guest * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). */ static void guest_gp_handler(struct ex_regs *regs) { - GUEST_SYNC(0); + GUEST_SYNC(-EFAULT); } /* @@ -92,12 +115,23 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip) wrmsr(msr, v); if (rdmsr(msr) != v) - GUEST_SYNC(0); + GUEST_SYNC(-EIO); v ^= bits_to_flip; wrmsr(msr, v); if (rdmsr(msr) != v) - GUEST_SYNC(0); + GUEST_SYNC(-EIO); +} + +static void run_and_measure_loop(uint32_t msr_base) +{ + const uint64_t branches_retired = rdmsr(msr_base + 0); + const uint64_t insn_retired = rdmsr(msr_base + 1); + + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + + pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired; + pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired; } static void intel_guest_code(void) @@ -105,19 +139,18 @@ static void intel_guest_code(void) check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); check_msr(MSR_P6_EVNTSEL0, 0xffff); check_msr(MSR_IA32_PMC0, 0xffff); - GUEST_SYNC(1); + GUEST_SYNC(0); for (;;) { - uint64_t br0, br1; - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1); - br0 = rdmsr(MSR_IA32_PMC0); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); - br1 = rdmsr(MSR_IA32_PMC0); - GUEST_SYNC(br1 - br0); + wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); + + run_and_measure_loop(MSR_IA32_PMC0); + GUEST_SYNC(0); } } @@ -130,18 +163,17 @@ static void amd_guest_code(void) { check_msr(MSR_K7_EVNTSEL0, 0xffff); check_msr(MSR_K7_PERFCTR0, 0xffff); - GUEST_SYNC(1); + GUEST_SYNC(0); for (;;) { - uint64_t br0, br1; - wrmsr(MSR_K7_EVNTSEL0, 0); wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); - br0 = rdmsr(MSR_K7_PERFCTR0); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); - br1 = rdmsr(MSR_K7_PERFCTR0); - GUEST_SYNC(br1 - br0); + wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED); + + run_and_measure_loop(MSR_K7_PERFCTR0); + GUEST_SYNC(0); } } @@ -161,6 +193,19 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) return uc.args[1]; } +static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) +{ + uint64_t r; + + memset(&pmc_results, 0, sizeof(pmc_results)); + sync_global_to_guest(vcpu->vm, pmc_results); + + r = run_vcpu_to_sync(vcpu); + TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r); + + sync_global_from_guest(vcpu->vm, pmc_results); +} + /* * In a nested environment or if the vPMU is disabled, the guest PMU * might not work as architected (accessing the PMU MSRs may raise @@ -171,13 +216,13 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) */ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) { - bool success; + uint64_t r; vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); - success = run_vcpu_to_sync(vcpu); + r = run_vcpu_to_sync(vcpu); vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL); - return success; + return !r; } static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) @@ -237,91 +282,101 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, return f; } +#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ +do { \ + uint64_t br = pmc_results.branches_retired; \ + uint64_t ir = pmc_results.instructions_retired; \ + \ + if (br && br != NUM_BRANCHES) \ + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ + __func__, br, NUM_BRANCHES); \ + TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ + __func__, br); \ + TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \ + __func__, ir); \ +} while (0) + +#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \ +do { \ + uint64_t br = pmc_results.branches_retired; \ + uint64_t ir = pmc_results.instructions_retired; \ + \ + TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \ + __func__, br); \ + TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", |