diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2023-10-31 16:37:07 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-10-31 16:37:07 -0400 |
| commit | 45b890f7689eb0aba454fc5831d2d79763781677 (patch) | |
| tree | 71ab007123eaedd8553d570d6411dfde1062748e | |
| parent | be47941980d56238455eb54401c7b3de4ac5e269 (diff) | |
| parent | 123f42f0ad6815014f54d0cc6eb9039c46ee2907 (diff) | |
| download | linux-45b890f7689eb0aba454fc5831d2d79763781677.tar.gz linux-45b890f7689eb0aba454fc5831d2d79763781677.tar.bz2 linux-45b890f7689eb0aba454fc5831d2d79763781677.zip | |
Merge tag 'kvmarm-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 6.7
- Generalized infrastructure for 'writable' ID registers, effectively
allowing userspace to opt-out of certain vCPU features for its guest
- Optimization for vSGI injection, opportunistically compressing MPIDR
to vCPU mapping into a table
- Improvements to KVM's PMU emulation, allowing userspace to select
the number of PMCs available to a VM
- Guest support for memory operation instructions (FEAT_MOPS)
- Cleanups to handling feature flags in KVM_ARM_VCPU_INIT, squashing
bugs and getting rid of useless code
- Changes to the way the SMCCC filter is constructed, avoiding wasted
memory allocations when not in use
- Load the stage-2 MMU context at vcpu_load() for VHE systems, reducing
the overhead of errata mitigations
- Miscellaneous kernel and selftest fixes
64 files changed, 3019 insertions, 1177 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e81ab4e171e4..7025b3751027 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3422,6 +3422,8 @@ return indicates the attribute is implemented. It does not necessarily indicate that the attribute can be read or written in the device's current state. "addr" is ignored. +.. _KVM_ARM_VCPU_INIT: + 4.82 KVM_ARM_VCPU_INIT ---------------------- @@ -6140,6 +6142,56 @@ writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG interface. No error will be returned, but the resulting offset will not be applied. +.. _KVM_ARM_GET_REG_WRITABLE_MASKS: + +4.139 KVM_ARM_GET_REG_WRITABLE_MASKS +------------------------------------------- + +:Capability: KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES +:Architectures: arm64 +:Type: vm ioctl +:Parameters: struct reg_mask_range (in/out) +:Returns: 0 on success, < 0 on error + + +:: + + #define KVM_ARM_FEATURE_ID_RANGE 0 + #define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + + struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; + }; + +This ioctl copies the writable masks for a selected range of registers to +userspace. + +The ``addr`` field is a pointer to the destination array where KVM copies +the writable masks. + +The ``range`` field indicates the requested range of registers. +``KVM_CHECK_EXTENSION`` for the ``KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES`` +capability returns the supported ranges, expressed as a set of flags. Each +flag's bit index represents a possible value for the ``range`` field. +All other values are reserved for future use and KVM may return an error. + +The ``reserved[13]`` array is reserved for future use and should be 0, or +KVM may return an error. + +KVM_ARM_FEATURE_ID_RANGE (0) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Feature ID range is defined as the AArch64 System register space with +op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, op2=={0-7}. + +The mask returned array pointed to by ``addr`` is indexed by the macro +``ARM64_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2)``, allowing userspace +to know what fields can be changed for the system register described by +``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a +superset of the features supported by the system. + 5. The kvm_run structure ======================== diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst index e84848432158..7f231c724e16 100644 --- a/Documentation/virt/kvm/arm/index.rst +++ b/Documentation/virt/kvm/arm/index.rst @@ -11,3 +11,4 @@ ARM hypercalls pvtime ptp_kvm + vcpu-features diff --git a/Documentation/virt/kvm/arm/vcpu-features.rst b/Documentation/virt/kvm/arm/vcpu-features.rst new file mode 100644 index 000000000000..f7cc6d8d8b74 --- /dev/null +++ b/Documentation/virt/kvm/arm/vcpu-features.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +vCPU feature selection on arm64 +=============================== + +KVM/arm64 provides two mechanisms that allow userspace to configure +the CPU features presented to the guest. + +KVM_ARM_VCPU_INIT +================= + +The ``KVM_ARM_VCPU_INIT`` ioctl accepts a bitmap of feature flags +(``struct kvm_vcpu_init::features``). Features enabled by this interface are +*opt-in* and may change/extend UAPI. See :ref:`KVM_ARM_VCPU_INIT` for complete +documentation of the features controlled by the ioctl. + +Otherwise, all CPU features supported by KVM are described by the architected +ID registers. + +The ID Registers +================ + +The Arm architecture specifies a range of *ID Registers* that describe the set +of architectural features supported by the CPU implementation. KVM initializes +the guest's ID registers to the maximum set of CPU features supported by the +system. The ID register values may be VM-scoped in KVM, meaning that the +values could be shared for all vCPUs in a VM. + +KVM allows userspace to *opt-out* of certain CPU features described by the ID +registers by writing values to them via the ``KVM_SET_ONE_REG`` ioctl. The ID +registers are mutable until the VM has started, i.e. userspace has called +``KVM_RUN`` on at least one vCPU in the VM. Userspace can discover what fields +are mutable in the ID registers using the ``KVM_ARM_GET_REG_WRITABLE_MASKS``. +See the :ref:`ioctl documentation <KVM_ARM_GET_REG_WRITABLE_MASKS>` for more +details. + +Userspace is allowed to *limit* or *mask* CPU features according to the rules +outlined by the architecture in DDI0487J.a D19.1.3 'Principles of the ID +scheme for fields in ID register'. KVM does not allow ID register values that +exceed the capabilities of the system. + +.. warning:: + It is **strongly recommended** that userspace modify the ID register values + before accessing the rest of the vCPU's CPU register state. KVM may use the + ID register values to control feature emulation. Interleaving ID register + modification with other system register accesses may lead to unpredictable + behavior. diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst index 51e5e5762571..5817edb4e046 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst @@ -59,6 +59,13 @@ Groups: It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. + Note that to obtain reproducible results (the same VCPU being associated + with the same redistributor across a save/restore operation), VCPU creation + order, redistributor region creation order as well as the respective + interleaves of VCPU and region creation MUST be preserved. Any change in + either ordering may result in a different vcpu_id/redistributor association, + resulting in a VM that will fail to run at restore time. + Errors: ======= ============================================================= diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1095c6647e96..b85f46a73e21 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,9 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) +#define HCRX_GUEST_FLAGS \ + (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \ + (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0)) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3d6725ff0bf6..13c948a0502d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -54,6 +54,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature) +{ + return test_bit(feature, vcpu->kvm->arch.vcpu_features); +} + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -62,7 +67,7 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) #else static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { - return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features); + return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); } #endif @@ -465,7 +470,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; + return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) @@ -565,12 +570,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) - -static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) -{ - return test_bit(feature, vcpu->arch.features); -} - static __always_inline void kvm_write_cptr_el2(u64 val) { if (has_vhe() || has_hvhe()) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index af06ccb7ee34..5653d3553e3e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -78,7 +78,7 @@ extern unsigned int __ro_after_init kvm_sve_max_vl; int __init kvm_arm_init_sve(void); u32 __attribute_const__ kvm_target_cpu(void); -int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +void kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); struct kvm_hyp_memcache { @@ -158,6 +158,16 @@ struct kvm_s2_mmu { phys_addr_t pgd_phys; struct kvm_pgtable *pgt; + /* + * VTCR value used on the host. For a non-NV guest (or a NV + * guest that runs in a context where its own S2 doesn't + * apply), its T0SZ value reflects that of the IPA size. + * + * For a shadow S2 MMU, T0SZ reflects the PARange exposed to + * the guest. + */ + u64 vtcr; + /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -202,12 +212,34 @@ struct kvm_protected_vm { struct kvm_hyp_memcache teardown_mc; }; +struct kvm_mpidr_data { + u64 mpidr_mask; + DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx); +}; + +static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) +{ + unsigned long mask = data->mpidr_mask; + u64 aff = mpidr & MPIDR_HWID_BITMASK; + int nbits, bit, bit_idx = 0; + u16 index = 0; + + /* + * If this looks like RISC-V's BEXT or x86's PEXT + * instructions, it isn't by accident. + */ + nbits = fls(mask); + for_each_set_bit(bit, &mask, nbits) { + index |= (aff & BIT(bit)) >> (bit - bit_idx); + bit_idx++; + } + + return index; +} + struct kvm_arch { struct kvm_s2_mmu mmu; - /* VTCR_EL2 value for this VM */ - u64 vtcr; - /* Interrupt controller */ struct vgic_dist vgic; @@ -239,15 +271,16 @@ struct kvm_arch { #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5 /* Timer PPIs made immutable */ #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 - /* SMCCC filter initialized for the VM */ -#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7 /* Initial ID reg values loaded */ -#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8 +#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 unsigned long flags; /* VM-wide vCPU feature set */ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES); + /* MPIDR to vcpu index mapping, optional */ + struct kvm_mpidr_data *mpidr_data; + /* * VM-wide PMU filter, implemented as a bitmap and big enough for * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). @@ -257,6 +290,9 @@ struct kvm_arch { cpumask_var_t supported_cpus; + /* PMCR_EL0.N value for the guest */ + u8 pmcr_n; + /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; @@ -574,9 +610,6 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; - /* feature flags */ - DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); - /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; @@ -1025,7 +1058,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, extern unsigned int __ro_after_init kvm_arm_vmid_bits; int __init kvm_arm_vmid_alloc_init(void); void __init kvm_arm_vmid_alloc_free(void); -void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) @@ -1078,6 +1111,8 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, struct kvm_arm_copy_mte_tags *copy_tags); int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, struct kvm_arm_counter_offset *offset); +int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, + struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); @@ -1109,8 +1144,8 @@ static inline bool kvm_set_pmuserenr(u64 val) } #endif -void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu); int __init kvm_set_ipa_limit(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 66efd67ea7e8..145ce73fc16c 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -93,6 +93,8 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu); void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); #else +void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu); +void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu); void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); @@ -111,11 +113,6 @@ void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); void __sve_restore_state(void *sve_pffr, u32 *fpsr); -#ifndef __KVM_NVHE_HYPERVISOR__ -void activate_traps_vhe_load(struct kvm_vcpu *vcpu); -void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); -#endif - u64 __guest_enter(struct kvm_vcpu *vcpu); bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 96a80e8f6226..d86c8661200a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -150,9 +150,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) */ #define KVM_PHYS_SHIFT (40) -#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr) -#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) -#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) +#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr) +#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu)) +#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL)) #include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> @@ -224,16 +224,41 @@ static inline void __clean_dcache_guest_page(void *va, size_t size) kvm_flush_dcache_to_poc(va, size); } +static inline size_t __invalidate_icache_max_range(void) +{ + u8 iminline; + u64 ctr; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + ARM64_ALWAYS_SYSTEM, + kvm_compute_final_ctr_el0) + : "=r" (ctr)); + + iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2; + return MAX_DVM_OPS << iminline; +} + static inline void __invalidate_icache_guest_page(void *va, size_t size) { - if (icache_is_aliasing()) { - /* any kind of VIPT cache */ + /* + * VPIPT I-cache maintenance must be done from EL2. See comment in the + * nVHE flavor of __kvm_tlb_flush_vmid_ipa(). + */ + if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2) + return; + + /* + * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the + * invalidation range exceeds our arbitrary limit on invadations by + * cache line. + */ + if (icache_is_aliasing() || size > __invalidate_icache_max_range()) icache_inval_all_pou(); - } else if (read_sysreg(CurrentEL) != CurrentEL_EL1 || - !icache_is_vpipt()) { - /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + else icache_inval_pou((unsigned long)va, (unsigned long)va + size); - } } void kvm_set_way_flush(struct kvm_vcpu *vcpu); @@ -299,7 +324,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, struct kvm_arch *arch) { - write_sysreg(arch->vtcr, vtcr_el2); + write_sysreg(mmu->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index fa23cc9c2adc..6cec8e9c6c91 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -2,13 +2,14 @@ #ifndef __ARM64_KVM_NESTED_H #define __ARM64_KVM_NESTED_H +#include <asm/kvm_emulate.h> #include <linux/kvm_host.h> static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { return (!__is_defined(__KVM_NVHE_HYPERVISOR__) && cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && - test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features)); + vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2)); } extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index c8dca8ae359c..23d27623e478 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/ |
