diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2024-07-16 09:50:44 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2024-07-16 09:50:44 -0400 |
| commit | 1c5a0b55abeb6d99ed0962c6a6fa611821949523 (patch) | |
| tree | a56535ba0cd23a1a5f3ad175685cba09cdb6459e | |
| parent | c8b8b8190a80b591aa73c27c70a668799f8db547 (diff) | |
| parent | bb032b2352c33be374136889789103d724f1b613 (diff) | |
| download | linux-1c5a0b55abeb6d99ed0962c6a6fa611821949523.tar.gz linux-1c5a0b55abeb6d99ed0962c6a6fa611821949523.tar.bz2 linux-1c5a0b55abeb6d99ed0962c6a6fa611821949523.zip | |
Merge tag 'kvmarm-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 changes for 6.11
- Initial infrastructure for shadow stage-2 MMUs, as part of nested
virtualization enablement
- Support for userspace changes to the guest CTR_EL0 value, enabling
(in part) migration of VMs between heterogenous hardware
- Fixes + improvements to pKVM's FF-A proxy, adding support for v1.1 of
the protocol
- FPSIMD/SVE support for nested, including merged trap configuration
and exception routing
- New command-line parameter to control the WFx trap behavior under KVM
- Introduce kCFI hardening in the EL2 hypervisor
- Fixes + cleanups for handling presence/absence of FEAT_TCRX
- Miscellaneous fixes + documentation updates
39 files changed, 2764 insertions, 380 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 941b3105248d..8cbbf9a72041 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2720,6 +2720,24 @@ [KVM,ARM,EARLY] Allow use of GICv4 for direct injection of LPIs. + kvm-arm.wfe_trap_policy= + [KVM,ARM] Control when to set WFE instruction trap for + KVM VMs. Traps are allowed but not guaranteed by the + CPU architecture. + + trap: set WFE instruction trap + + notrap: clear WFE instruction trap + + kvm-arm.wfi_trap_policy= + [KVM,ARM] Control when to set WFI instruction trap for + KVM VMs. Traps are allowed but not guaranteed by the + CPU architecture. + + trap: set WFI instruction trap + + notrap: clear WFI instruction trap + kvm_cma_resv_ratio=n [PPC,EARLY] Reserves given percentage from system memory area for contiguous memory allocation for KVM hash pagetable diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 806aa0e75afb..172cec187fc1 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -891,12 +891,12 @@ like this:: The irq_type field has the following values: -- irq_type[0]: +- KVM_ARM_IRQ_TYPE_CPU: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ -- irq_type[1]: +- KVM_ARM_IRQ_TYPE_SPI: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.) (the vcpu_index field is ignored) -- irq_type[2]: +- KVM_ARM_IRQ_TYPE_PPI: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.) (The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs) @@ -1927,7 +1927,7 @@ flags: If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier for the device that wrote the MSI message. For PCI, this is usually a -BFD identifier in the lower 16 bits. +BDF identifier in the lower 16 bits. On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, @@ -2992,7 +2992,7 @@ flags: If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier for the device that wrote the MSI message. For PCI, this is usually a -BFD identifier in the lower 16 bits. +BDF identifier in the lower 16 bits. On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, diff --git a/Documentation/virt/kvm/devices/arm-vgic.rst b/Documentation/virt/kvm/devices/arm-vgic.rst index 40bdeea1d86e..19f0c6756891 100644 --- a/Documentation/virt/kvm/devices/arm-vgic.rst +++ b/Documentation/virt/kvm/devices/arm-vgic.rst @@ -31,7 +31,7 @@ Groups: KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) Base address in the guest physical address space of the GIC virtual cpu interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. - This address needs to be 4K aligned and the region covers 4 KByte. + This address needs to be 4K aligned and the region covers 8 KByte. Errors: diff --git a/MAINTAINERS b/MAINTAINERS index da5352dbd4f3..55ae69b77b64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12077,6 +12077,8 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git +F: Documentation/virt/kvm/arm/ +F: Documentation/virt/kvm/devices/arm* F: arch/arm64/include/asm/kvm* F: arch/arm64/include/uapi/asm/kvm* F: arch/arm64/kvm/ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 7abf09df7033..8eccf497894e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -152,6 +152,7 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ +#define ESR_ELx_FSC_ADDRSZ (0x00) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -379,6 +380,11 @@ #ifndef __ASSEMBLY__ #include <asm/types.h> +static inline unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + static inline bool esr_is_data_abort(unsigned long esr) { const unsigned long ec = ESR_ELx_EC(esr); @@ -386,6 +392,12 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + static inline bool esr_fsc_is_translation_fault(unsigned long esr) { /* Translation fault, level -1 */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b2adc2c6c82a..d81cc746e0eb 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,6 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a6330460d9e5..2181a11b9d92 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -232,6 +232,8 @@ extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, phys_addr_t start, unsigned long pages); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); + extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 21650e7924d4..a601a9305b10 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -11,6 +11,7 @@ #ifndef __ARM64_KVM_EMULATE_H__ #define __ARM64_KVM_EMULATE_H__ +#include <linux/bitfield.h> #include <linux/kvm_host.h> #include <asm/debug-monitors.h> @@ -55,6 +56,14 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) | + ESR_ELx_IL; + + kvm_inject_nested_sync(vcpu, esr); +} + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -69,39 +78,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (has_vhe() || has_hvhe()) - vcpu->arch.hcr_el2 |= HCR_E2H; - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) { - /* route synchronous external abort exceptions to EL2 */ - vcpu->arch.hcr_el2 |= HCR_TEA; - /* trap error record accesses */ - vcpu->arch.hcr_el2 |= HCR_TERR; - } + if (!vcpu_has_run_once(vcpu)) + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) { - vcpu->arch.hcr_el2 |= HCR_FWB; - } else { - /* - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C - * get set in SCTLR_EL1 such that we can detect when the guest - * MMU gets turned on and do the necessary cache maintenance - * then. - */ + /* + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C + * get set in SCTLR_EL1 such that we can detect when the guest + * MMU gets turned on and do the necessary cache maintenance + * then. + */ + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) vcpu->arch.hcr_el2 |= HCR_TVM; - } - - if (cpus_have_final_cap(ARM64_HAS_EVT) && - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) - vcpu->arch.hcr_el2 |= HCR_TID4; - else - vcpu->arch.hcr_el2 |= HCR_TID2; - - if (vcpu_el1_is_32bit(vcpu)) - vcpu->arch.hcr_el2 &= ~HCR_RW; - - if (kvm_has_mte(vcpu->kvm)) - vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) @@ -660,4 +647,50 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) kvm_write_cptr_el2(val); } + +/* + * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE + * format if E2H isn't set. + */ +static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu) +{ + u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2); + + if (!vcpu_el2_e2h_is_set(vcpu)) + cptr = translate_cptr_el2_to_cpacr_el1(cptr); + + return cptr; +} + +static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu, + unsigned int xen) +{ + switch (xen) { + case 0b00: + case 0b10: + return true; + case 0b01: + return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu); + case 0b11: + default: + return false; + } +} + +#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \ + (!vcpu_has_nv(vcpu) ? false : \ + ____cptr_xen_trap_enabled(vcpu, \ + SYS_FIELD_GET(CPACR_ELx, xen, \ + vcpu_sanitised_cptr_el2(vcpu)))) + +static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu) +{ + return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN); +} + +static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu) +{ + return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN); +} + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 36b8e97bf49e..25a3b72fbacf 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -189,6 +189,33 @@ struct kvm_s2_mmu { uint64_t split_page_chunk_size; struct kvm_arch *arch; + + /* + * For a shadow stage-2 MMU, the virtual vttbr used by the + * host to parse the guest S2. + * This either contains: + * - the virtual VTTBR programmed by the guest hypervisor with + * CnP cleared + * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid + * + * We also cache the full VTCR which gets used for TLB invalidation, + * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted + * to be cached in a TLB" to the letter. + */ + u64 tlb_vttbr; + u64 tlb_vtcr; + + /* + * true when this represents a nested context where virtual + * HCR_EL2.VM == 1 + */ + bool nested_stage2_enabled; + + /* + * 0: Nobody is currently using this, check vttbr for validity + * >0: Somebody is actively using this. + */ + atomic_t refcnt; }; struct kvm_arch_memory_slot { @@ -256,6 +283,14 @@ struct kvm_arch { */ u64 fgu[__NR_FGT_GROUP_IDS__]; + /* + * Stage 2 paging state for VMs with nested S2 using a virtual + * VMID. + */ + struct kvm_s2_mmu *nested_mmus; + size_t nested_mmus_size; + int nested_mmus_next; + /* Interrupt controller */ struct vgic_dist vgic; @@ -327,11 +362,11 @@ struct kvm_arch { * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) -#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) -#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)]) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; + u64 ctr_el0; + /* Masks for VNCR-baked sysregs */ struct kvm_sysreg_masks *sysreg_masks; @@ -423,6 +458,7 @@ enum vcpu_sysreg { MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ + ZCR_EL2, /* SVE Control Register (EL2) */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ @@ -867,6 +903,9 @@ struct kvm_vcpu_arch { #define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) +#define vcpu_sve_zcr_elx(vcpu) \ + (unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1) + #define vcpu_sve_state_size(vcpu) ({ \ size_t __size_ret; \ unsigned int __vcpu_vq; \ @@ -991,6 +1030,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; default: return false; } @@ -1036,6 +1076,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; default: return false; } @@ -1145,7 +1186,7 @@ int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); -void kvm_init_sysreg(struct kvm_vcpu *); +void kvm_calculate_traps(struct kvm_vcpu *vcpu); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); @@ -1306,6 +1347,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu); int __init kvm_set_ipa_limit(void); +u32 kvm_get_pa_bits(struct kvm *kvm); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); @@ -1355,6 +1397,24 @@ static inline void kvm_hyp_reserve(void) { } void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); +static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) +{ + switch (reg) { + case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7): + return &ka->id_regs[IDREG_IDX(reg)]; + case SYS_CTR_EL0: + return &ka->ctr_el0; + default: + WARN_ON_ONCE(1); + return NULL; + } +} + +#define kvm_read_vm_id_reg(kvm, reg) \ + ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; }) + +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); + #define __expand_field_sign_unsigned(id, fld, val) \ ((u64)SYS_FIELD_VALUE(id, fld, val)) @@ -1371,7 +1431,7 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); #define get_idreg_field_unsigned(kvm, id, fld) \ ({ \ - u64 __val = IDREG((kvm), SYS_##id); \ + u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \ FIELD_GET(id##_##fld##_MASK, __val); \ }) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b05bceca3385..c838309e4ec4 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -124,8 +124,8 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, #endif #ifdef __KVM_NVHE_HYPERVISOR__ -void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size, - phys_addr_t pgd, void *sp, void *cont_fn); +void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, + void (*fn)(void)); int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, unsigned long *per_cpu_base, u32 hyp_va_bits); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d5e48d870461..216ca424bb16 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -98,6 +98,7 @@ alternative_cb_end #include <asm/mmu_context.h> #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> +#include <asm/kvm_nested.h> void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -165,6 +166,10 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); void __init free_hyp_pgds(void); +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size); +void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); +void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); + void stage2_unmap_vm(struct kvm *kvm); int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_uninit_stage2_mmu(struct kvm *kvm); @@ -326,5 +331,26 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); } + +static inline u64 get_vmid(u64 vttbr) +{ + return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >> + VTTBR_VMID_SHIFT; +} + +static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu) +{ + return !(mmu->tlb_vttbr & VTTBR_CNP_BIT); +} + +static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +{ + /* + * Be careful, mmu may not be fully initialised so do look at + * *any* of its fields. + */ + return &kvm->arch.mmu != mmu; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 5e0ab0596246..5b06c31035a2 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -5,6 +5,7 @@ #include <linux/bitfield.h> #include <linux/kvm_host.h> #include <asm/kvm_emulate.h> +#include <asm/kvm_pgtable.h> static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { @@ -32,7 +33,7 @@ static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) { - u64 cpacr_el1 = 0; + u64 cpacr_el1 = CPACR_ELx_RES1; if (cptr_el2 & CPTR_EL2_TTA) cpacr_el1 |= CPACR_ELx_TTA; @@ -41,6 +42,8 @@ static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) if (!(cptr_el2 & CPTR_EL2_TZ)) cpacr_el1 |= CPACR_ELx_ZEN; + cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM); + return cpacr_el1; } @@ -61,6 +64,125 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) } extern bool forward_smc_trap(struct kvm_vcpu *vcpu); +extern void kvm_init_nested(struct kvm *kvm); +extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu); +extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu); +extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu); + +union tlbi_info; + +extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid, + const union tlbi_info *info, + void (*)(struct kvm_s2_mmu *, + const union tlbi_info *)); +extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); +extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); + +struct kvm_s2_trans { + phys_addr_t output; + unsigned long block_size; + bool writable; + bool readable; + int level; + u32 esr; + u64 upper_attr; +}; + +static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans) +{ + return trans->output; +} + +static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans) +{ + return trans->block_size; +} + +static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans) +{ + return trans->esr; +} + +static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans) +{ + return trans->readable; +} + +static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans) +{ + return trans->writable; +} + +static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) +{ + return !(trans->upper_attr & BIT(54)); +} + +extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, + struct kvm_s2_trans *result); +extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, + struct kvm_s2_trans *trans); +extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2); +extern void kvm_nested_s2_wp(struct kvm *kvm); +extern void kvm_nested_s2_unmap(struct kvm *kvm); +extern void kvm_nested_s2_flush(struct kvm *kvm); + +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val); + +static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL1)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + +static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL2)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} int kvm_init_nv_sysregs(struct kvm *kvm); @@ -76,4 +198,11 @@ static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) } #endif +#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0) + +static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans) +{ + return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level); +} + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index af3b206fa423..be4152819456 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -654,6 +654,23 @@ #define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) /* TLBI instructions */ +#define TLBI_Op0 1 + +#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */ +#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */ + +#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */ +#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/ + +#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */ +#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */ +#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */ +#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */ +#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */ +#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */ +#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */ +#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */ + #define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) #define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1) #define OP_TLBI_ASIDE1O |
