summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-02-16 10:25:12 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-02-16 10:25:12 -0800
commit82ff31645685559e3732f7143538c9fe88221453 (patch)
tree78c74a67eae91032767c7d798911f364260f9e8f
parentb878a1c072a4912e14a38e90a8b1883d4c513d9d (diff)
parentd3d0b8dfe06098d6d584266c35e9a0947f5b7132 (diff)
downloadlinux-82ff31645685559e3732f7143538c9fe88221453.tar.gz
linux-82ff31645685559e3732f7143538c9fe88221453.tar.bz2
linux-82ff31645685559e3732f7143538c9fe88221453.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "ARM: - Large set of fixes for vector handling, especially in the interactions between host and guest state. This fixes a number of bugs affecting actual deployments, and greatly simplifies the FP/SIMD/SVE handling. Thanks to Mark Rutland for dealing with this thankless task. - Fix an ugly race between vcpu and vgic creation/init, resulting in unexpected behaviours - Fix use of kernel VAs at EL2 when emulating timers with nVHE - Small set of pKVM improvements and cleanups x86: - Fix broken SNP support with KVM module built-in, ensuring the PSP module is initialized before KVM even when the module infrastructure cannot be used to order initcalls - Reject Hyper-V SEND_IPI hypercalls if the local APIC isn't being emulated by KVM to fix a NULL pointer dereference - Enter guest mode (L2) from KVM's perspective before initializing the vCPU's nested NPT MMU so that the MMU is properly tagged for L2, not L1 - Load the guest's DR6 outside of the innermost .vcpu_run() loop, as the guest's value may be stale if a VM-Exit is handled in the fastpath" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits) x86/sev: Fix broken SNP support with KVM module built-in KVM: SVM: Ensure PSP module is initialized if KVM module is built-in crypto: ccp: Add external API interface for PSP module initialization KVM: arm64: vgic: Hoist SGI/PPI alloc from vgic_init() to kvm_create_vgic() KVM: arm64: timer: Drop warning on failed interrupt signalling KVM: arm64: Fix alignment of kvm_hyp_memcache allocations KVM: arm64: Convert timer offset VA when accessed in HYP code KVM: arm64: Simplify warning in kvm_arch_vcpu_load_fp() KVM: arm64: Eagerly switch ZCR_EL{1,2} KVM: arm64: Mark some header functions as inline KVM: arm64: Refactor exit handlers KVM: arm64: Refactor CPTR trap deactivation KVM: arm64: Remove VHE host restore of CPACR_EL1.SMEN KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN KVM: arm64: Remove host FPSIMD saving for non-protected KVM KVM: arm64: Unconditionally save+flush host FPSIMD/SVE/SME state KVM: x86: Load DR6 with guest value only before entering .vcpu_run() loop KVM: nSVM: Enter guest mode before initializing nested NPT MMU KVM: selftests: Add CPUID tests for Hyper-V features that need in-kernel APIC KVM: selftests: Manage CPUID array in Hyper-V CPUID test's core helper ...
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h42
-rw-r--r--arch/arm64/include/asm/kvm_host.h24
-rw-r--r--arch/arm64/kernel/fpsimd.c25
-rw-r--r--arch/arm64/kvm/arch_timer.c16
-rw-r--r--arch/arm64/kvm/arm.c8
-rw-r--r--arch/arm64/kvm/fpsimd.c107
-rw-r--r--arch/arm64/kvm/hyp/entry.S5
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h148
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c15
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c76
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c89
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c33
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c74
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/sev.h2
-rw-r--r--arch/x86/kvm/hyperv.c6
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/svm/nested.c10
-rw-r--r--arch/x86/kvm/svm/sev.c10
-rw-r--r--arch/x86/kvm/svm/svm.c13
-rw-r--r--arch/x86/kvm/vmx/main.c1
-rw-r--r--arch/x86/kvm/vmx/vmx.c10
-rw-r--r--arch/x86/kvm/vmx/x86_ops.h1
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--arch/x86/virt/svm/sev.c23
-rw-r--r--drivers/crypto/ccp/sp-dev.c14
-rw-r--r--drivers/iommu/amd/init.c34
-rw-r--r--include/linux/psp-sev.h9
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c47
30 files changed, 421 insertions, 428 deletions
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 47f2cf408eed..78ec1ef2cfe8 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -605,48 +605,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
__cpacr_to_cptr_set(clr, set));\
} while (0)
-static __always_inline void kvm_write_cptr_el2(u64 val)
-{
- if (has_vhe() || has_hvhe())
- write_sysreg(val, cpacr_el1);
- else
- write_sysreg(val, cptr_el2);
-}
-
-/* Resets the value of cptr_el2 when returning to the host. */
-static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm)
-{
- u64 val;
-
- if (has_vhe()) {
- val = (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN);
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN_EL1EN;
- } else if (has_hvhe()) {
- val = CPACR_EL1_FPEN;
-
- if (!kvm_has_sve(kvm) || !guest_owns_fp_regs())
- val |= CPACR_EL1_ZEN;
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN;
- } else {
- val = CPTR_NVHE_EL2_RES1;
-
- if (kvm_has_sve(kvm) && guest_owns_fp_regs())
- val |= CPTR_EL2_TZ;
- if (!cpus_have_final_cap(ARM64_SME))
- val |= CPTR_EL2_TSM;
- }
-
- kvm_write_cptr_el2(val);
-}
-
-#ifdef __KVM_NVHE_HYPERVISOR__
-#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2(kern_hyp_va((v)->kvm))
-#else
-#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2((v)->kvm)
-#endif
-
/*
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
* format if E2H isn't set.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cfa024de4e3..3a7ec98ef123 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -100,7 +100,7 @@ static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
void *(*to_va)(phys_addr_t phys))
{
- phys_addr_t *p = to_va(mc->head);
+ phys_addr_t *p = to_va(mc->head & PAGE_MASK);
if (!mc->nr_pages)
return NULL;
@@ -615,8 +615,6 @@ struct cpu_sve_state {
struct kvm_host_data {
#define KVM_HOST_DATA_FLAG_HAS_SPE 0
#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
-#define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2
-#define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3
#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4
#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
unsigned long flags;
@@ -624,23 +622,13 @@ struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
/*
- * All pointers in this union are hyp VA.
+ * Hyp VA.
* sve_state is only used in pKVM and if system_supports_sve().
*/
- union {
- struct user_fpsimd_state *fpsimd_state;
- struct cpu_sve_state *sve_state;
- };
-
- union {
- /* HYP VA pointer to the host storage for FPMR */
- u64 *fpmr_ptr;
- /*
- * Used by pKVM only, as it needs to provide storage
- * for the host
- */
- u64 fpmr;
- };
+ struct cpu_sve_state *sve_state;
+
+ /* Used by pKVM only. */
+ u64 fpmr;
/* Ownership of the FP regs */
enum {
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2b601d88762d..8370d55f0353 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1695,31 +1695,6 @@ void fpsimd_signal_preserve_current_state(void)
}
/*
- * Called by KVM when entering the guest.
- */
-void fpsimd_kvm_prepare(void)
-{
- if (!system_supports_sve())
- return;
-
- /*
- * KVM does not save host SVE state since we can only enter
- * the guest from a syscall so the ABI means that only the
- * non-saved SVE state needs to be saved. If we have left
- * SVE enabled for performance reasons then update the task
- * state to be FPSIMD only.
- */
- get_cpu_fpsimd_context();
-
- if (test_and_clear_thread_flag(TIF_SVE)) {
- sve_to_fpsimd(current);
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- put_cpu_fpsimd_context();
-}
-
-/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 231c0cd9c7b4..70802e4c91cf 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -447,21 +447,19 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
struct arch_timer_context *timer_ctx)
{
- int ret;
-
kvm_timer_update_status(timer_ctx, new_level);
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
timer_ctx->irq.level);
- if (!userspace_irqchip(vcpu->kvm)) {
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
- timer_irq(timer_ctx),
- timer_ctx->irq.level,
- timer_ctx);
- WARN_ON(ret);
- }
+ if (userspace_irqchip(vcpu->kvm))
+ return;
+
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ timer_irq(timer_ctx),
+ timer_ctx->irq.level,
+ timer_ctx);
}
/* Only called for a fully emulated timer */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 071a7d75be68..b8e55a441282 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2481,14 +2481,6 @@ static void finalize_init_hyp_mode(void)
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
kern_hyp_va(sve_state);
}
- } else {
- for_each_possible_cpu(cpu) {
- struct user_fpsimd_state *fpsimd_state;
-
- fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
- per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
- kern_hyp_va(fpsimd_state);
- }
}
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 4d3d1a2eb157..7f6e43d25691 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -54,50 +54,18 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
if (!system_supports_fpsimd())
return;
- fpsimd_kvm_prepare();
-
/*
- * We will check TIF_FOREIGN_FPSTATE just before entering the
- * guest in kvm_arch_vcpu_ctxflush_fp() and override this to
- * FP_STATE_FREE if the flag set.
+ * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
+ * that the host kernel is responsible for restoring this state upon
+ * return to userspace, and the hyp code doesn't need to save anything.
+ *
+ * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
+ * that PSTATE.{SM,ZA} == {0,0}.
*/
- *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
- *host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
- *host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
-
- host_data_clear_flag(HOST_SVE_ENABLED);
- if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
- host_data_set_flag(HOST_SVE_ENABLED);
-
- if (system_supports_sme()) {
- host_data_clear_flag(HOST_SME_ENABLED);
- if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
- host_data_set_flag(HOST_SME_ENABLED);
-
- /*
- * If PSTATE.SM is enabled then save any pending FP
- * state and disable PSTATE.SM. If we leave PSTATE.SM
- * enabled and the guest does not enable SME via
- * CPACR_EL1.SMEN then operations that should be valid
- * may generate SME traps from EL1 to EL1 which we
- * can't intercept and which would confuse the guest.
- *
- * Do the same for PSTATE.ZA in the case where there
- * is state in the registers which has not already
- * been saved, this is very unlikely to happen.
- */
- if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
- *host_data_ptr(fp_owner) = FP_STATE_FREE;
- fpsimd_save_and_flush_cpu_state();
- }
- }
+ fpsimd_save_and_flush_cpu_state();
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
- /*
- * If normal guests gain SME support, maintain this behavior for pKVM
- * guests, which don't support SME.
- */
- WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
- read_sysreg_s(SYS_SVCR));
+ WARN_ON_ONCE(system_supports_sme() && read_sysreg_s(SYS_SVCR));
}
/*
@@ -162,52 +130,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
local_irq_save(flags);
- /*
- * If we have VHE then the Hyp code will reset CPACR_EL1 to
- * the default value and we need to reenable SME.
- */
- if (has_vhe() && system_supports_sme()) {
- /* Also restore EL0 state seen on entry */
- if (host_data_test_flag(HOST_SME_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN);
- else
- sysreg_clear_set(CPACR_EL1,
- CPACR_EL1_SMEN_EL0EN,
- CPACR_EL1_SMEN_EL1EN);
- isb();
- }
-
if (guest_owns_fp_regs()) {
- if (vcpu_has_sve(vcpu)) {
- u64 zcr = read_sysreg_el1(SYS_ZCR);
-
- /*
- * If the vCPU is in the hyp context then ZCR_EL1 is
- * loaded with its vEL2 counterpart.
- */
- __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr;
-
- /*
- * Restore the VL that was saved when bound to the CPU,
- * which is the maximum VL for the guest. Because the
- * layout of the data when saving the sve state depends
- * on the VL, we need to use a consistent (i.e., the
- * maximum) VL.
- * Note that this means that at guest exit ZCR_EL1 is
- * not necessarily the same as on guest entry.
- *
- * ZCR_EL2 holds the guest hypervisor's VL when running
- * a nested guest, which could be smaller than the
- * max for the vCPU. Similar to above, we first need to
- * switch to a VL consistent with the layout of the
- * vCPU's SVE state. KVM support for NV implies VHE, so
- * using the ZCR_EL1 alias is safe.
- */
- if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)))
- sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
- SYS_ZCR_EL1);
- }
-
/*
* Flush (save and invalidate) the fpsimd/sve state so that if
* the host tries to use fpsimd/sve, it's not using stale data
@@ -219,18 +142,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
* when needed.
*/
fpsimd_save_and_flush_cpu_state();
- } else if (has_vhe() && system_supports_sve()) {
- /*
- * The FPSIMD/SVE state in the CPU has not been touched, and we
- * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
- * reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE
- * for EL0. To avoid spurious traps, restore the trap state
- * seen by kvm_arch_vcpu_load_fp():
- */
- if (host_data_test_flag(HOST_SVE_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
- else
- sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
}
local_irq_restore(flags);
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 4433a234aa9b..9f4e8d68ab50 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
alternative_else_nop_endif
mrs x1, isr_el1
cbz x1, 1f
+
+ // Ensure that __guest_enter() always provides a context
+ // synchronization event so that callers don't need ISBs for anything
+ // that would usually be synchonized by the ERET.
+ isb
mov x0, #ARM_EXCEPTION_IRQ
ret
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index f838a45665f2..23bbe28eaaf9 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -326,7 +326,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
-static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
{
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
@@ -375,7 +375,87 @@ static inline void __hyp_sve_save_host(void)
true);
}
-static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ u64 zcr_el1, zcr_el2;
+
+ if (!guest_owns_fp_regs())
+ return;
+
+ if (vcpu_has_sve(vcpu)) {
+ /* A guest hypervisor may restrict the effective max VL. */
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
+ else
+ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+ zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu));
+ write_sysreg_el1(zcr_el1, SYS_ZCR);
+ }
+}
+
+static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ u64 zcr_el1, zcr_el2;
+
+ if (!guest_owns_fp_regs())
+ return;
+
+ /*
+ * When the guest owns the FP regs, we know that guest+hyp traps for
+ * any FPSIMD/SVE/SME features exposed to the guest have been disabled
+ * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
+ * prior to __guest_entry(). As __guest_entry() guarantees a context
+ * synchronization event, we don't need an ISB here to avoid taking
+ * traps for anything that was exposed to the guest.
+ */
+ if (vcpu_has_sve(vcpu)) {
+ zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
+
+ /*
+ * The guest's state is always saved using the guest's max VL.
+ * Ensure that the host has the guest's max VL active such that
+ * the host can save the guest's state lazily, but don't
+ * artificially restrict the host to the guest's max VL.
+ */
+ if (has_vhe()) {
+ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+ } else {
+ zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+ zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
+ write_sysreg_el1(zcr_el1, SYS_ZCR);
+ }
+ }
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state as not to reveal that
+ * fpsimd was used by a guest nor leak upper sve bits.
+ */
+ if (system_supports_sve()) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_EL1_ZEN, 0);
+
+ } else {
+ __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
+ }
+
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
+ *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
+}
+
/*
* We trap the first access to the FP/SIMD to save the host context and
@@ -383,7 +463,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
* If FP/SIMD is not implemented, handle the trap and inject an undefined
* instruction exception to the guest. Similarly for trapped SVE accesses.
*/
-static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
@@ -425,7 +505,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
/* Write out the host state if it's in the registers */
- if (host_owns_fp_regs())
+ if (is_protected_kvm_enabled() && host_owns_fp_regs())
kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
@@ -501,9 +581,22 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
+/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */
+static inline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt)
+{
+ u64 offset = 0;
+
+ if (ctxt->offset.vm_offset)
+ offset += *kern_hyp_va(ctxt->offset.vm_offset);
+ if (ctxt->offset.vcpu_offset)
+ offset += *kern_hyp_va(ctxt->offset.vcpu_offset);
+
+ return offset;
+}
+
static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
{
- return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt);
+ return arch_timer_read_cntpct_el0() - hyp_timer_get_offset(ctxt);
}
static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu)
@@ -587,7 +680,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
return true;
}
-static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
handle_tx2_tvm(vcpu))
@@ -607,7 +700,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
-static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
__vgic_v3_perform_cpuif_access(vcpu) == 1)
@@ -616,19 +709,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
-static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
+ u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
return false;
}
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
- __alias(kvm_hyp_handle_memory_fault);
-static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
- __alias(kvm_hyp_handle_memory_fault);
+#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault
+#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault
-static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;
@@ -658,23 +750,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
-
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
-
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
* Returns true if the hypervisor handled the exit, and control should go back
* to the guest, or false if it hasn't.
*/
-static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+ const exit_handler_fn *handlers)
{
- const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
- exit_handler_fn fn;
-
- fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
-
+ exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
if (fn)
return fn(vcpu, exit_code);
@@ -704,20 +789,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code
* the guest, false when we should restore the host state and return to the
* main run loop.
*/
-static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+ const exit_handler_fn *handlers)
{
- /*
- * Save PSTATE early so that we can evaluate the vcpu mode
- * early on.
- */
- synchronize_vcpu_pstate(vcpu, exit_code);
-
- /*
- * Check whether we want to repaint the state one way or
- * another.
- */
- early_exit_filter(vcpu, exit_code);
-
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
@@ -747,7 +821,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
goto exit;
/* Check if there's an exit handler and allow it to handle the exit. */
- if (kvm_hyp_handle_exit(vcpu, exit_code))
+ if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
goto guest;
exit:
/* Return to the host kernel and handle the exit */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 6e12c070832f..2c37680d954c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -5,6 +5,7 @@
*/
#include <hyp/adjust_pc.h>
+#include <hyp/switch.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@@ -83,7 +84,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (system_supports_sve())
__hyp_sve_restore_host();
else
- __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+ __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));
if (has_fpmr)
write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
@@ -224,8 +225,12 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
sync_hyp_vcpu(hyp_vcpu);
} else {
+ struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu);
+
/* The host is fully trusted, run its vCPU directly. */
- ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu));
+ fpsimd_lazy_switch_to_guest(vcpu);
+ ret = __kvm_vcpu_run(vcpu);
+ fpsimd_lazy_switch_to_host(vcpu);
}
out:
cpu_reg(host_ctxt, 1) = ret;
@@ -675,12 +680,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
- case ESR_ELx_EC_SVE:
- cpacr_clear_set(0, CPACR_EL1_ZEN);
- isb();
- sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
- SYS_ZCR_EL2);
- break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
handle_host_mem_abort(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 7ad7b133b81a..19c3c631708c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -943,10 +943,10 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip
ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
if (ret)
return ret;
- if (level != KVM_PGTABLE_LAST_LEVEL)
- return -E2BIG;
if (!kvm_pte_valid(pte))
return -ENOENT;
+ if (level != KVM_PGTABLE_LAST_LEVEL)
+ return -E2BIG;
state = guest_get_page_state(pte, ipa);
if (state != PKVM_PAGE_SHARED_BORROWED)
@@ -998,63 +998,73 @@ unlock:
return ret;
}
-int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
+static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
- struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
- u64 ipa = hyp_pfn_to_phys(gfn);
u64 phys;
int ret;
- if (prot & ~KVM_PGTABLE_PROT_RWX)
- return -EINVAL;
+ if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
+ return;
host_lock_component();
guest_lock_component(vm);
ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);
host_unlock_component();
- return ret;
+ WARN_ON(ret && ret != -ENOENT);
}
-int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
- u64 phys;
int ret;
- host_lock_component();
- guest_lock_component(vm);
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
- ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+ if (prot & ~KVM_PGTABLE_PROT_RWX)
+ return -EINVAL;
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);
- host_unlock_component();
return ret;
}
-int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
+int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
u64 ipa = hyp_pfn_to_phys(gfn);
- u64 phys;
int ret;
- host_lock_component();
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
+
+ assert_host_shared_guest(vm, ipa);
guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+ guest_unlock_component(vm);
- ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
+ return ret;
+}
+
+int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
+{
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ int ret;
+
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
guest_unlock_component(vm);
- host_unlock_component();
return ret;
}
@@ -1063,18 +1073,14 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
- u64 phys;
- int ret;
-
- host_lock_component();
- guest_lock_component(vm);