summaryrefslogtreecommitdiff
path: root/arch/riscv/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-07-03 15:32:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-03 15:32:22 -0700
commite8069f5a8e3bdb5fdeeff895780529388592ee7a (patch)
treece35ab85db9b66a7e488707fccdb33ce54f696dd /arch/riscv/kvm
parenteded37770c9f80ecd5ba842359c4f1058d9812c3 (diff)
parent255006adb3da71bb75c334453786df781b415f54 (diff)
downloadlinux-e8069f5a8e3bdb5fdeeff895780529388592ee7a.tar.gz
linux-e8069f5a8e3bdb5fdeeff895780529388592ee7a.tar.bz2
linux-e8069f5a8e3bdb5fdeeff895780529388592ee7a.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM64: - Eager page splitting optimization for dirty logging, optionally allowing for a VM to avoid the cost of hugepage splitting in the stage-2 fault path. - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with services that live in the Secure world. pKVM intervenes on FF-A calls to guarantee the host doesn't misuse memory donated to the hyp or a pKVM guest. - Support for running the split hypervisor with VHE enabled, known as 'hVHE' mode. This is extremely useful for testing the split hypervisor on VHE-only systems, and paves the way for new use cases that depend on having two TTBRs available at EL2. - Generalized framework for configurable ID registers from userspace. KVM/arm64 currently prevents arbitrary CPU feature set configuration from userspace, but the intent is to relax this limitation and allow userspace to select a feature set consistent with the CPU. - Enable the use of Branch Target Identification (FEAT_BTI) in the hypervisor. - Use a separate set of pointer authentication keys for the hypervisor when running in protected mode, as the host is untrusted at runtime. - Ensure timer IRQs are consistently released in the init failure paths. - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps (FEAT_EVT), as it is a register commonly read from userspace. - Erratum workaround for the upcoming AmpereOne part, which has broken hardware A/D state management. RISC-V: - Redirect AMO load/store misaligned traps to KVM guest - Trap-n-emulate AIA in-kernel irqchip for KVM guest - Svnapot support for KVM Guest s390: - New uvdevice secret API - CMM selftest and fixes - fix racy access to target CPU for diag 9c x86: - Fix missing/incorrect #GP checks on ENCLS - Use standard mmu_notifier hooks for handling APIC access page - Drop now unnecessary TR/TSS load after VM-Exit on AMD - Print more descriptive information about the status of SEV and SEV-ES during module load - Add a test for splitting and reconstituting hugepages during and after dirty logging - Add support for CPU pinning in demand paging test - Add support for AMD PerfMonV2, with a variety of cleanups and minor fixes included along the way - Add a "nx_huge_pages=never" option to effectively avoid creating NX hugepage recovery threads (because nx_huge_pages=off can be toggled at runtime) - Move handling of PAT out of MTRR code and dedup SVM+VMX code - Fix output of PIC poll command emulation when there's an interrupt - Add a maintainer's handbook to document KVM x86 processes, preferred coding style, testing expectations, etc. - Misc cleanups, fixes and comments Generic: - Miscellaneous bugfixes and cleanups Selftests: - Generate dependency files so that partial rebuilds work as expected" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits) Documentation/process: Add a maintainer handbook for KVM x86 Documentation/process: Add a label for the tip tree handbook's coding style KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index RISC-V: KVM: Remove unneeded semicolon RISC-V: KVM: Allow Svnapot extension for Guest/VM riscv: kvm: define vcpu_sbi_ext_pmu in header RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip RISC-V: KVM: Add in-kernel emulation of AIA APLIC RISC-V: KVM: Implement device interface for AIA irqchip RISC-V: KVM: Skeletal in-kernel AIA irqchip support RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero RISC-V: KVM: Add APLIC related defines RISC-V: KVM: Add IMSIC related defines RISC-V: KVM: Implement guest external interrupt line management KVM: x86: Remove PRIx* definitions as they are solely for user space s390/uv: Update query for secret-UVCs s390/uv: replace scnprintf with sysfs_emit s390/uvdevice: Add 'Lock Secret Store' UVC ...
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--arch/riscv/kvm/Kconfig4
-rw-r--r--arch/riscv/kvm/Makefile3
-rw-r--r--arch/riscv/kvm/aia.c274
-rw-r--r--arch/riscv/kvm/aia_aplic.c619
-rw-r--r--arch/riscv/kvm/aia_device.c673
-rw-r--r--arch/riscv/kvm/aia_imsic.c1084
-rw-r--r--arch/riscv/kvm/main.c3
-rw-r--r--arch/riscv/kvm/tlb.c2
-rw-r--r--arch/riscv/kvm/vcpu.c4
-rw-r--r--arch/riscv/kvm/vcpu_exit.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c80
-rw-r--r--arch/riscv/kvm/vm.c118
12 files changed, 2836 insertions, 30 deletions
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 28891e583259..dfc237d7875b 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -21,6 +21,10 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU
select HAVE_KVM_EVENTFD
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
+ select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_MSI
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 7b4c21f9aa6a..fee0671e2dc1 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -28,3 +28,6 @@ kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
+kvm-y += aia_device.o
+kvm-y += aia_aplic.o
+kvm-y += aia_imsic.o
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 4f1286fc7f17..585a3b42c52c 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -8,11 +8,49 @@
*/
#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
#include <asm/hwcap.h>
+#include <asm/kvm_aia_imsic.h>
+struct aia_hgei_control {
+ raw_spinlock_t lock;
+ unsigned long free_bitmap;
+ struct kvm_vcpu *owners[BITS_PER_LONG];
+};
+static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
+static int hgei_parent_irq;
+
+unsigned int kvm_riscv_aia_nr_hgei;
+unsigned int kvm_riscv_aia_max_ids;
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
+static int aia_find_hgei(struct kvm_vcpu *owner)
+{
+ int i, hgei;
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ hgei = -1;
+ for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
+ if (hgctrl->owners[i] == owner) {
+ hgei = i;
+ break;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
+ return hgei;
+}
+
static void aia_set_hvictl(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -56,6 +94,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
+ int hgei;
unsigned long seip;
if (!kvm_riscv_aia_available())
@@ -74,6 +113,10 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;
+ hgei = aia_find_hgei(vcpu);
+ if (hgei > 0)
+ return !!(csr_read(CSR_HGEIP) & BIT(hgei));
+
return false;
}
@@ -323,8 +366,6 @@ static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel,
return KVM_INSN_CONTINUE_NEXT_SEPC;
}
-#define IMSIC_FIRST 0x70
-#define IMSIC_LAST 0xff
int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask)
@@ -348,6 +389,143 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
return KVM_INSN_EXIT_TO_USER_SPACE;
}
+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+ void __iomem **hgei_va, phys_addr_t *hgei_pa)
+{
+ int ret = -ENOENT;
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+ if (!kvm_riscv_aia_available() || !hgctrl)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ if (hgctrl->free_bitmap) {
+ ret = __ffs(hgctrl->free_bitmap);
+ hgctrl->free_bitmap &= ~BIT(ret);
+ hgctrl->owners[ret] = owner;
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ /* TODO: To be updated later by AIA IMSIC HW guest file support */
+ if (hgei_va)
+ *hgei_va = NULL;
+ if (hgei_pa)
+ *hgei_pa = 0;
+
+ return ret;
+}
+
+void kvm_riscv_aia_free_hgei(int cpu, int hgei)
+{
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+ if (!kvm_riscv_aia_available() || !hgctrl)
+ return;
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ if (hgei > 0 && hgei <= kvm_riscv_aia_nr_hgei) {
+ if (!(hgctrl->free_bitmap & BIT(hgei))) {
+ hgctrl->free_bitmap |= BIT(hgei);
+ hgctrl->owners[hgei] = NULL;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+}
+
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
+{
+ int hgei;
+
+ if (!kvm_riscv_aia_available())
+ return;
+
+ hgei = aia_find_hgei(owner);
+ if (hgei > 0) {
+ if (enable)
+ csr_set(CSR_HGEIE, BIT(hgei));
+ else
+ csr_clear(CSR_HGEIE, BIT(hgei));
+ }
+}
+
+static irqreturn_t hgei_interrupt(int irq, void *dev_id)
+{
+ int i;
+ unsigned long hgei_mask, flags;
+ struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+ hgei_mask = csr_read(CSR_HGEIP) & csr_read(CSR_HGEIE);
+ csr_clear(CSR_HGEIE, hgei_mask);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ for_each_set_bit(i, &hgei_mask, BITS_PER_LONG) {
+ if (hgctrl->owners[i])
+ kvm_vcpu_kick(hgctrl->owners[i]);
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
+ return IRQ_HANDLED;
+}
+
+static int aia_hgei_init(void)
+{
+ int cpu, rc;
+ struct irq_domain *domain;
+ struct aia_hgei_control *hgctrl;
+
+ /* Initialize per-CPU guest external interrupt line management */
+ for_each_possible_cpu(cpu) {
+ hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+ raw_spin_lock_init(&hgctrl->lock);
+ if (kvm_riscv_aia_nr_hgei) {
+ hgctrl->free_bitmap =
+ BIT(kvm_riscv_aia_nr_hgei + 1) - 1;
+ hgctrl->free_bitmap &= ~BIT(0);
+ } else
+ hgctrl->free_bitmap = 0;
+ }
+
+ /* Find INTC irq domain */
+ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
+ DOMAIN_BUS_ANY);
+ if (!domain) {
+ kvm_err("unable to find INTC domain\n");
+ return -ENOENT;
+ }
+
+ /* Map per-CPU SGEI interrupt from INTC domain */
+ hgei_parent_irq = irq_create_mapping(domain, IRQ_S_GEXT);
+ if (!hgei_parent_irq) {
+ kvm_err("unable to map SGEI IRQ\n");
+ return -ENOMEM;
+ }
+
+ /* Request per-CPU SGEI interrupt */
+ rc = request_percpu_irq(hgei_parent_irq, hgei_interrupt,
+ "riscv-kvm", &aia_hgei);
+ if (rc) {
+ kvm_err("failed to request SGEI IRQ\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void aia_hgei_exit(void)
+{
+ /* Free per-CPU SGEI interrupt */
+ free_percpu_irq(hgei_parent_irq, &aia_hgei);
+}
+
void kvm_riscv_aia_enable(void)
{
if (!kvm_riscv_aia_available())
@@ -362,21 +540,105 @@ void kvm_riscv_aia_enable(void)
csr_write(CSR_HVIPRIO1H, 0x0);
csr_write(CSR_HVIPRIO2H, 0x0);
#endif
+
+ /* Enable per-CPU SGEI interrupt */
+ enable_percpu_irq(hgei_parent_irq,
+ irq_get_trigger_type(hgei_parent_irq));
+ csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
}
void kvm_riscv_aia_disable(void)
{
+ int i;
+ unsigned long flags;
+ struct kvm_vcpu *vcpu;
+ struct aia_hgei_control *hgctrl;
+
if (!kvm_riscv_aia_available())
return;
+ hgctrl = get_cpu_ptr(&aia_hgei);
+
+ /* Disable per-CPU SGEI interrupt */
+ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
+ disable_percpu_irq(hgei_parent_irq);
aia_set_hvictl(false);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ for (i = 0; i <= kvm_riscv_aia_nr_hgei; i++) {
+ vcpu = hgctrl->owners[i];
+ if (!vcpu)
+ continue;
+
+ /*
+ * We release hgctrl->lock before notifying IMSIC
+ * so that we don't have lock ordering issues.
+ */
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ /* Notify IMSIC */
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ /*
+ * Wakeup VCPU if it was blocked so that it can
+ * run on other HARTs
+ */
+ if (csr_read(CSR_HGEIE) & BIT(i)) {
+ csr_clear(CSR_HGEIE, BIT(i));
+ kvm_vcpu_kick(vcpu);
+ }
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
}
int kvm_riscv_aia_init(void)
{
+ int rc;
+
if (!riscv_isa_extension_available(NULL, SxAIA))
return -ENODEV;
+ /* Figure-out number of bits in HGEIE */
+ csr_write(CSR_HGEIE, -1UL);
+ kvm_riscv_aia_nr_hgei = fls_long(csr_read(CSR_HGEIE));
+ csr_write(CSR_HGEIE, 0);
+ if (kvm_riscv_aia_nr_hgei)
+ kvm_riscv_aia_nr_hgei--;
+
+ /*
+ * Number of usable HGEI lines should be minimum of per-HART
+ * IMSIC guest files and number of bits in HGEIE
+ *
+ * TODO: To be updated later by AIA IMSIC HW guest file support
+ */
+ kvm_riscv_aia_nr_hgei = 0;
+
+ /*
+ * Find number of guest MSI IDs
+ *
+ * TODO: To be updated later by AIA IMSIC HW guest file support
+ */
+ kvm_riscv_aia_max_ids = IMSIC_MAX_ID;
+
+ /* Initialize guest external interrupt line management */
+ rc = aia_hgei_init();
+ if (rc)
+ return rc;
+
+ /* Register device operations */
+ rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops,
+ KVM_DEV_TYPE_RISCV_AIA);
+ if (rc) {
+ aia_hgei_exit();
+ return rc;
+ }
+
/* Enable KVM AIA support */
static_branch_enable(&kvm_riscv_aia_available);
@@ -385,4 +647,12 @@ int kvm_riscv_aia_init(void)
void kvm_riscv_aia_exit(void)
{
+ if (!kvm_riscv_aia_available())
+ return;
+
+ /* Unregister device operations */
+ kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA);
+
+ /* Cleanup the HGEI state */
+ aia_hgei_exit();
}
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
new file mode 100644
index 000000000000..39e72aa016a4
--- /dev/null
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/kvm_aia_aplic.h>
+
+struct aplic_irq {
+ raw_spinlock_t lock;
+ u32 sourcecfg;
+ u32 state;
+#define APLIC_IRQ_STATE_PENDING BIT(0)
+#define APLIC_IRQ_STATE_ENABLED BIT(1)
+#define APLIC_IRQ_STATE_ENPEND (APLIC_IRQ_STATE_PENDING | \
+ APLIC_IRQ_STATE_ENABLED)
+#define APLIC_IRQ_STATE_INPUT BIT(8)
+ u32 target;
+};
+
+struct aplic {
+ struct kvm_io_device iodev;
+
+ u32 domaincfg;
+ u32 genmsi;
+
+ u32 nr_irqs;
+ u32 nr_words;
+ struct aplic_irq *irqs;
+};
+
+static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq)
+{
+ u32 ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return 0;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = irqd->sourcecfg;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ if (val & APLIC_SOURCECFG_D)
+ val = 0;
+ else
+ val &= APLIC_SOURCECFG_SM_MASK;
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ irqd->sourcecfg = val;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static u32 aplic_read_target(struct aplic *aplic, u32 irq)
+{
+ u32 ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return 0;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = irqd->target;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ val &= APLIC_TARGET_EIID_MASK |
+ (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) |
+ (APLIC_TARGET_GUEST_IDX_MASK << APLIC_TARGET_GUEST_IDX_SHIFT);
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ irqd->target = val;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_pending(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
+{
+ unsigned long flags, sm;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (!pending &&
+ ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
+ (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ goto skip_write_pending;
+
+ if (pending)
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+
+skip_write_pending:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_enabled(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ if (enabled)
+ irqd->state |= APLIC_IRQ_STATE_ENABLED;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_ENABLED;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_input(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_inject_msi(struct kvm *kvm, u32 irq, u32 target)
+{
+ u32 hart_idx, guest_idx, eiid;
+
+ hart_idx = target >> APLIC_TARGET_HART_IDX_SHIFT;
+ hart_idx &= APLIC_TARGET_HART_IDX_MASK;
+ guest_idx = target >> APLIC_TARGET_GUEST_IDX_SHIFT;
+ guest_idx &= APLIC_TARGET_GUEST_IDX_MASK;
+ eiid = target & APLIC_TARGET_EIID_MASK;
+ kvm_riscv_aia_inject_msi_by_id(kvm, hart_idx, guest_idx, eiid);
+}
+
+static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last)
+{
+ bool inject;
+ u32 irq, target;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!(aplic->domaincfg & APLIC_DOMAINCFG_IE))
+ return;
+
+ for (irq = first; irq <= last; irq++) {
+ if (!irq || aplic->nr_irqs <= irq)
+ continue;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ inject = false;
+ target = irqd->target;
+ if ((irqd->state & APLIC_IRQ_STATE_ENPEND) ==
+ APLIC_IRQ_STATE_ENPEND) {
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+ inject = true;
+ }
+
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ if (inject)
+ aplic_inject_msi(kvm, irq, target);
+ }
+}
+
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level)
+{
+ u32 target;
+ bool inject = false, ie;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!aplic || !source || (aplic->nr_irqs <= source))
+ return -ENODEV;
+ irqd = &aplic->irqs[source];
+ ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false;
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ if (irqd->sourcecfg & APLIC_SOURCECFG_D)
+ goto skip_unlock;
+
+ switch (irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK) {
+ case APLIC_SOURCECFG_SM_EDGE_RISE:
+ if (level && !(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_EDGE_FALL:
+ if (!level && (irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_LEVEL_HIGH:
+ if (level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_LEVEL_LOW:
+ if (!level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ }
+
+ if (level)
+ irqd->state |= APLIC_IRQ_STATE_INPUT;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_INPUT;
+
+ target = irqd->target;
+ if (ie && ((irqd->state & APLIC_IRQ_STATE_ENPEND) ==
+ APLIC_IRQ_STATE_ENPEND)) {
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+ inject = true;
+ }
+
+skip_unlock:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ if (inject)
+ aplic_inject_msi(kvm, source, target);
+
+ return 0;
+}
+
+static u32 aplic_read_input_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_input(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static u32 aplic_read_pending_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_pending(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static void aplic_write_pending_word(struct aplic *aplic, u32 word,
+ u32 val, bool pending)
+{
+ u32 i;
+
+ for (i = 0; i < 32; i++) {
+ if (val & BIT(i))
+ aplic_write_pending(aplic, word * 32 + i, pending);
+ }
+}
+
+static u32 aplic_read_enabled_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_enabled(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static void aplic_write_enabled_word(struct aplic *aplic, u32 word,
+ u32 val, bool enabled)
+{
+ u32 i;
+
+ for (i = 0; i < 32; i++) {
+ if (val & BIT(i))
+ aplic_write_enabled(aplic, word * 32 + i, enabled);
+ }
+}
+
+static int aplic_mmio_read_offset(struct kvm *kvm, gpa_t off, u32 *val32)
+{
+ u32 i;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if ((off & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ if (off == APLIC_DOMAINCFG) {
+ *val32 = APLIC_DOMAINCFG_RDONLY |
+ aplic->domaincfg | APLIC_DOMAINCFG_DM;
+ } else if ((off >= APLIC_SOURCECFG_BASE) &&
+ (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+ *val32 = aplic_read_sourcecfg(aplic, i);
+ } else if ((off >= APLIC_SETIP_BASE) &&
+ (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIP_BASE) >> 2;
+ *val32 = aplic_read_pending_word(aplic, i);
+ } else if (off == APLIC_SETIPNUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_CLRIP_BASE) &&
+ (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIP_BASE) >> 2;
+ *val32 = aplic_read_input_word(aplic, i);
+ } else if (off == APLIC_CLRIPNUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_SETIE_BASE) &&
+ (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIE_BASE) >> 2;
+ *val32 = aplic_read_enabled_word(aplic, i);
+ } else if (off == APLIC_SETIENUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_CLRIE_BASE) &&
+ (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+ *val32 = 0;
+ } else if (off == APLIC_CLRIENUM) {
+ *val32 = 0;
+ } else if (off == APLIC_SETIPNUM_LE) {
+ *val32 = 0;
+ } else if (off == APLIC_SETIPNUM_BE) {
+ *val32 = 0;
+ } else if (off == APLIC_GENMSI) {
+ *val32 = aplic->genmsi;
+ } else if ((off >= APLIC_TARGET_BASE) &&
+ (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+ *val32 = aplic_read_target(aplic, i);
+ } else
+ return -ENODEV;
+
+ return 0;
+}
+
+static int aplic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ if (len != 4)
+ return -EOPNOTSUPP;
+
+ return aplic_mmio_read_offset(vcpu->kvm,
+ addr - vcpu->kvm->arch.aia.aplic_addr,
+ val);
+}
+
+static int aplic_mmio_write_offset(struct kvm *kvm, gpa_t off, u32 val32)
+{
+ u32 i;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if ((off & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ if (off == APLIC_DOMAINCFG) {
+ /* Only IE bit writeable */
+ aplic->domaincfg = val32 & APLIC_DOMAINCFG_IE;
+ } else if ((off >= APLIC_SOURCECFG_BASE) &&
+ (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+ aplic_write_sourcecfg(aplic, i, val32);
+ } else if ((off >= APLIC_SETIP_BASE) &&
+ (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIP_BASE) >> 2;
+ aplic_write_pending_word(aplic, i, val32, true);
+ } else if (off == APLIC_SETIPNUM) {
+ aplic_write_pending(aplic, val32, true);
+ } else if ((off >= APLIC_CLRIP_BASE) &&
+ (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIP_BASE) >> 2;
+ aplic_write_pending_word(aplic, i, val32, false);
+ } else if (off == APLIC_CLRIPNUM) {
+ aplic_write_pending(aplic, val32, false);
+ } else if ((off >= APLIC_SETIE_BASE) &&
+ (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIE_BASE) >> 2;
+ aplic_write_enabled_word(aplic, i, val32, true);
+ } else if (off == APLIC_SETIENUM) {
+ aplic_write_enabled(aplic, val32, true);
+ } else if ((off >= APLIC_CLRIE_BASE) &&
+ (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIE_BASE) >> 2;
+ aplic_write_enabled_word(aplic, i, val32, false);
+ } else if (off == APLIC_CLRIENUM) {
+ aplic_write_enabled(aplic, val32, false);
+ } else if (off == APLIC_SETIPNUM_LE) {
+ aplic_write_pending(aplic, val32, true);
+ } else if (off == APLIC_SETIPNUM_BE) {
+ aplic_write_pending(aplic, __swab32(val32), true);
+ } else if (off == APLIC_GENMSI) {
+ aplic->genmsi = val32 & ~(APLIC_TARGET_GUEST_IDX_MASK <<
+ APLIC_TARGET_GUEST_IDX_SHIFT);
+ kvm_riscv_aia_inject_msi_by_id(kvm,
+ val32 >> APLIC_TARGET_HART_IDX_SHIFT, 0,
+ val32 & APLIC_TARGET_EIID_MASK);
+ } else if ((off >= APLIC_TARGET_BASE) &&
+ (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+ aplic_write_target(aplic, i, val32);
+ } else
+ return -ENODEV;
+
+ aplic_update_irq_range(kvm, 1, aplic->nr_irqs - 1);
+
+ return 0;
+}
+
+static int aplic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ if (len != 4)
+ return -EOPNOTSUPP;
+
+ return aplic_mmio_write_offset(vcpu->kvm,
+ addr - vcpu->kvm->arch.aia.aplic_addr,
+ *((const u32 *)val));
+}
+
+static struct kvm_io_device_ops aplic_iodoev_ops = {
+ .read = aplic_mmio_read,
+ .write = aplic_mmio_write,
+};
+
+int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v)
+{
+ int rc;
+
+ if (!kvm->arch.aia.aplic_state)
+ return -ENODEV;
+
+ rc = aplic_mmio_write_offset(kvm, type, v);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v)
+{
+ int rc;
+
+ if (!kvm->arch.aia.aplic_state)
+ return -ENODEV;
+
+ rc = aplic_mmio_read_offset(kvm, type, v);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int kvm_riscv_aia_aplic_has_attr(struct kvm *kvm, unsigned long type)
+{
+ int rc;
+ u32 val;
+
+ if (!kvm->arch.aia.aplic_state)
+ return -ENODEV;
+
+ rc = aplic_mmio_read_offset(kvm, type, &val);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int kvm_riscv_aia_aplic_init(struct kvm *kvm)
+{
+ int i, ret = 0;
+ struct aplic *aplic;
+
+ /* Do nothing if we have zero sources */
+ if (!kvm->arch.aia.nr_sources)
+ return 0;
+
+ /* Allocate APLIC global state */
+ aplic = kzalloc(sizeof(*aplic), GFP_KERNEL);
+ if (!aplic)
+ return -ENOMEM;
+ kvm->arch.aia.aplic_state = aplic;
+
+ /* Setup APLIC IRQs */
+ aplic->nr_irqs = kvm->arch.aia.nr_sources + 1;
+ aplic->nr_words = DIV_ROUND_UP(aplic->nr_irqs, 32);
+ aplic->irqs = kcalloc(aplic->nr_irqs,
+ sizeof(*aplic->irqs), GFP_KERNEL);
+ if (!aplic->irqs) {
+ ret = -ENOMEM;
+ goto fail_free_aplic;
+ }
+ for (i = 0; i < aplic->nr_irqs; i++)
+ raw_spin_lock_init(&aplic->irqs[i].lock);
+
+ /* Setup IO device */
+ kvm_iodevice_init(&aplic->iodev, &aplic_iodoev_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ kvm->arch.aia.aplic_addr,
+ KVM_DEV_RISCV_APLIC_SIZE,
+ &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret)
+ goto fail_free_aplic_irqs;
+
+ /* Setup default IRQ routing */
+ ret = kvm_riscv_setup_default_irq_routing(kvm, aplic->nr_irqs);
+ if (ret)
+ goto fail_unreg_iodev;
+
+ return 0;
+
+fail_unreg_iodev:
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+fail_free_aplic_irqs:
+ kfree(aplic->irqs);
+fail_free_aplic:<