diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-30 15:22:09 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-30 15:22:09 -0700 |
| commit | b25f62ccb490680a8cee755ac4528909395e0711 (patch) | |
| tree | 149d2d35f6efbca401e7a7b5a3b161a2c00bd0ec | |
| parent | 9070577ae9d6065e447d422bdf85a09f89eaa9e8 (diff) | |
| parent | ff598081e5b9d0bdd6874bfe340811bbb75b35e4 (diff) | |
| download | linux-b25f62ccb490680a8cee755ac4528909395e0711.tar.gz linux-b25f62ccb490680a8cee755ac4528909395e0711.tar.bz2 linux-b25f62ccb490680a8cee755ac4528909395e0711.zip | |
Merge tag 'vfio-v6.5-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Adjust log levels for common messages (Oleksandr Natalenko, Alex
  Williamson)
- Support for dynamic MSI-X allocation (Reinette Chatre)
- Enable and report PCIe AtomicOp Completer capabilities (Alex
  Williamson)
- Cleanup Kconfigs for vfio bus drivers (Alex Williamson)
- Add support for CDX bus based devices (Nipun Gupta)
- Fix race with concurrent mdev initialization (Eric Farman)
* tag 'vfio-v6.5-rc1' of https://github.com/awilliam/linux-vfio:
vfio/mdev: Move the compat_class initialization to module init
vfio/cdx: add support for CDX bus
vfio/fsl: Create Kconfig sub-menu
vfio/platform: Cleanup Kconfig
vfio/pci: Cleanup Kconfig
vfio/pci-core: Add capability for AtomicOp completer support
vfio/pci: Also demote hiding standard cap messages
vfio/pci: Clear VFIO_IRQ_INFO_NORESIZE for MSI-X
vfio/pci: Support dynamic MSI-X
vfio/pci: Probe and store ability to support dynamic MSI-X
vfio/pci: Use bitfield for struct vfio_pci_core_device flags
vfio/pci: Update stale comment
vfio/pci: Remove interrupt context counter
vfio/pci: Use xarray for interrupt context storage
vfio/pci: Move to single error path
vfio/pci: Prepare for dynamic interrupt context storage
vfio/pci: Remove negative check on unsigned vector
vfio/pci: Consolidate irq cleanup on MSI/MSI-X disable
vfio/pci: demote hiding ecap messages to debug level
| -rw-r--r-- | MAINTAINERS | 7 | ||||
| -rw-r--r-- | drivers/vfio/Kconfig | 1 | ||||
| -rw-r--r-- | drivers/vfio/Makefile | 5 | ||||
| -rw-r--r-- | drivers/vfio/cdx/Kconfig | 17 | ||||
| -rw-r--r-- | drivers/vfio/cdx/Makefile | 8 | ||||
| -rw-r--r-- | drivers/vfio/cdx/main.c | 234 | ||||
| -rw-r--r-- | drivers/vfio/cdx/private.h | 28 | ||||
| -rw-r--r-- | drivers/vfio/fsl-mc/Kconfig | 6 | ||||
| -rw-r--r-- | drivers/vfio/mdev/mdev_core.c | 23 | ||||
| -rw-r--r-- | drivers/vfio/pci/Kconfig | 8 | ||||
| -rw-r--r-- | drivers/vfio/pci/hisilicon/Kconfig | 4 | ||||
| -rw-r--r-- | drivers/vfio/pci/mlx5/Kconfig | 2 | ||||
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 8 | ||||
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 46 | ||||
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 305 | ||||
| -rw-r--r-- | drivers/vfio/platform/Kconfig | 18 | ||||
| -rw-r--r-- | drivers/vfio/platform/Makefile | 9 | ||||
| -rw-r--r-- | drivers/vfio/platform/reset/Kconfig | 2 | ||||
| -rw-r--r-- | include/linux/cdx/cdx_bus.h | 1 | ||||
| -rw-r--r-- | include/linux/mod_devicetable.h | 6 | ||||
| -rw-r--r-- | include/linux/vfio_pci_core.h | 26 | ||||
| -rw-r--r-- | include/uapi/linux/vfio.h | 18 | ||||
| -rw-r--r-- | scripts/mod/devicetable-offsets.c | 1 | ||||
| -rw-r--r-- | scripts/mod/file2alias.c | 17 |
24 files changed, 654 insertions, 146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index d2326e843748..910a3bd814b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22254,6 +22254,13 @@ F: Documentation/filesystems/vfat.rst F: fs/fat/ F: tools/testing/selftests/filesystems/fat/ +VFIO CDX DRIVER +M: Nipun Gupta <nipun.gupta@amd.com> +M: Nikhil Agarwal <nikhil.agarwal@amd.com> +L: kvm@vger.kernel.org +S: Maintained +F: drivers/vfio/cdx/* + VFIO DRIVER M: Alex Williamson <alex.williamson@redhat.com> L: kvm@vger.kernel.org diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 89e06c981e43..aba36f5be4ec 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -57,6 +57,7 @@ source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" source "drivers/vfio/fsl-mc/Kconfig" +source "drivers/vfio/cdx/Kconfig" endif source "virt/lib/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 70e7dcb302ef..66f418aef5a9 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -10,7 +10,8 @@ vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_VFIO_PCI) += pci/ -obj-$(CONFIG_VFIO_PLATFORM) += platform/ +obj-$(CONFIG_VFIO_PCI_CORE) += pci/ +obj-$(CONFIG_VFIO_PLATFORM_BASE) += platform/ obj-$(CONFIG_VFIO_MDEV) += mdev/ obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/ +obj-$(CONFIG_VFIO_CDX) += cdx/ diff --git a/drivers/vfio/cdx/Kconfig b/drivers/vfio/cdx/Kconfig new file mode 100644 index 000000000000..e6de0a0caa32 --- /dev/null +++ b/drivers/vfio/cdx/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# VFIO CDX configuration +# +# Copyright (C) 2022-2023, Advanced Micro Devices, Inc. +# + +config VFIO_CDX + tristate "VFIO support for CDX bus devices" + depends on CDX_BUS + select EVENTFD + help + Driver to enable VFIO support for the devices on CDX bus. 
+ This is required to make use of CDX devices present in + the system using the VFIO framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile new file mode 100644 index 000000000000..cd4a2e6fe609 --- /dev/null +++ b/drivers/vfio/cdx/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2022-2023, Advanced Micro Devices, Inc. +# + +obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o + +vfio-cdx-objs := main.o diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c new file mode 100644 index 000000000000..c376a69d2db2 --- /dev/null +++ b/drivers/vfio/cdx/main.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#include <linux/vfio.h> +#include <linux/cdx/cdx_bus.h> + +#include "private.h" + +static int vfio_cdx_open_device(struct vfio_device *core_vdev) +{ + struct vfio_cdx_device *vdev = + container_of(core_vdev, struct vfio_cdx_device, vdev); + struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev); + int count = cdx_dev->res_count; + int i; + + vdev->regions = kcalloc(count, sizeof(struct vfio_cdx_region), + GFP_KERNEL_ACCOUNT); + if (!vdev->regions) + return -ENOMEM; + + for (i = 0; i < count; i++) { + struct resource *res = &cdx_dev->res[i]; + + vdev->regions[i].addr = res->start; + vdev->regions[i].size = resource_size(res); + vdev->regions[i].type = res->flags; + /* + * Only regions addressed with PAGE granularity may be + * MMAP'ed securely. 
+ */ + if (!(vdev->regions[i].addr & ~PAGE_MASK) && + !(vdev->regions[i].size & ~PAGE_MASK)) + vdev->regions[i].flags |= + VFIO_REGION_INFO_FLAG_MMAP; + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; + if (!(cdx_dev->res[i].flags & IORESOURCE_READONLY)) + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE; + } + + return 0; +} + +static void vfio_cdx_close_device(struct vfio_device *core_vdev) +{ + struct vfio_cdx_device *vdev = + container_of(core_vdev, struct vfio_cdx_device, vdev); + + kfree(vdev->regions); + cdx_dev_reset(core_vdev->dev); +} + +static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev, + struct vfio_device_info __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_device_info, num_irqs); + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + struct vfio_device_info info; + + if (copy_from_user(&info, arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.flags = VFIO_DEVICE_FLAGS_CDX; + info.flags |= VFIO_DEVICE_FLAGS_RESET; + + info.num_regions = cdx_dev->res_count; + info.num_irqs = 0; + + return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; +} + +static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev, + struct vfio_region_info __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_region_info, offset); + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + struct vfio_region_info info; + + if (copy_from_user(&info, arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= cdx_dev->res_count) + return -EINVAL; + + /* map offset to the physical address */ + info.offset = vfio_cdx_index_to_offset(info.index); + info.size = vdev->regions[info.index].size; + info.flags = vdev->regions[info.index].flags; + + return copy_to_user(arg, &info, minsz) ? 
-EFAULT : 0; +} + +static long vfio_cdx_ioctl(struct vfio_device *core_vdev, + unsigned int cmd, unsigned long arg) +{ + struct vfio_cdx_device *vdev = + container_of(core_vdev, struct vfio_cdx_device, vdev); + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + return vfio_cdx_ioctl_get_info(vdev, uarg); + case VFIO_DEVICE_GET_REGION_INFO: + return vfio_cdx_ioctl_get_region_info(vdev, uarg); + case VFIO_DEVICE_RESET: + return cdx_dev_reset(core_vdev->dev); + default: + return -ENOTTY; + } +} + +static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region, + struct vm_area_struct *vma) +{ + u64 size = vma->vm_end - vma->vm_start; + u64 pgoff, base; + + pgoff = vma->vm_pgoff & + ((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + base = pgoff << PAGE_SHIFT; + + if (base + size > region.size) + return -EINVAL; + + vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff; + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + size, vma->vm_page_prot); +} + +static int vfio_cdx_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma) +{ + struct vfio_cdx_device *vdev = + container_of(core_vdev, struct vfio_cdx_device, vdev); + struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev); + unsigned int index; + + index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT); + + if (index >= cdx_dev->res_count) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) && + (vma->vm_flags & VM_READ)) + return -EPERM; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) && + (vma->vm_flags & VM_WRITE)) + return -EPERM; + + return vfio_cdx_mmap_mmio(vdev->regions[index], vma); +} + +static const struct vfio_device_ops vfio_cdx_ops = { + .name = "vfio-cdx", + .open_device = vfio_cdx_open_device, + .close_device = vfio_cdx_close_device, + 
.ioctl = vfio_cdx_ioctl, + .mmap = vfio_cdx_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, +}; + +static int vfio_cdx_probe(struct cdx_device *cdx_dev) +{ + struct vfio_cdx_device *vdev; + struct device *dev = &cdx_dev->dev; + int ret; + + vdev = vfio_alloc_device(vfio_cdx_device, vdev, dev, + &vfio_cdx_ops); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + ret = vfio_register_group_dev(&vdev->vdev); + if (ret) + goto out_uninit; + + dev_set_drvdata(dev, vdev); + return 0; + +out_uninit: + vfio_put_device(&vdev->vdev); + return ret; +} + +static int vfio_cdx_remove(struct cdx_device *cdx_dev) +{ + struct device *dev = &cdx_dev->dev; + struct vfio_cdx_device *vdev = dev_get_drvdata(dev); + + vfio_unregister_group_dev(&vdev->vdev); + vfio_put_device(&vdev->vdev); + + return 0; +} + +static const struct cdx_device_id vfio_cdx_table[] = { + { CDX_DEVICE_DRIVER_OVERRIDE(CDX_ANY_ID, CDX_ANY_ID, + CDX_ID_F_VFIO_DRIVER_OVERRIDE) }, /* match all by default */ + {} +}; + +MODULE_DEVICE_TABLE(cdx, vfio_cdx_table); + +static struct cdx_driver vfio_cdx_driver = { + .probe = vfio_cdx_probe, + .remove = vfio_cdx_remove, + .match_id_table = vfio_cdx_table, + .driver = { + .name = "vfio-cdx", + .owner = THIS_MODULE, + }, + .driver_managed_dma = true, +}; + +module_driver(vfio_cdx_driver, cdx_driver_register, cdx_driver_unregister); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("VFIO for CDX devices - User Level meta-driver"); diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h new file mode 100644 index 000000000000..8bdc117ea88e --- /dev/null +++ b/drivers/vfio/cdx/private.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. 
+ */ + +#ifndef VFIO_CDX_PRIVATE_H +#define VFIO_CDX_PRIVATE_H + +#define VFIO_CDX_OFFSET_SHIFT 40 + +static inline u64 vfio_cdx_index_to_offset(u32 index) +{ + return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT); +} + +struct vfio_cdx_region { + u32 flags; + u32 type; + u64 addr; + resource_size_t size; +}; + +struct vfio_cdx_device { + struct vfio_device vdev; + struct vfio_cdx_region *regions; +}; + +#endif /* VFIO_CDX_PRIVATE_H */ diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig index 597d338c5c8a..7d1d690348f0 100644 --- a/drivers/vfio/fsl-mc/Kconfig +++ b/drivers/vfio/fsl-mc/Kconfig @@ -1,6 +1,8 @@ +menu "VFIO support for FSL_MC bus devices" + depends on FSL_MC_BUS + config VFIO_FSL_MC tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" - depends on FSL_MC_BUS select EVENTFD help Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc @@ -8,3 +10,5 @@ config VFIO_FSL_MC fsl-mc bus devices using the VFIO framework. If you don't know what to do here, say N. + +endmenu diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 58f91b3bd670..ed4737de4528 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -72,12 +72,6 @@ int mdev_register_parent(struct mdev_parent *parent, struct device *dev, parent->nr_types = nr_types; atomic_set(&parent->available_instances, mdev_driver->max_instances); - if (!mdev_bus_compat_class) { - mdev_bus_compat_class = class_compat_register("mdev_bus"); - if (!mdev_bus_compat_class) - return -ENOMEM; - } - ret = parent_create_sysfs_files(parent); if (ret) return ret; @@ -251,13 +245,24 @@ int mdev_device_remove(struct mdev_device *mdev) static int __init mdev_init(void) { - return bus_register(&mdev_bus_type); + int ret; + + ret = bus_register(&mdev_bus_type); + if (ret) + return ret; + + mdev_bus_compat_class = class_compat_register("mdev_bus"); + if (!mdev_bus_compat_class) { + bus_unregister(&mdev_bus_type); + return -ENOMEM; + } + + return 0; } static void 
__exit mdev_exit(void) { - if (mdev_bus_compat_class) - class_compat_unregister(mdev_bus_compat_class); + class_compat_unregister(mdev_bus_compat_class); bus_unregister(&mdev_bus_type); } diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index f9d0c908e738..86bb7835cf3c 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -if PCI && MMU +menu "VFIO support for PCI devices" + depends on PCI && MMU + config VFIO_PCI_CORE tristate select VFIO_VIRQFD @@ -7,9 +9,11 @@ config VFIO_PCI_CORE config VFIO_PCI_MMAP def_bool y if !S390 + depends on VFIO_PCI_CORE config VFIO_PCI_INTX def_bool y if !S390 + depends on VFIO_PCI_CORE config VFIO_PCI tristate "Generic VFIO support for any PCI device" @@ -59,4 +63,4 @@ source "drivers/vfio/pci/mlx5/Kconfig" source "drivers/vfio/pci/hisilicon/Kconfig" -endif +endmenu diff --git a/drivers/vfio/pci/hisilicon/Kconfig b/drivers/vfio/pci/hisilicon/Kconfig index 5daa0f45d2f9..cbf1c32f6ebf 100644 --- a/drivers/vfio/pci/hisilicon/Kconfig +++ b/drivers/vfio/pci/hisilicon/Kconfig @@ -1,13 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only config HISI_ACC_VFIO_PCI - tristate "VFIO PCI support for HiSilicon ACC devices" + tristate "VFIO support for HiSilicon ACC PCI devices" depends on ARM64 || (COMPILE_TEST && 64BIT) - depends on VFIO_PCI_CORE depends on PCI_MSI depends on CRYPTO_DEV_HISI_QM depends on CRYPTO_DEV_HISI_HPRE depends on CRYPTO_DEV_HISI_SEC2 depends on CRYPTO_DEV_HISI_ZIP + select VFIO_PCI_CORE help This provides generic PCI support for HiSilicon ACC devices using the VFIO framework. 
diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig index 29ba9c504a75..7088edc4fb28 100644 --- a/drivers/vfio/pci/mlx5/Kconfig +++ b/drivers/vfio/pci/mlx5/Kconfig @@ -2,7 +2,7 @@ config MLX5_VFIO_PCI tristate "VFIO support for MLX5 PCI devices" depends on MLX5_CORE - depends on VFIO_PCI_CORE + select VFIO_PCI_CORE help This provides migration support for MLX5 devices using the VFIO framework. diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 948cdd464f4e..7e2e62ab0869 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1566,8 +1566,8 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev) } if (!len) { - pci_info(pdev, "%s: hiding cap %#x@%#x\n", __func__, - cap, pos); + pci_dbg(pdev, "%s: hiding cap %#x@%#x\n", __func__, + cap, pos); *prev = next; pos = next; continue; @@ -1643,8 +1643,8 @@ static int vfio_ecap_init(struct vfio_pci_core_device *vdev) } if (!len) { - pci_info(pdev, "%s: hiding ecap %#x@%#x\n", - __func__, ecap, epos); + pci_dbg(pdev, "%s: hiding ecap %#x@%#x\n", + __func__, ecap, epos); /* If not the first in the chain, we can skip over it */ if (prev) { diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index a5ab416cf476..20d7b69ea6ff 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -530,8 +530,11 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) vdev->msix_bar = table & PCI_MSIX_TABLE_BIR; vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET; vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16; - } else + vdev->has_dyn_msix = pci_msix_can_alloc_dyn(pdev); + } else { vdev->msix_bar = 0xFF; + vdev->has_dyn_msix = false; + } if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; @@ -882,6 +885,37 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, } EXPORT_SYMBOL_GPL(vfio_pci_core_register_dev_region); +static 
int vfio_pci_info_atomic_cap(struct vfio_pci_core_device *vdev, + struct vfio_info_cap *caps) +{ + struct vfio_device_info_cap_pci_atomic_comp cap = { + .header.id = VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP, + .header.version = 1 + }; + struct pci_dev *pdev = pci_physfn(vdev->pdev); + u32 devcap2; + + pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &devcap2); + + if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32)) + cap.flags |= VFIO_PCI_ATOMIC_COMP32; + + if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64)) + cap.flags |= VFIO_PCI_ATOMIC_COMP64; + + if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP128) && + !pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + cap.flags |= VFIO_PCI_ATOMIC_COMP128; + + if (!cap.flags) + return -ENODEV; + + return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); +} + static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev, struct vfio_device_info __user *arg) { @@ -920,6 +954,13 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev, return ret; } + ret = vfio_pci_info_atomic_cap(vdev, &caps); + if (ret && ret != -ENODEV) { + pci_warn(vdev->pdev, + "Failed to setup AtomicOps info capability\n"); + return ret; + } + if (caps.size) { info.flags |= VFIO_DEVICE_FLAGS_CAPS; if (info.argsz < sizeof(info) + caps.size) { @@ -1111,7 +1152,7 @@ static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev, if (info.index == VFIO_PCI_INTX_IRQ_INDEX) info.flags |= (VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED); - else + else if (info.index != VFIO_PCI_MSIX_IRQ_INDEX || !vdev->has_dyn_msix) info.flags |= VFIO_IRQ_INFO_NORESIZE; return copy_to_user(arg, &info, minsz) ? 
-EFAULT : 0; @@ -2102,6 +2143,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev) INIT_LIST_HEAD(&vdev->vma_list); INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); + xa_init(&vdev->ctx); return 0; } diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index bffb0741518b..cbb4bcbfbf83 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -48,6 +48,39 @@ static bool is_irq_none(struct vfio_pci_core_device *vdev) vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX); } +static +struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev, + unsigned long index) +{ + return xa_load(&vdev->ctx, index); +} + +static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev, + struct vfio_pci_irq_ctx *ctx, unsigned long index) +{ + xa_erase(&vdev->ctx, index); + kfree(ctx); +} + +static struct vfio_pci_irq_ctx * +vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index) +{ + struct vfio_pci_irq_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); + if (!ctx) + return NULL; + + ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT); + if (ret) { + kfree(ctx); + return NULL; + } + + return ctx; +} + /* * INTx */ @@ -55,14 +88,21 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) { struct vfio_pci_core_device *vdev = opaque; - if (likely(is_intx(vdev) && !vdev->virq_disabled)) - eventfd_signal(vdev->ctx[0].trigger, 1); + if (likely(is_intx(vdev) && !vdev->virq_disabled)) { + struct vfio_pci_irq_ctx *ctx; + + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + return; + eventfd_signal(ctx->trigger, 1); + } } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. 
*/ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_irq_ctx *ctx; unsigned long flags; bool masked_changed = false; @@ -77,7 +117,14 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) if (unlikely(!is_intx(vdev))) { if (vdev->pci_2_3) pci_intx(pdev, 0); - } else if (!vdev->ctx[0].masked) { + goto out_unlock; + } + + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + goto out_unlock; + + if (!ctx->masked) { /* * Can't use check_and_mask here because we always want to * mask, not just when something is pending. @@ -87,10 +134,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) else disable_irq_nosync(pdev->irq); - vdev->ctx[0].masked = true; + ctx->masked = true; masked_changed = true; } +out_unlock: spin_unlock_irqrestore(&vdev->irqlock, flags); return masked_changed; } @@ -105,6 +153,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) { struct vfio_pci_core_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_irq_ctx *ctx; unsigned long flags; int ret = 0; @@ -117,7 +166,14 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) if (unlikely(!is_intx(vdev))) { if (vdev->pci_2_3) pci_intx(pdev, 1); - } else if (vdev->ctx[0].masked && !vdev->virq_disabled) { + goto out_unlock; + } + + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + goto out_unlock; + + if (ctx->masked && !vdev->virq_disabled) { /* * A pending interrupt here would immediately trigger, * but we can avoid that overhead by just re-sending @@ -129,9 +185,10 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) } else enable_irq(pdev->irq); - vdev->ctx[0].masked = (ret > 0); + ctx->masked = (ret > 0); } +out_unlock: spin_unlock_irqrestore(&vdev->irqlock, flags); return ret; @@ -146,18 +203,23 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { 
struct vfio_pci_core_device *vdev = dev_id; + struct vfio_pci_irq_ctx *ctx; unsigned long flags; int ret = IRQ_NONE; + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + return ret; + spin_lock_irqsave(&vdev->irqlock, flags); if (!vdev->pci_2_3) { disable_irq_nosync(vdev->pdev->irq); - vdev->ctx[0].masked = true; + ctx->masked = true; ret = IRQ_HANDLED; - } else if (!vdev->ctx[0].masked && /* may be shared */ + } else if (!ctx->masked && /* may be shared */ pci_check_and_mask_intx(vdev->pdev)) { - vdev->ctx[0].masked = true; + ctx->masked = true; ret = IRQ_HANDLED; } @@ -171,27 +233,27 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) static int vfio_intx_enable(struct vfio_pci_core_device *vdev) { + struct vfio_pci_irq_ctx *ctx; + if (!is_irq_none(vdev)) return -EINVAL; if (!vdev->pdev->irq) return -ENODEV; - vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); - if (!vdev->ctx) + ctx = vfio_irq_ctx_alloc(vdev, 0); + if (!ctx) return -ENOMEM; - vdev->num_ctx = 1; - /* * If the virtual interrupt is masked, restore it. Devices * supporting DisINTx can be masked at the hardware level * here, non-PCI-2.3 devices will have to wait until the * interrupt is enabled. 
*/ - vdev->ctx[0].masked = vdev->virq_disabled; + ctx->masked = vdev->virq_disabled; if (vdev->pci_2_3) - pci_intx(vdev->pdev, !vdev->ctx[0].masked); + pci_intx(vdev->pdev, !ctx->masked); vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; @@ -202,41 +264,46 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) { struct pci_dev *pdev = vdev->pdev; unsigned long irqflags = IRQF_SHARED; + struct vfio_pci_irq_ctx *ctx; struct eventfd_ctx *trigger; unsigned long flags; int ret; - if (vdev->ctx[0].trigger) { + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + return -EINVAL; + + if (ctx->trigger) { free_irq(pdev->irq, vdev); - kfree(vdev->ctx[0].name); - eventfd_ctx_put(vdev->ctx[0].trigger); - vdev->ctx[0].trigger = NULL; + kfree(ctx->name); + eventfd_ctx_put(ctx->trigger); + ctx->trigger = NULL; } if (fd < 0) /* Disable only */ return 0; - vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", - pci_name(pdev)); - if (!vdev->ctx[0].name) + ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", + pci_name(pdev)); + if (!ctx->name) return -ENOMEM; trigger = eventfd_ctx_fdget(fd); if (IS_ERR(trigger)) { - kfree(vdev->ctx[0].name); + kfree(ctx->name); return PTR_ERR(trigger); } - vdev->ctx[0].trigger = trigger; + ctx->trigger = trigger; if (!vdev->pci_2_3) irqflags = 0; ret = request_irq(pdev->irq, vfio_intx_handler, - irqflags, vdev->ctx[0].name, vdev); + irqflags, ctx->name, vdev); if (ret) { - vdev->ctx[0].trigger = NULL; - kfree(vdev->ctx[0].name); + ctx->trigger = NULL; + kfree(ctx->name); eventfd_ctx_put(trigger); return ret; } @@ -246,7 +313,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) * disable_irq won't. 
*/ spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && vdev->ctx[0].masked) + if (!vdev->pci_2_3 && ctx->masked) disable_irq_nosync(pdev->irq); spin_unlock_irqrestore(&vdev->irqlock, flags); @@ -255,12 +322,17 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_core_device *vdev) { - vfio_virqfd_disable(&vdev->ctx[0].unmask); - vfio_virqfd_disable(&vdev->ctx[0].mask); + struct vfio_pci_irq_ctx *ctx; + + ctx = vfio_irq_ctx_get(vdev, 0); + WARN_ON_ONCE(!ctx); + if (ctx) { + vfio_virqfd_disable(&ctx->unmask); + vfio_virqfd_disable(&ctx->mask); + } vfio_intx_set_signal(vdev, -1); |
