diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-02 13:41:33 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-02 13:41:33 -0700 |
| commit | 89b6b8cd92c068cd1bdf877ec7fb1392568ef35d (patch) | |
| tree | 36d88a0da54c7a691581fd34f89ea0469d29ae02 /drivers/vfio | |
| parent | 9ae5fceb9a20154d74586fe17d1096b981b23e34 (diff) | |
| parent | ea870730d83fc13a5fa2bd0e175176d7ac8a400a (diff) | |
| download | linux-89b6b8cd92c068cd1bdf877ec7fb1392568ef35d.tar.gz linux-89b6b8cd92c068cd1bdf877ec7fb1392568ef35d.tar.bz2 linux-89b6b8cd92c068cd1bdf877ec7fb1392568ef35d.zip | |
Merge tag 'vfio-v5.15-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Fix dma-valid return WAITED implementation (Anthony Yznaga)
- SPDX license cleanups (Cai Huoqing)
- Split vfio-pci-core from vfio-pci and enhance PCI driver matching to
support future vendor provided vfio-pci variants (Yishai Hadas, Max
Gurtovoy, Jason Gunthorpe)
- Replace duplicated reflck with core support for managing first open,
last close, and device sets (Jason Gunthorpe, Max Gurtovoy, Yishai
Hadas)
- Fix non-modular mdev support and don't nag about request callback
support (Christoph Hellwig)
- Add semaphore to protect instruction intercept handler and replace
open-coded locks in vfio-ap driver (Tony Krowiak)
- Convert vfio-ap to vfio_register_group_dev() API (Jason Gunthorpe)
* tag 'vfio-v5.15-rc1' of git://github.com/awilliam/linux-vfio: (37 commits)
vfio/pci: Introduce vfio_pci_core.ko
vfio: Use kconfig if XX/endif blocks instead of repeating 'depends on'
vfio: Use select for eventfd
PCI / VFIO: Add 'override_only' support for VFIO PCI sub system
PCI: Add 'override_only' field to struct pci_device_id
vfio/pci: Move module parameters to vfio_pci.c
vfio/pci: Move igd initialization to vfio_pci.c
vfio/pci: Split the pci_driver code out of vfio_pci_core.c
vfio/pci: Include vfio header in vfio_pci_core.h
vfio/pci: Rename ops functions to fit core namings
vfio/pci: Rename vfio_pci_device to vfio_pci_core_device
vfio/pci: Rename vfio_pci_private.h to vfio_pci_core.h
vfio/pci: Rename vfio_pci.c to vfio_pci_core.c
vfio/ap_ops: Convert to use vfio_register_group_dev()
s390/vfio-ap: replace open coded locks for VFIO_GROUP_NOTIFY_SET_KVM notification
s390/vfio-ap: r/w lock for PQAP interception handler function pointer
vfio/type1: Fix vfio_find_dma_valid return
vfio-pci/zdev: Remove repeated verbose license text
vfio: platform: reset: Convert to SPDX identifier
vfio: Remove struct vfio_device_ops open/release
...
Diffstat (limited to 'drivers/vfio')
25 files changed, 2547 insertions, 2812 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 67d0bf4efa16..6130d00252ed 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -1,12 +1,22 @@ # SPDX-License-Identifier: GPL-2.0-only +menuconfig VFIO + tristate "VFIO Non-Privileged userspace driver framework" + select IOMMU_API + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + help + VFIO provides a framework for secure userspace device drivers. + See Documentation/driver-api/vfio.rst for more details. + + If you don't know what to do here, say N. + +if VFIO config VFIO_IOMMU_TYPE1 tristate - depends on VFIO default n config VFIO_IOMMU_SPAPR_TCE tristate - depends on VFIO && SPAPR_TCE_IOMMU + depends on SPAPR_TCE_IOMMU default VFIO config VFIO_SPAPR_EEH @@ -16,22 +26,11 @@ config VFIO_SPAPR_EEH config VFIO_VIRQFD tristate - depends on VFIO && EVENTFD + select EVENTFD default n -menuconfig VFIO - tristate "VFIO Non-Privileged userspace driver framework" - select IOMMU_API - select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) - help - VFIO provides a framework for secure userspace device drivers. - See Documentation/driver-api/vfio.rst for more details. - - If you don't know what to do here, say N. - -menuconfig VFIO_NOIOMMU +config VFIO_NOIOMMU bool "VFIO No-IOMMU support" - depends on VFIO help VFIO is built on the ability to isolate devices using the IOMMU. Only with an IOMMU can userspace access to DMA capable devices be @@ -48,4 +47,6 @@ source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" source "drivers/vfio/fsl-mc/Kconfig" +endif + source "virt/lib/Kconfig" diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig index b1a527d6b6f2..597d338c5c8a 100644 --- a/drivers/vfio/fsl-mc/Kconfig +++ b/drivers/vfio/fsl-mc/Kconfig @@ -1,6 +1,7 @@ config VFIO_FSL_MC tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" - depends on VFIO && FSL_MC_BUS && EVENTFD + depends on FSL_MC_BUS + select EVENTFD help Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc (Management Complex) devices. This is required to passthrough diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 90cad109583b..0ead91bfa838 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -19,81 +19,10 @@ static struct fsl_mc_driver vfio_fsl_mc_driver; -static DEFINE_MUTEX(reflck_lock); - -static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck) -{ - kref_get(&reflck->kref); -} - -static void vfio_fsl_mc_reflck_release(struct kref *kref) -{ - struct vfio_fsl_mc_reflck *reflck = container_of(kref, - struct vfio_fsl_mc_reflck, - kref); - - mutex_destroy(&reflck->lock); - kfree(reflck); - mutex_unlock(&reflck_lock); -} - -static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck) -{ - kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock); -} - -static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void) -{ - struct vfio_fsl_mc_reflck *reflck; - - reflck = kzalloc(sizeof(*reflck), GFP_KERNEL); - if (!reflck) - return ERR_PTR(-ENOMEM); - - kref_init(&reflck->kref); - mutex_init(&reflck->lock); - - return reflck; -} - -static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev) -{ - int ret = 0; - - mutex_lock(&reflck_lock); - if (is_fsl_mc_bus_dprc(vdev->mc_dev)) { - vdev->reflck = vfio_fsl_mc_reflck_alloc(); - ret = PTR_ERR_OR_ZERO(vdev->reflck); - } else { - struct device *mc_cont_dev = vdev->mc_dev->dev.parent; - struct vfio_device *device; - struct vfio_fsl_mc_device *cont_vdev; - - device = vfio_device_get_from_dev(mc_cont_dev); - if (!device) { - ret = -ENODEV; - goto unlock; - } - - cont_vdev = - container_of(device, struct vfio_fsl_mc_device, vdev); - if (!cont_vdev || !cont_vdev->reflck) { - vfio_device_put(device); - ret = -ENODEV; - goto unlock; - } - vfio_fsl_mc_reflck_get(cont_vdev->reflck); - vdev->reflck = cont_vdev->reflck; - vfio_device_put(device); - } - -unlock: - mutex_unlock(&reflck_lock); - return ret; -} - -static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev) +static int vfio_fsl_mc_open_device(struct vfio_device *core_vdev) { + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); struct fsl_mc_device *mc_dev = vdev->mc_dev; int count = mc_dev->obj_desc.region_count; int i; @@ -136,58 +65,30 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev) kfree(vdev->regions); } -static int vfio_fsl_mc_open(struct vfio_device *core_vdev) -{ - struct vfio_fsl_mc_device *vdev = - container_of(core_vdev, struct vfio_fsl_mc_device, vdev); - int ret = 0; - - mutex_lock(&vdev->reflck->lock); - if (!vdev->refcnt) { - ret = vfio_fsl_mc_regions_init(vdev); - if (ret) - goto out; - } - vdev->refcnt++; -out: - mutex_unlock(&vdev->reflck->lock); - - return ret; -} -static void vfio_fsl_mc_release(struct vfio_device *core_vdev) +static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev) { struct vfio_fsl_mc_device *vdev = container_of(core_vdev, struct vfio_fsl_mc_device, vdev); + struct fsl_mc_device *mc_dev = vdev->mc_dev; + struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); + struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); int ret; - mutex_lock(&vdev->reflck->lock); + vfio_fsl_mc_regions_cleanup(vdev); - if (!(--vdev->refcnt)) { - struct fsl_mc_device *mc_dev = vdev->mc_dev; - struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); - struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); + /* reset the device before cleaning up the interrupts */ + ret = dprc_reset_container(mc_cont->mc_io, 0, mc_cont->mc_handle, + mc_cont->obj_desc.id, + DPRC_RESET_OPTION_NON_RECURSIVE); - vfio_fsl_mc_regions_cleanup(vdev); + if (WARN_ON(ret)) + dev_warn(&mc_cont->dev, + "VFIO_FLS_MC: reset device has failed (%d)\n", ret); - /* reset the device before cleaning up the interrupts */ - ret = dprc_reset_container(mc_cont->mc_io, 0, - mc_cont->mc_handle, - mc_cont->obj_desc.id, - DPRC_RESET_OPTION_NON_RECURSIVE); + vfio_fsl_mc_irqs_cleanup(vdev); - if (ret) { - dev_warn(&mc_cont->dev, "VFIO_FLS_MC: reset device has failed (%d)\n", - ret); - WARN_ON(1); - } - - vfio_fsl_mc_irqs_cleanup(vdev); - - fsl_mc_cleanup_irq_pool(mc_cont); - } - - mutex_unlock(&vdev->reflck->lock); + fsl_mc_cleanup_irq_pool(mc_cont); } static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev, @@ -504,8 +405,8 @@ static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev, static const struct vfio_device_ops vfio_fsl_mc_ops = { .name = "vfio-fsl-mc", - .open = vfio_fsl_mc_open, - .release = vfio_fsl_mc_release, + .open_device = vfio_fsl_mc_open_device, + .close_device = vfio_fsl_mc_close_device, .ioctl = vfio_fsl_mc_ioctl, .read = vfio_fsl_mc_read, .write = vfio_fsl_mc_write, @@ -625,13 +526,16 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) vdev->mc_dev = mc_dev; mutex_init(&vdev->igate); - ret = vfio_fsl_mc_reflck_attach(vdev); + if (is_fsl_mc_bus_dprc(mc_dev)) + ret = vfio_assign_device_set(&vdev->vdev, &mc_dev->dev); + else + ret = vfio_assign_device_set(&vdev->vdev, mc_dev->dev.parent); if (ret) - goto out_kfree; + goto out_uninit; ret = vfio_fsl_mc_init_device(vdev); if (ret) - goto out_reflck; + goto out_uninit; ret = vfio_register_group_dev(&vdev->vdev); if (ret) { @@ -639,12 +543,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) goto out_device; } - /* - * This triggers recursion into vfio_fsl_mc_probe() on another device - * and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the - * vfio_add_group_dev() above. It has no impact on this vdev, so it is - * safe to be after the vfio device is made live. - */ ret = vfio_fsl_mc_scan_container(mc_dev); if (ret) goto out_group_dev; @@ -655,9 +553,8 @@ out_group_dev: vfio_unregister_group_dev(&vdev->vdev); out_device: vfio_fsl_uninit_device(vdev); -out_reflck: - vfio_fsl_mc_reflck_put(vdev->reflck); -out_kfree: +out_uninit: + vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); out_group_put: vfio_iommu_group_put(group, dev); @@ -674,8 +571,8 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) dprc_remove_devices(mc_dev, NULL, 0); vfio_fsl_uninit_device(vdev); - vfio_fsl_mc_reflck_put(vdev->reflck); + vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 0d9f3002df7f..77e584093a23 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -120,7 +120,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, if (start != 0 || count != 1) return -EINVAL; - mutex_lock(&vdev->reflck->lock); + mutex_lock(&vdev->vdev.dev_set->lock); ret = fsl_mc_populate_irq_pool(mc_cont, FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS); if (ret) @@ -129,7 +129,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, ret = vfio_fsl_mc_irqs_allocate(vdev); if (ret) goto unlock; - mutex_unlock(&vdev->reflck->lock); + mutex_unlock(&vdev->vdev.dev_set->lock); if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { s32 fd = *(s32 *)data; @@ -154,7 +154,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, return 0; unlock: - mutex_unlock(&vdev->reflck->lock); + mutex_unlock(&vdev->vdev.dev_set->lock); return ret; } diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h index 89700e00e77d..4ad63ececb91 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h @@ -22,11 +22,6 @@ struct vfio_fsl_mc_irq { char *name; }; -struct vfio_fsl_mc_reflck { - struct kref kref; - struct mutex lock; -}; - struct vfio_fsl_mc_region { u32 flags; u32 type; @@ -39,9 +34,7 @@ struct vfio_fsl_mc_device { struct vfio_device vdev; struct fsl_mc_device *mc_dev; struct notifier_block nb; - int refcnt; struct vfio_fsl_mc_region *regions; - struct vfio_fsl_mc_reflck *reflck; struct mutex igate; struct vfio_fsl_mc_irq *mc_irqs; }; diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig index 763c877a1318..646dbed44eb2 100644 --- a/drivers/vfio/mdev/Kconfig +++ b/drivers/vfio/mdev/Kconfig @@ -2,7 +2,6 @@ config VFIO_MDEV tristate "Mediated device driver framework" - depends on VFIO default n help Provides a framework to virtualize devices. diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index e4581ec093a6..b314101237fe 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -138,10 +138,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) if (!dev) return -EINVAL; - /* Not mandatory, but its absence could be a problem */ - if (!ops->request) - dev_info(dev, "Driver cannot be asked to release device\n"); - mutex_lock(&parent_list_lock); /* Check for duplicate */ @@ -398,7 +394,7 @@ static void __exit mdev_exit(void) mdev_bus_unregister(); } -module_init(mdev_init) +subsys_initcall(mdev_init) module_exit(mdev_exit) MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index 39ef7489fe47..7a9883048216 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -17,24 +17,24 @@ #include "mdev_private.h" -static int vfio_mdev_open(struct vfio_device *core_vdev) +static int vfio_mdev_open_device(struct vfio_device *core_vdev) { struct mdev_device *mdev = to_mdev_device(core_vdev->dev); struct mdev_parent *parent = mdev->type->parent; - if (unlikely(!parent->ops->open)) - return -EINVAL; + if (unlikely(!parent->ops->open_device)) + return 0; - return parent->ops->open(mdev); + return parent->ops->open_device(mdev); } -static void vfio_mdev_release(struct vfio_device *core_vdev) +static void vfio_mdev_close_device(struct vfio_device *core_vdev) { struct mdev_device *mdev = to_mdev_device(core_vdev->dev); struct mdev_parent *parent = mdev->type->parent; - if (likely(parent->ops->release)) - parent->ops->release(mdev); + if (likely(parent->ops->close_device)) + parent->ops->close_device(mdev); } static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, @@ -44,7 +44,7 @@ static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->ioctl)) - return -EINVAL; + return 0; return parent->ops->ioctl(mdev, cmd, arg); } @@ -100,8 +100,8 @@ static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count) static const struct vfio_device_ops vfio_mdev_dev_ops = { .name = "vfio-mdev", - .open = vfio_mdev_open, - .release = vfio_mdev_release, + .open_device = vfio_mdev_open_device, + .close_device = vfio_mdev_close_device, .ioctl = vfio_mdev_unlocked_ioctl, .read = vfio_mdev_read, .write = vfio_mdev_write, @@ -120,12 +120,16 @@ static int vfio_mdev_probe(struct mdev_device *mdev) vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); ret = vfio_register_group_dev(vdev); - if (ret) { - kfree(vdev); - return ret; - } + if (ret) + goto out_uninit; + dev_set_drvdata(&mdev->dev, vdev); return 0; + +out_uninit: + vfio_uninit_group_dev(vdev); + kfree(vdev); + return ret; } static void vfio_mdev_remove(struct mdev_device *mdev) @@ -133,6 +137,7 @@ static void vfio_mdev_remove(struct mdev_device *mdev) struct vfio_device *vdev = dev_get_drvdata(&mdev->dev); vfio_unregister_group_dev(vdev); + vfio_uninit_group_dev(vdev); kfree(vdev); } diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5e2e1b9a9fd3..860424ccda1b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,19 +1,29 @@ # SPDX-License-Identifier: GPL-2.0-only -config VFIO_PCI - tristate "VFIO support for PCI devices" - depends on VFIO && PCI && EVENTFD - depends on MMU +if PCI && MMU +config VFIO_PCI_CORE + tristate select VFIO_VIRQFD select IRQ_BYPASS_MANAGER + +config VFIO_PCI_MMAP + def_bool y if !S390 + +config VFIO_PCI_INTX + def_bool y if !S390 + +config VFIO_PCI + tristate "Generic VFIO support for any PCI device" + select VFIO_PCI_CORE help - Support for the PCI VFIO bus driver. This is required to make - use of PCI drivers using the VFIO framework. + Support for the generic PCI VFIO bus driver which can connect any + PCI device to the VFIO framework. If you don't know what to do here, say N. +if VFIO_PCI config VFIO_PCI_VGA - bool "VFIO PCI support for VGA devices" - depends on VFIO_PCI && X86 && VGA_ARB + bool "Generic VFIO PCI support for VGA devices" + depends on X86 && VGA_ARB help Support for VGA extension to VFIO PCI. This exposes an additional region on VGA devices for accessing legacy VGA addresses used by @@ -21,17 +31,9 @@ config VFIO_PCI_VGA If you don't know what to do here, say N. -config VFIO_PCI_MMAP - depends on VFIO_PCI - def_bool y if !S390 - -config VFIO_PCI_INTX - depends on VFIO_PCI - def_bool y if !S390 - config VFIO_PCI_IGD - bool "VFIO PCI extensions for Intel graphics (GVT-d)" - depends on VFIO_PCI && X86 + bool "Generic VFIO PCI extensions for Intel graphics (GVT-d)" + depends on X86 default y help Support for Intel IGD specific extensions to enable direct @@ -40,3 +42,5 @@ config VFIO_PCI_IGD and LPC bridge config space. To enable Intel IGD assignment through vfio-pci, say Y. +endif +endif diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 3ff42093962f..349d68d242b4 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o -vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o -vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o +vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-core-$(CONFIG_S390) += vfio_pci_zdev.o +obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o +vfio-pci-y := vfio_pci.o +vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cf27df8048db..a5ce92beb655 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved + * * Copyright (C) 2012 Red Hat, Inc. All rights reserved. * Author: Alex Williamson <alex.williamson@redhat.com> * @@ -18,19 +20,13 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/notifier.h> -#include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> -#include <linux/vfio.h> -#include <linux/vgaarb.h> -#include <linux/nospec.h> -#include <linux/sched/mm.h> -#include "vfio_pci_private.h" +#include <linux/vfio_pci_core.h> -#define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_DESC "VFIO PCI - User Level meta-driver" @@ -64,15 +60,6 @@ static bool disable_denylist; module_param(disable_denylist, bool, 0444); MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); -static inline bool vfio_vga_disabled(void) -{ -#ifdef CONFIG_VFIO_PCI_VGA - return disable_vga; -#else - return true; -#endif -} - static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) { switch (pdev->vendor) { @@ -111,2269 +98,103 @@ static bool vfio_pci_is_denylisted(struct pci_dev *pdev) return true; } -/* - * Our VGA arbiter participation is limited since we don't know anything - * about the device itself. However, if the device is the only VGA device - * downstream of a bridge and VFIO VGA support is disabled, then we can - * safely return legacy VGA IO and memory as not decoded since the user - * has no way to get to it and routing can be disabled externally at the - * bridge. - */ -static unsigned int vfio_pci_set_decode(struct pci_dev *pdev, bool single_vga) -{ - struct pci_dev *tmp = NULL; - unsigned char max_busnr; - unsigned int decodes; - - if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus)) - return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | - VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; - - max_busnr = pci_bus_max_busnr(pdev->bus); - decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; - - while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) { - if (tmp == pdev || - pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) || - pci_is_root_bus(tmp->bus)) - continue; - - if (tmp->bus->number >= pdev->bus->number && - tmp->bus->number <= max_busnr) { - pci_dev_put(tmp); - decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; - break; - } - } - - return decodes; -} - -static inline bool vfio_pci_is_vga(struct pci_dev *pdev) -{ - return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; -} - -static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) -{ - struct resource *res; - int i; - struct vfio_pci_dummy_resource *dummy_res; - - for (i = 0; i < PCI_STD_NUM_BARS; i++) { - int bar = i + PCI_STD_RESOURCES; - - res = &vdev->pdev->resource[bar]; - - if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP)) - goto no_mmap; - - if (!(res->flags & IORESOURCE_MEM)) - goto no_mmap; - - /* - * The PCI core shouldn't set up a resource with a - * type but zero size. But there may be bugs that - * cause us to do that. - */ - if (!resource_size(res)) - goto no_mmap; - - if (resource_size(res) >= PAGE_SIZE) { - vdev->bar_mmap_supported[bar] = true; - continue; - } - - if (!(res->start & ~PAGE_MASK)) { - /* - * Add a dummy resource to reserve the remainder - * of the exclusive page in case that hot-add - * device's bar is assigned into it. - */ - dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL); - if (dummy_res == NULL) - goto no_mmap; - - dummy_res->resource.name = "vfio sub-page reserved"; - dummy_res->resource.start = res->end + 1; - dummy_res->resource.end = res->start + PAGE_SIZE - 1; - dummy_res->resource.flags = res->flags; - if (request_resource(res->parent, - &dummy_res->resource)) { - kfree(dummy_res); - goto no_mmap; - } - dummy_res->index = bar; - list_add(&dummy_res->res_next, - &vdev->dummy_resources_list); - vdev->bar_mmap_supported[bar] = true; - continue; - } - /* - * Here we don't handle the case when the BAR is not page - * aligned because we can't expect the BAR will be - * assigned into the same location in a page in guest - * when we passthrough the BAR. And it's hard to access - * this BAR in userspace because we have no way to get - * the BAR's location in a page. - */ -no_mmap: - vdev->bar_mmap_supported[bar] = false; - } -} - -static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); -static void vfio_pci_disable(struct vfio_pci_device *vdev); -static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data); - -/* - * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND - * _and_ the ability detect when the device is asserting INTx via PCI_STATUS. - * If a device implements the former but not the latter we would typically - * expect broken_intx_masking be set and require an exclusive interrupt. - * However since we do have control of the device's ability to assert INTx, - * we can instead pretend that the device does not implement INTx, virtualizing - * the pin register to report zero and maintaining DisINTx set on the host. - */ -static bool vfio_pci_nointx(struct pci_dev *pdev) -{ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - /* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */ - case 0x1572: - case 0x1574: - case 0x1580 ... 0x1581: - case 0x1583 ... 0x158b: - case 0x37d0 ... 0x37d2: - /* X550 */ - case 0x1563: - return true; - default: - return false; - } - } - - return false; -} - -static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev) +static int vfio_pci_open_device(struct vfio_device *core_vdev) { + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); struct pci_dev *pdev = vdev->pdev; - u16 pmcsr; - - if (!pdev->pm_cap) - return; - - pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr); - - vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET); -} - -/* - * pci_set_power_state() wrapper handling devices which perform a soft reset on - * D3->D0 transition. Save state prior to D0/1/2->D3, stash it on the vdev, - * restore when returned to D0. Saved separately from pci_saved_state for use - * by PM capability emulation and separately from pci_dev internal saved state - * to avoid it being overwritten and consumed around other resets. - */ -int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state) -{ - struct pci_dev *pdev = vdev->pdev; - bool needs_restore = false, needs_save = false; int ret; - if (vdev->needs_pm_restore) { - if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) { - pci_save_state(pdev); - needs_save = true; - } - - if (pdev->current_state >= PCI_D3hot && state <= PCI_D0) - needs_restore = true; - } - - ret = pci_set_power_state(pdev, state); - - if (!ret) { - /* D3 might be unsupported via quirk, skip unless in D3 */ - if (needs_save && pdev->current_state >= PCI_D3hot) { - vdev->pm_save = pci_store_saved_state(pdev); - } else if (needs_restore) { - pci_load_and_free_saved_state(pdev, &vdev->pm_save); - pci_restore_state(pdev); |
