diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-30 20:36:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-30 20:36:01 -0700 |
commit | ec0e2dc81072300acbda5deca2e02f98485eafa9 (patch) | |
tree | 994ebae628855d50b49c0da483ac29142dd85ff8 /drivers | |
parent | b6f6167ea8a424d14b41c172fe7a5f49e164f221 (diff) | |
parent | 642265e22ecc7fe05c49cb8e1e0000a049df9857 (diff) | |
download | linux-ec0e2dc81072300acbda5deca2e02f98485eafa9.tar.gz linux-ec0e2dc81072300acbda5deca2e02f98485eafa9.tar.bz2 linux-ec0e2dc81072300acbda5deca2e02f98485eafa9.zip |
Merge tag 'vfio-v6.6-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- VFIO direct character device (cdev) interface support. This extracts
the vfio device fd from the container and group model, and is
intended to be the native uAPI for use with IOMMUFD (Yi Liu)
- Enhancements to the PCI hot reset interface in support of cdev usage
(Yi Liu)
- Fix a potential race between registering and unregistering vfio files
in the kvm-vfio interface and extend use of a lock to avoid extra
drop and acquires (Dmitry Torokhov)
- A new vfio-pci variant driver for the AMD/Pensando Distributed
Services Card (PDS) Ethernet device, supporting live migration (Brett
Creeley)
- Cleanups to remove redundant owner setup in cdx and fsl bus drivers,
and simplify driver init/exit in fsl code (Li Zetao)
- Fix uninitialized hole in data structure and pad capability
structures for alignment (Stefan Hajnoczi)
* tag 'vfio-v6.6-rc1' of https://github.com/awilliam/linux-vfio: (53 commits)
vfio/pds: Send type for SUSPEND_STATUS command
vfio/pds: fix return value in pds_vfio_get_lm_file()
pds_core: Fix function header descriptions
vfio: align capability structures
vfio/type1: fix cap_migration information leak
vfio/fsl-mc: Use module_fsl_mc_driver macro to simplify the code
vfio/cdx: Remove redundant initialization owner in vfio_cdx_driver
vfio/pds: Add Kconfig and documentation
vfio/pds: Add support for firmware recovery
vfio/pds: Add support for dirty page tracking
vfio/pds: Add VFIO live migration support
vfio/pds: register with the pds_core PF
pds_core: Require callers of register/unregister to pass PF drvdata
vfio/pds: Initial support for pds VFIO driver
vfio: Commonize combine_ranges for use in other VFIO drivers
kvm/vfio: avoid bouncing the mutex when adding and deleting groups
kvm/vfio: ensure kvg instance stays around in kvm_vfio_group_add()
docs: vfio: Add vfio device cdev description
vfio: Compile vfio_group infrastructure optionally
vfio: Move the IOMMU_CAP_CACHE_COHERENCY check in __vfio_register_dev()
...
Diffstat (limited to 'drivers')
40 files changed, 3403 insertions, 343 deletions
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index de675d799c7d..9cd9e9da60dd 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1474,6 +1474,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index ada693ea51a7..99d4b075df49 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -14,8 +14,8 @@ config IOMMUFD if IOMMUFD config IOMMUFD_VFIO_CONTAINER bool "IOMMUFD provides the VFIO container /dev/vfio/vfio" - depends on VFIO && !VFIO_CONTAINER - default VFIO && !VFIO_CONTAINER + depends on VFIO_GROUP && !VFIO_CONTAINER + default VFIO_GROUP && !VFIO_CONTAINER help IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on IOMMUFD providing compatibility emulation to give the same ioctls. diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ed2937a4e196..1d49aad48c67 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -99,6 +99,36 @@ out_group_put: EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); /** + * iommufd_ctx_has_group - True if any device within the group is bound + * to the ictx + * @ictx: iommufd file descriptor + * @group: Pointer to a physical iommu_group struct + * + * True if any device within the group has been bound to this ictx, ex. via + * iommufd_device_bind(), therefore implying ictx ownership of the group. + */ +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) +{ + struct iommufd_object *obj; + unsigned long index; + + if (!ictx || !group) + return false; + + xa_lock(&ictx->objects); + xa_for_each(&ictx->objects, index, obj) { + if (obj->type == IOMMUFD_OBJ_DEVICE && + container_of(obj, struct iommufd_device, obj)->group == group) { + xa_unlock(&ictx->objects); + return true; + } + } + xa_unlock(&ictx->objects); + return false; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); + +/** * iommufd_device_unbind - Undo iommufd_device_bind() * @idev: Device returned by iommufd_device_bind() * @@ -113,6 +143,18 @@ void iommufd_device_unbind(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) +{ + return idev->ictx; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); + +u32 iommufd_device_to_id(struct iommufd_device *idev) +{ + return idev->obj.id; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); + static int iommufd_device_setup_msi(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt, phys_addr_t sw_msi_start) @@ -441,6 +483,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; + mutex_init(&access->ioas_lock); return access; } EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); @@ -457,26 +500,60 @@ void iommufd_access_destroy(struct iommufd_access *access) } EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); +void iommufd_access_detach(struct iommufd_access *access) +{ + struct iommufd_ioas *cur_ioas = access->ioas; + + mutex_lock(&access->ioas_lock); + if (WARN_ON(!access->ioas)) + goto out; + /* + * Set ioas to NULL to block any further iommufd_access_pin_pages(). + * iommufd_access_unpin_pages() can continue using access->ioas_unpin. + */ + access->ioas = NULL; + + if (access->ops->unmap) { + mutex_unlock(&access->ioas_lock); + access->ops->unmap(access->data, 0, ULONG_MAX); + mutex_lock(&access->ioas_lock); + } + iopt_remove_access(&cur_ioas->iopt, access); + refcount_dec(&cur_ioas->obj.users); +out: + access->ioas_unpin = NULL; + mutex_unlock(&access->ioas_lock); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); + int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { struct iommufd_ioas *new_ioas; int rc = 0; - if (access->ioas) + mutex_lock(&access->ioas_lock); + if (WARN_ON(access->ioas || access->ioas_unpin)) { + mutex_unlock(&access->ioas_lock); return -EINVAL; + } new_ioas = iommufd_get_ioas(access->ictx, ioas_id); - if (IS_ERR(new_ioas)) + if (IS_ERR(new_ioas)) { + mutex_unlock(&access->ioas_lock); return PTR_ERR(new_ioas); + } rc = iopt_add_access(&new_ioas->iopt, access); if (rc) { + mutex_unlock(&access->ioas_lock); iommufd_put_object(&new_ioas->obj); return rc; } iommufd_ref_to_users(&new_ioas->obj); access->ioas = new_ioas; + access->ioas_unpin = new_ioas; + mutex_unlock(&access->ioas_lock); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); @@ -531,8 +608,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; @@ -540,6 +617,17 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) return; + mutex_lock(&access->ioas_lock); + /* + * The driver must be doing something wrong if it calls this before an + * iommufd_access_attach() or after an iommufd_access_detach(). + */ + if (WARN_ON(!access->ioas_unpin)) { + mutex_unlock(&access->ioas_lock); + return; + } + iopt = &access->ioas_unpin->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) iopt_area_remove_access( @@ -549,6 +637,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, min(last_iova, iopt_area_last_iova(area)))); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); } EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); @@ -594,8 +683,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; int rc; @@ -610,6 +699,13 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -640,6 +736,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return 0; err_remove: @@ -654,6 +751,7 @@ err_remove: iopt_area_last_iova(area)))); } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); @@ -673,8 +771,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t length, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; int rc; @@ -684,6 +782,13 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -710,6 +815,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, rc = -ENOENT; err_out: up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f9790983699c..0349d7aea59a 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -296,6 +296,8 @@ struct iommufd_access { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_ioas *ioas; + struct iommufd_ioas *ioas_unpin; + struct mutex ioas_lock; const struct iommufd_access_ops *ops; void *data; unsigned long iova_alignment; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 4cf5f73f2708..5f34be2a1f7f 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -50,7 +50,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, * before calling iommufd_object_finalize(). */ rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, - xa_limit_32b, GFP_KERNEL_ACCOUNT); + xa_limit_31b, GFP_KERNEL_ACCOUNT); if (rc) goto out_free; return obj; @@ -418,6 +418,30 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); /** + * iommufd_ctx_from_fd - Acquires a reference to the iommufd context + * @fd: File descriptor to obtain the reference from + * + * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success + * the caller is responsible to call iommufd_ctx_put(). + */ +struct iommufd_ctx *iommufd_ctx_from_fd(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &iommufd_fops) { + fput(file); + return ERR_PTR(-EBADFD); + } + /* fget is the same as iommufd_ctx_get() */ + return file->private_data; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, IOMMUFD); + +/** * iommufd_ctx_put - Put back a reference * @ictx: Context to put back */ diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index fe02517c73cc..6c810bf80f99 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -483,6 +483,8 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, rc = cap_size; goto out_put; } + cap_size = ALIGN(cap_size, sizeof(u64)); + if (last_cap && info.argsz >= total_cap_size && put_user(total_cap_size, &last_cap->next)) { rc = -EFAULT; diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 6787a5fae908..11c23a7f3172 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -8,24 +8,19 @@ /** * pds_client_register - Link the client to the firmware - * @pf_pdev: ptr to the PF driver struct + * @pf: ptr to the PF driver's private data struct * @devname: name that includes service into, e.g. pds_core.vDPA * * Return: positive client ID (ci) on success, or * negative for error */ -int pds_client_register(struct pci_dev *pf_pdev, char *devname) +int pds_client_register(struct pdsc *pf, char *devname) { union pds_core_adminq_comp comp = {}; union pds_core_adminq_cmd cmd = {}; - struct pdsc *pf; int err; u16 ci; - pf = pci_get_drvdata(pf_pdev); - if (pf->state) - return -ENXIO; - cmd.client_reg.opcode = PDS_AQ_CMD_CLIENT_REG; strscpy(cmd.client_reg.devname, devname, sizeof(cmd.client_reg.devname)); @@ -53,23 +48,18 @@ EXPORT_SYMBOL_GPL(pds_client_register); /** * pds_client_unregister - Unlink the client from the firmware - * @pf_pdev: ptr to the PF driver struct + * @pf: ptr to the PF driver's private data struct * @client_id: id returned from pds_client_register() * * Return: 0 on success, or * negative for error */ -int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id) +int pds_client_unregister(struct pdsc *pf, u16 client_id) { union pds_core_adminq_comp comp = {}; union pds_core_adminq_cmd cmd = {}; - struct pdsc *pf; int err; - pf = pci_get_drvdata(pf_pdev); - if (pf->state) - return -ENXIO; - cmd.client_unreg.opcode = PDS_AQ_CMD_CLIENT_UNREG; cmd.client_unreg.client_id = cpu_to_le16(client_id); @@ -198,7 +188,7 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) padev = pf->vfs[cf->vf_id].padev; if (padev) { - pds_client_unregister(pf->pdev, padev->client_id); + pds_client_unregister(pf, padev->client_id); auxiliary_device_delete(&padev->aux_dev); auxiliary_device_uninit(&padev->aux_dev); padev->client_id = 0; @@ -243,7 +233,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) */ snprintf(devname, sizeof(devname), "%s.%s.%d", PDS_CORE_DRV_NAME, pf->viftype_status[vt].name, cf->uid); - client_id = pds_client_register(pf->pdev, devname); + client_id = pds_client_register(pf, devname); if (client_id < 0) { err = client_id; goto out_unlock; @@ -252,7 +242,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) padev = pdsc_auxbus_dev_register(cf, pf, client_id, pf->viftype_status[vt].name); if (IS_ERR(padev)) { - pds_client_unregister(pf->pdev, client_id); + pds_client_unregister(pf, client_id); err = PTR_ERR(padev); goto out_unlock; } diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 5b53b94f13c7..cba4971618ff 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -632,6 +632,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; struct mdev_driver vfio_ccw_mdev_driver = { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 0509f80622cd..4db538a55192 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -2020,6 +2020,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, .request = vfio_ap_mdev_request }; diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index aba36f5be4ec..6bda6dbb4878 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -4,6 +4,8 @@ menuconfig VFIO select IOMMU_API depends on IOMMUFD || !IOMMUFD select INTERVAL_TREE + select VFIO_GROUP if SPAPR_TCE_IOMMU || IOMMUFD=n + select VFIO_DEVICE_CDEV if !VFIO_GROUP select VFIO_CONTAINER if IOMMUFD=n help VFIO provides a framework for secure userspace device drivers. @@ -12,9 +14,33 @@ menuconfig VFIO If you don't know what to do here, say N. if VFIO +config VFIO_DEVICE_CDEV + bool "Support for the VFIO cdev /dev/vfio/devices/vfioX" + depends on IOMMUFD && !SPAPR_TCE_IOMMU + default !VFIO_GROUP + help + The VFIO device cdev is another way for userspace to get device + access. Userspace gets device fd by opening device cdev under + /dev/vfio/devices/vfioX, and then bind the device fd with an iommufd + to set up secure DMA context for device access. This interface does + not support noiommu. + + If you don't know what to do here, say N. + +config VFIO_GROUP + bool "Support for the VFIO group /dev/vfio/$group_id" + default y + help + VFIO group support provides the traditional model for accessing + devices through VFIO and is used by the majority of userspace + applications and drivers making use of VFIO. + + If you don't know what to do here, say Y. + config VFIO_CONTAINER bool "Support for the VFIO container /dev/vfio/vfio" select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + depends on VFIO_GROUP default y help The VFIO container is the classic interface to VFIO for establishing @@ -36,6 +62,7 @@ endif config VFIO_NOIOMMU bool "VFIO No-IOMMU support" + depends on VFIO_GROUP help VFIO is built on the ability to isolate devices using the IOMMU. Only with an IOMMU can userspace access to DMA capable devices be diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 66f418aef5a9..c82ea032d352 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -2,8 +2,9 @@ obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ - group.o \ iova_bitmap.o +vfio-$(CONFIG_VFIO_DEVICE_CDEV) += device_cdev.o +vfio-$(CONFIG_VFIO_GROUP) += group.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c index c376a69d2db2..de56686581ae 100644 --- a/drivers/vfio/cdx/main.c +++ b/drivers/vfio/cdx/main.c @@ -223,7 +223,6 @@ static struct cdx_driver vfio_cdx_driver = { .match_id_table = vfio_cdx_table, .driver = { .name = "vfio-cdx", - .owner = THIS_MODULE, }, .driver_managed_dma = true, }; diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c new file mode 100644 index 000000000000..e75da0a70d1f --- /dev/null +++ b/drivers/vfio/device_cdev.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Intel Corporation. + */ +#include <linux/vfio.h> +#include <linux/iommufd.h> + +#include "vfio.h" + +static dev_t device_devt; + +void vfio_init_device_cdev(struct vfio_device *device) +{ + device->device.devt = MKDEV(MAJOR(device_devt), device->index); + cdev_init(&device->cdev, &vfio_device_fops); + device->cdev.owner = THIS_MODULE; +} + +/* + * device access via the fd opened by this function is blocked until + * .open_device() is called successfully during BIND_IOMMUFD. + */ +int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep) +{ + struct vfio_device *device = container_of(inode->i_cdev, + struct vfio_device, cdev); + struct vfio_device_file *df; + int ret; + + /* Paired with the put in vfio_device_fops_release() */ + if (!vfio_device_try_get_registration(device)) + return -ENODEV; + + df = vfio_allocate_device_file(device); + if (IS_ERR(df)) { + ret = PTR_ERR(df); + goto err_put_registration; + } + + filep->private_data = df; + + return 0; + +err_put_registration: + vfio_device_put_registration(device); + return ret; +} + +static void vfio_df_get_kvm_safe(struct vfio_device_file *df) +{ + spin_lock(&df->kvm_ref_lock); + vfio_device_get_kvm_safe(df->device, df->kvm); + spin_unlock(&df->kvm_ref_lock); +} + +long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, + struct vfio_device_bind_iommufd __user *arg) +{ + struct vfio_device *device = df->device; + struct vfio_device_bind_iommufd bind; + unsigned long minsz; + int ret; + + static_assert(__same_type(arg->out_devid, df->devid)); + + minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid); + + if (copy_from_user(&bind, arg, minsz)) + return -EFAULT; + + if (bind.argsz < minsz || bind.flags || bind.iommufd < 0) + return -EINVAL; + + /* BIND_IOMMUFD only allowed for cdev fds */ + if (df->group) + return -EINVAL; + + ret = vfio_device_block_group(device); + if (ret) + return ret; + + mutex_lock(&device->dev_set->lock); + /* one device cannot be bound twice */ + if (df->access_granted) { + ret = -EINVAL; + goto out_unlock; + } + + df->iommufd = iommufd_ctx_from_fd(bind.iommufd); + if (IS_ERR(df->iommufd)) { + ret = PTR_ERR(df->iommufd); + df->iommufd = NULL; + goto out_unlock; + } + + /* + * Before the device open, get the KVM pointer currently + * associated with the device file (if there is) and obtain + * a reference. This reference is held until device closed. + * Save the pointer in the device for use by drivers. + */ + vfio_df_get_kvm_safe(df); + + ret = vfio_df_open(df); + if (ret) + goto out_put_kvm; + + ret = copy_to_user(&arg->out_devid, &df->devid, + sizeof(df->devid)) ? -EFAULT : 0; + if (ret) + goto out_close_device; + + device->cdev_opened = true; + /* + * Paired with smp_load_acquire() in vfio_device_fops::ioctl/ + * read/write/mmap + */ + smp_store_release(&df->access_granted, true); + mutex_unlock(&device->dev_set->lock); + return 0; + +out_close_device: + vfio_df_close(df); +out_put_kvm: + vfio_device_put_kvm(device); + iommufd_ctx_put(df->iommufd); + df->iommufd = NULL; +out_unlock: + mutex_unlock(&device->dev_set->lock); + vfio_device_unblock_group(device); + return ret; +} + +void vfio_df_unbind_iommufd(struct vfio_device_file *df) +{ + struct vfio_device *device = df->device; + + /* + * In the time of close, there is no contention with another one + * changing this flag. So read df->access_granted without lock + * and no smp_load_acquire() is ok. + */ + if (!df->access_granted) |