diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-20 14:56:50 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-20 14:56:50 -0700 |
| commit | 30aec6e1bb617e1349d7fa5498898d7d4351d71e (patch) | |
| tree | 31d30bc1ae4e5ec09efc91a8a51dba80d0a1f2ee /drivers/vfio | |
| parent | 70ec81c2e2b4005465ad0d042e90b36087c36104 (diff) | |
| parent | cbb325e77fbe62a06184175aa98c9eb98736c3e8 (diff) | |
| download | linux-30aec6e1bb617e1349d7fa5498898d7d4351d71e.tar.gz linux-30aec6e1bb617e1349d7fa5498898d7d4351d71e.tar.bz2 linux-30aec6e1bb617e1349d7fa5498898d7d4351d71e.zip | |
Merge tag 'vfio-v6.10-rc1' of https://github.com/awilliam/linux-vfio
Pull vfio updates from Alex Williamson:
- The vfio fsl-mc bus driver has become orphaned. We'll consider
removing it in future releases if a new maintainer isn't found (Alex
Williamson)
- Improved usage of opaque data in vfio-pci INTx handling, avoiding
lookups of the eventfd through the interrupt and irqfd runtime paths
(Alex Williamson)
- Resolve an error path memory leak introduced in vfio-pci interrupt
code (Ye Bin)
- Addition of interrupt support for vfio devices exposed on the CDX
bus, including a new MSI allocation helper and export of existing
helpers for MSI alloc and free (Nipun Gupta)
- A new vfio-pci variant driver supporting migration of Intel QAT VF
devices for the GEN4 PFs (Xin Zeng & Yahui Cao)
- Resolve a possibly circular locking dependency in vfio-pci by
avoiding copy_to_user() from a PCI bus walk callback (Alex
Williamson)
- Trivial docs update to remove a duplicate semicolon (Foryun Ma)
* tag 'vfio-v6.10-rc1' of https://github.com/awilliam/linux-vfio:
vfio/pci: Restore zero affected bus reset devices warning
vfio: remove an extra semicolon
vfio/pci: Collect hot-reset devices to local buffer
vfio/qat: Add vfio_pci driver for Intel QAT SR-IOV VF devices
vfio/cdx: add interrupt support
genirq/msi: Add MSI allocation helper and export MSI functions
vfio/pci: fix potential memory leak in vfio_intx_enable()
vfio/pci: Pass eventfd context object through irqfd
vfio/pci: Pass eventfd context to IRQ handler
MAINTAINERS: Orphan vfio fsl-mc bus driver
Diffstat (limited to 'drivers/vfio')
| -rw-r--r-- | drivers/vfio/cdx/Makefile | 2 | ||||
| -rw-r--r-- | drivers/vfio/cdx/intr.c | 217 | ||||
| -rw-r--r-- | drivers/vfio/cdx/main.c | 63 | ||||
| -rw-r--r-- | drivers/vfio/cdx/private.h | 18 | ||||
| -rw-r--r-- | drivers/vfio/pci/Kconfig | 2 | ||||
| -rw-r--r-- | drivers/vfio/pci/Makefile | 2 | ||||
| -rw-r--r-- | drivers/vfio/pci/qat/Kconfig | 12 | ||||
| -rw-r--r-- | drivers/vfio/pci/qat/Makefile | 3 | ||||
| -rw-r--r-- | drivers/vfio/pci/qat/main.c | 702 | ||||
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 81 | ||||
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 61 |
11 files changed, 1098 insertions, 65 deletions
diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile index cd4a2e6fe609..df92b320122a 100644 --- a/drivers/vfio/cdx/Makefile +++ b/drivers/vfio/cdx/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o -vfio-cdx-objs := main.o +vfio-cdx-objs := main.o intr.o diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c new file mode 100644 index 000000000000..986fa2a45fa4 --- /dev/null +++ b/drivers/vfio/cdx/intr.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#include <linux/vfio.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/eventfd.h> +#include <linux/msi.h> +#include <linux/interrupt.h> + +#include "linux/cdx/cdx_bus.h" +#include "private.h" + +static irqreturn_t vfio_cdx_msihandler(int irq_no, void *arg) +{ + struct eventfd_ctx *trigger = arg; + + eventfd_signal(trigger); + return IRQ_HANDLED; +} + +static int vfio_cdx_msi_enable(struct vfio_cdx_device *vdev, int nvec) +{ + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + struct device *dev = vdev->vdev.dev; + int msi_idx, ret; + + vdev->cdx_irqs = kcalloc(nvec, sizeof(struct vfio_cdx_irq), GFP_KERNEL); + if (!vdev->cdx_irqs) + return -ENOMEM; + + ret = cdx_enable_msi(cdx_dev); + if (ret) { + kfree(vdev->cdx_irqs); + return ret; + } + + /* Allocate cdx MSIs */ + ret = msi_domain_alloc_irqs(dev, MSI_DEFAULT_DOMAIN, nvec); + if (ret) { + cdx_disable_msi(cdx_dev); + kfree(vdev->cdx_irqs); + return ret; + } + + for (msi_idx = 0; msi_idx < nvec; msi_idx++) + vdev->cdx_irqs[msi_idx].irq_no = msi_get_virq(dev, msi_idx); + + vdev->msi_count = nvec; + vdev->config_msi = 1; + + return 0; +} + +static int vfio_cdx_msi_set_vector_signal(struct vfio_cdx_device *vdev, + int vector, int fd) +{ + struct eventfd_ctx *trigger; + int irq_no, ret; + + if (vector < 0 || vector >= vdev->msi_count) + return -EINVAL; + + irq_no = vdev->cdx_irqs[vector].irq_no; + + if (vdev->cdx_irqs[vector].trigger) { + free_irq(irq_no, vdev->cdx_irqs[vector].trigger); + kfree(vdev->cdx_irqs[vector].name); + eventfd_ctx_put(vdev->cdx_irqs[vector].trigger); + vdev->cdx_irqs[vector].trigger = NULL; + } + + if (fd < 0) + return 0; + + vdev->cdx_irqs[vector].name = kasprintf(GFP_KERNEL, "vfio-msi[%d](%s)", + vector, dev_name(vdev->vdev.dev)); + if (!vdev->cdx_irqs[vector].name) + return -ENOMEM; + + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + kfree(vdev->cdx_irqs[vector].name); + return PTR_ERR(trigger); + } + + ret = request_irq(irq_no, vfio_cdx_msihandler, 0, + vdev->cdx_irqs[vector].name, trigger); + if (ret) { + kfree(vdev->cdx_irqs[vector].name); + eventfd_ctx_put(trigger); + return ret; + } + + vdev->cdx_irqs[vector].trigger = trigger; + + return 0; +} + +static int vfio_cdx_msi_set_block(struct vfio_cdx_device *vdev, + unsigned int start, unsigned int count, + int32_t *fds) +{ + int i, j, ret = 0; + + if (start >= vdev->msi_count || start + count > vdev->msi_count) + return -EINVAL; + + for (i = 0, j = start; i < count && !ret; i++, j++) { + int fd = fds ? fds[i] : -1; + + ret = vfio_cdx_msi_set_vector_signal(vdev, j, fd); + } + + if (ret) { + for (--j; j >= (int)start; j--) + vfio_cdx_msi_set_vector_signal(vdev, j, -1); + } + + return ret; +} + +static void vfio_cdx_msi_disable(struct vfio_cdx_device *vdev) +{ + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + struct device *dev = vdev->vdev.dev; + + vfio_cdx_msi_set_block(vdev, 0, vdev->msi_count, NULL); + + if (!vdev->config_msi) + return; + + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); + cdx_disable_msi(cdx_dev); + kfree(vdev->cdx_irqs); + + vdev->cdx_irqs = NULL; + vdev->msi_count = 0; + vdev->config_msi = 0; +} + +static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev, + unsigned int index, unsigned int start, + unsigned int count, u32 flags, + void *data) +{ + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + int i; + + if (start + count > cdx_dev->num_msi) + return -EINVAL; + + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) { + vfio_cdx_msi_disable(vdev); + return 0; + } + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + s32 *fds = data; + int ret; + + if (vdev->config_msi) + return vfio_cdx_msi_set_block(vdev, start, count, + fds); + ret = vfio_cdx_msi_enable(vdev, cdx_dev->num_msi); + if (ret) + return ret; + + ret = vfio_cdx_msi_set_block(vdev, start, count, fds); + if (ret) + vfio_cdx_msi_disable(vdev); + + return ret; + } + + for (i = start; i < start + count; i++) { + if (!vdev->cdx_irqs[i].trigger) + continue; + if (flags & VFIO_IRQ_SET_DATA_NONE) { + eventfd_signal(vdev->cdx_irqs[i].trigger); + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + u8 *bools = data; + + if (bools[i - start]) + eventfd_signal(vdev->cdx_irqs[i].trigger); + } + } + + return 0; +} + +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev, + u32 flags, unsigned int index, + unsigned int start, unsigned int count, + void *data) +{ + if (flags & VFIO_IRQ_SET_ACTION_TRIGGER) + return vfio_cdx_set_msi_trigger(vdev, index, start, + count, flags, data); + else + return -EINVAL; +} + +/* Free All IRQs for the given device */ +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev) +{ + /* + * Device does not support any interrupt or the interrupts + * were not configured + */ + if (!vdev->cdx_irqs) + return; + + vfio_cdx_set_msi_trigger(vdev, 0, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL); +} diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c index 9cff8d75789e..67465fad5b4b 100644 --- a/drivers/vfio/cdx/main.c +++ b/drivers/vfio/cdx/main.c @@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev) kfree(vdev->regions); cdx_dev_reset(core_vdev->dev); + vfio_cdx_irqs_cleanup(vdev); } static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags, @@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev, info.flags |= VFIO_DEVICE_FLAGS_RESET; info.num_regions = cdx_dev->res_count; - info.num_irqs = 0; + info.num_irqs = cdx_dev->num_msi ? 1 : 0; return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; } @@ -152,6 +153,62 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev, return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; } +static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev, + struct vfio_irq_info __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_irq_info, count); + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + struct vfio_irq_info info; + + if (copy_from_user(&info, arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= 1) + return -EINVAL; + + if (!cdx_dev->num_msi) + return -EINVAL; + + info.flags = VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE; + info.count = cdx_dev->num_msi; + + return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; +} + +static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev, + struct vfio_irq_set __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_irq_set, count); + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev); + struct vfio_irq_set hdr; + size_t data_size = 0; + u8 *data = NULL; + int ret = 0; + + if (copy_from_user(&hdr, arg, minsz)) + return -EFAULT; + + ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi, + 1, &data_size); + if (ret) + return ret; + + if (data_size) { + data = memdup_user(arg->data, data_size); + if (IS_ERR(data)) + return PTR_ERR(data); + } + + ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index, + hdr.start, hdr.count, data); + kfree(data); + + return ret; +} + static long vfio_cdx_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { @@ -164,6 +221,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev, return vfio_cdx_ioctl_get_info(vdev, uarg); case VFIO_DEVICE_GET_REGION_INFO: return vfio_cdx_ioctl_get_region_info(vdev, uarg); + case VFIO_DEVICE_GET_IRQ_INFO: + return vfio_cdx_ioctl_get_irq_info(vdev, uarg); + case VFIO_DEVICE_SET_IRQS: + return vfio_cdx_ioctl_set_irqs(vdev, uarg); case VFIO_DEVICE_RESET: return cdx_dev_reset(core_vdev->dev); default: diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h index 8e9d25913728..dc56729b3114 100644 --- a/drivers/vfio/cdx/private.h +++ b/drivers/vfio/cdx/private.h @@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index) return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT); } +struct vfio_cdx_irq { + u32 flags; + u32 count; + int irq_no; + struct eventfd_ctx *trigger; + char *name; +}; + struct vfio_cdx_region { u32 flags; u32 type; @@ -23,8 +31,18 @@ struct vfio_cdx_region { struct vfio_cdx_device { struct vfio_device vdev; struct vfio_cdx_region *regions; + struct vfio_cdx_irq *cdx_irqs; u32 flags; #define BME_SUPPORT BIT(0) + u32 msi_count; + u8 config_msi; }; +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev, + u32 flags, unsigned int index, + unsigned int start, unsigned int count, + void *data); + +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev); + #endif /* VFIO_CDX_PRIVATE_H */ diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 15821a2d77d2..bf50ffa10bde 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -69,4 +69,6 @@ source "drivers/vfio/pci/virtio/Kconfig" source "drivers/vfio/pci/nvgrace-gpu/Kconfig" +source "drivers/vfio/pci/qat/Kconfig" + endmenu diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index ce7a61f1d912..cf00c0a7e55c 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -17,3 +17,5 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds/ obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/ obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/ + +obj-$(CONFIG_QAT_VFIO_PCI) += qat/ diff --git a/drivers/vfio/pci/qat/Kconfig b/drivers/vfio/pci/qat/Kconfig new file mode 100644 index 000000000000..bf52cfa4b595 --- /dev/null +++ b/drivers/vfio/pci/qat/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +config QAT_VFIO_PCI + tristate "VFIO support for QAT VF PCI devices" + select VFIO_PCI_CORE + depends on CRYPTO_DEV_QAT_4XXX + help + This provides migration support for Intel(R) QAT Virtual Function + using the VFIO framework. + + To compile this as a module, choose M here: the module + will be called qat_vfio_pci. If you don't know what to do here, + say N. diff --git a/drivers/vfio/pci/qat/Makefile b/drivers/vfio/pci/qat/Makefile new file mode 100644 index 000000000000..5fe5c4ec19d3 --- /dev/null +++ b/drivers/vfio/pci/qat/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_QAT_VFIO_PCI) += qat_vfio_pci.o +qat_vfio_pci-y := main.o diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c new file mode 100644 index 000000000000..e36740a282e7 --- /dev/null +++ b/drivers/vfio/pci/qat/main.c @@ -0,0 +1,702 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ + +#include <linux/anon_inodes.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/sizes.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vfio_pci_core.h> +#include <linux/qat/qat_mig_dev.h> + +/* + * The migration data of each Intel QAT VF device is encapsulated into a + * 4096 bytes block. The data consists of two parts. + * The first is a pre-configured set of attributes of the VF being migrated, + * which are only set when it is created. This can be migrated during pre-copy + * stage and used for a device compatibility check. + * The second is the VF state. This includes the required MMIO regions and + * the shadow states maintained by the QAT PF driver. This part can only be + * saved when the VF is fully quiesced and be migrated during stop-copy stage. + * Both these 2 parts of data are saved in hierarchical structures including + * a preamble section and several raw state sections. + * When the pre-configured part of the migration data is fully retrieved from + * user space, the preamble section are used to validate the correctness of + * the data blocks and check the version compatibility. The raw state sections + * are then used to do a device compatibility check. + * When the device transits from RESUMING state, the VF states are extracted + * from the raw state sections of the VF state part of the migration data and + * then loaded into the device. + */ + +struct qat_vf_migration_file { + struct file *filp; + /* protects migration region context */ + struct mutex lock; + bool disabled; + struct qat_vf_core_device *qat_vdev; + ssize_t filled_size; +}; + +struct qat_vf_core_device { + struct vfio_pci_core_device core_device; + struct qat_mig_dev *mdev; + /* protects migration state */ + struct mutex state_mutex; + enum vfio_device_mig_state mig_state; + struct qat_vf_migration_file *resuming_migf; + struct qat_vf_migration_file *saving_migf; +}; + +static int qat_vf_pci_open_device(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = + container_of(core_vdev, struct qat_vf_core_device, + core_device.vdev); + struct vfio_pci_core_device *vdev = &qat_vdev->core_device; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + ret = qat_vfmig_open(qat_vdev->mdev); + if (ret) { + vfio_pci_core_disable(vdev); + return ret; + } + qat_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + + vfio_pci_core_finish_enable(vdev); + + return 0; +} + +static void qat_vf_disable_fd(struct qat_vf_migration_file *migf) +{ + mutex_lock(&migf->lock); + migf->disabled = true; + migf->filp->f_pos = 0; + migf->filled_size = 0; + mutex_unlock(&migf->lock); +} + +static void qat_vf_disable_fds(struct qat_vf_core_device *qat_vdev) +{ + if (qat_vdev->resuming_migf) { + qat_vf_disable_fd(qat_vdev->resuming_migf); + fput(qat_vdev->resuming_migf->filp); + qat_vdev->resuming_migf = NULL; + } + + if (qat_vdev->saving_migf) { + qat_vf_disable_fd(qat_vdev->saving_migf); + fput(qat_vdev->saving_migf->filp); + qat_vdev->saving_migf = NULL; + } +} + +static void qat_vf_pci_close_device(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = container_of(core_vdev, + struct qat_vf_core_device, core_device.vdev); + + qat_vfmig_close(qat_vdev->mdev); + qat_vf_disable_fds(qat_vdev); + vfio_pci_core_close_device(core_vdev); +} + +static long qat_vf_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct qat_vf_migration_file *migf = filp->private_data; + struct qat_vf_core_device *qat_vdev = migf->qat_vdev; + struct qat_mig_dev *mig_dev = qat_vdev->mdev; + struct vfio_precopy_info info; + loff_t *pos = &filp->f_pos; + unsigned long minsz; + int ret = 0; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&qat_vdev->state_mutex); + if (qat_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && + qat_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + mutex_unlock(&qat_vdev->state_mutex); + return -EINVAL; + } + + mutex_lock(&migf->lock); + if (migf->disabled) { + ret = -ENODEV; + goto out; + } + + if (*pos > mig_dev->setup_size) { + ret = -EINVAL; + goto out; + } + + info.dirty_bytes = 0; + info.initial_bytes = mig_dev->setup_size - *pos; + +out: + mutex_unlock(&migf->lock); + mutex_unlock(&qat_vdev->state_mutex); + if (ret) + return ret; + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; +} + +static ssize_t qat_vf_save_read(struct file *filp, char __user *buf, + size_t len, loff_t *pos) +{ + struct qat_vf_migration_file *migf = filp->private_data; + struct qat_mig_dev *mig_dev = migf->qat_vdev->mdev; + ssize_t done = 0; + loff_t *offs; + int ret; + + if (pos) + return -ESPIPE; + offs = &filp->f_pos; + + mutex_lock(&migf->lock); + if (*offs > migf->filled_size || *offs < 0) { + done = -EINVAL; + goto out_unlock; + } + + if (migf->disabled) { + done = -ENODEV; + goto out_unlock; + } + + len = min_t(size_t, migf->filled_size - *offs, len); + if (len) { + ret = copy_to_user(buf, mig_dev->state + *offs, len); + if (ret) { + done = -EFAULT; + goto out_unlock; + } + *offs += len; + done = len; + } + +out_unlock: + mutex_unlock(&migf->lock); + return done; +} + +static int qat_vf_release_file(struct inode *inode, struct file *filp) +{ + struct qat_vf_migration_file *migf = filp->private_data; + + qat_vf_disable_fd(migf); + mutex_destroy(&migf->lock); + kfree(migf); + + return 0; +} + +static const struct file_operations qat_vf_save_fops = { + .owner = THIS_MODULE, + .read = qat_vf_save_read, + .unlocked_ioctl = qat_vf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .release = qat_vf_release_file, + .llseek = no_llseek, +}; + +static int qat_vf_save_state(struct qat_vf_core_device *qat_vdev, + struct qat_vf_migration_file *migf) +{ + int ret; + + ret = qat_vfmig_save_state(qat_vdev->mdev); + if (ret) + return ret; + migf->filled_size = qat_vdev->mdev->state_size; + + return 0; +} + +static int qat_vf_save_setup(struct qat_vf_core_device *qat_vdev, + struct qat_vf_migration_file *migf) +{ + int ret; + + ret = qat_vfmig_save_setup(qat_vdev->mdev); + if (ret) + return ret; + migf->filled_size = qat_vdev->mdev->setup_size; + + return 0; +} + +/* + * Allocate a file handler for user space and then save the migration data for + * the device being migrated. If this is called in the pre-copy stage, save the + * pre-configured device data. Otherwise, if this is called in the stop-copy + * stage, save the device state. In both cases, update the data size which can + * then be read from user space. + */ +static struct qat_vf_migration_file * +qat_vf_save_device_data(struct qat_vf_core_device *qat_vdev, bool pre_copy) +{ + struct qat_vf_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("qat_vf_mig", &qat_vf_save_fops, + migf, O_RDONLY); + ret = PTR_ERR_OR_ZERO(migf->filp); + if (ret) { + kfree(migf); + return ERR_PTR(ret); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + + if (pre_copy) + ret = qat_vf_save_setup(qat_vdev, migf); + else + ret = qat_vf_save_state(qat_vdev, migf); + if (ret) { + fput(migf->filp); + return ERR_PTR(ret); + } + + migf->qat_vdev = qat_vdev; + + return migf; +} + +static ssize_t qat_vf_resume_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct qat_vf_migration_file *migf = filp->private_data; + struct qat_mig_dev *mig_dev = migf->qat_vdev->mdev; + loff_t end, *offs; + ssize_t done = 0; + int ret; + + if (pos) + return -ESPIPE; + offs = &filp->f_pos; + + if (*offs < 0 || + check_add_overflow((loff_t)len, *offs, &end)) + return -EOVERFLOW; + + if (end > mig_dev->state_size) + return -ENOMEM; + + mutex_lock(&migf->lock); + if (migf->disabled) { + done = -ENODEV; + goto out_unlock; + } + + ret = copy_from_user(mig_dev->state + *offs, buf, len); + if (ret) { + done = -EFAULT; + goto out_unlock; + } + *offs += len; + migf->filled_size += len; + + /* + * Load the pre-configured device data first to check if the target + * device is compatible with the source device. + */ + ret = qat_vfmig_load_setup(mig_dev, migf->filled_size); + if (ret && ret != -EAGAIN) { + done = ret; + goto out_unlock; + } + done = len; + +out_unlock: + mutex_unlock(&migf->lock); + return done; +} + +static const struct file_operations qat_vf_resume_fops = { + .owner = THIS_MODULE, + .write = qat_vf_resume_write, + .release = qat_vf_release_file, + .llseek = no_llseek, +}; + +static struct qat_vf_migration_file * +qat_vf_resume_device_data(struct qat_vf_core_device *qat_vdev) +{ + struct qat_vf_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("qat_vf_mig", &qat_vf_resume_fops, migf, O_WRONLY); + ret = PTR_ERR_OR_ZERO(migf->filp); + if (ret) { + kfree(migf); + return ERR_PTR(ret); + } + + migf->qat_vdev = qat_vdev; + migf->filled_size = 0; + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + + return migf; +} + +static int qat_vf_load_device_data(struct qat_vf_core_device *qat_vdev) +{ + return qat_vfmig_load_state(qat_vdev->mdev); +} + +static struct file *qat_vf_pci_step_device_state(struct qat_vf_core_device *qat_vdev, u32 new) +{ + u32 cur = qat_vdev->mig_state; + int ret; + + /* + * As the device is not capable of just stopping P2P DMAs, suspend the + * device completely once any of the P2P states are reached. + * When it is suspended, all its MMIO registers can still be operated + * correctly, jobs submitted through ring are queued while no jobs are + * processed by the device. The MMIO states can be safely migrated to + * the target VF during stop-copy stage and restored correctly in the + * target VF. All queued jobs can be resumed then. + */ + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + ret = qat_vfmig_suspend(qat_vdev->mdev); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { + qat_vfmig_resume(qat_vdev->mdev); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P)) + return NULL; + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { + struct qat_vf_migration_file *migf; + + migf = qat_vf_save_device_data(qat_vdev, false); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + qat_vdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { + struct qat_vf_migration_file *migf; + + migf = qat_vf_resume_device_data(qat_vdev); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + qat_vdev->resuming_migf = migf; + return migf->filp; + } + + if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { + qat_vf_disable_fds(qat_vdev); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + struct qat_vf_migration_file *migf; + + migf = qat_vf_save_device_data(qat_vdev, true); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + qat_vdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) { + struct qat_vf_migration_file *migf = qat_vdev->saving_migf; + + if (!migf) + return ERR_PTR(-EINVAL); + ret = qat_vf_save_state(qat_vdev, migf); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { + ret = qat_vf_load_device_data(qat_vdev); + if (ret) + return ERR_PTR(ret); + + qat_vf_disable_fds(qat_vdev); + return NULL; + } + + /* vfio_mig_get_next_state() does not use arcs other than the above */ + WARN_ON(true); + return ERR_PTR(-EINVAL); +} + +static void qat_vf_reset_done(struct qat_vf_core_device *qat_vdev) +{ + qat_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + qat_vfmig_reset(qat_vdev->mdev); + qat_vf_disable_fds(qat_vdev); +} + +static struct file *qat_vf_pci_set_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state new_state) +{ + struct qat_vf_core_device *qat_vdev = container_of(vdev, + struct qat_vf_core_device, core_device.vdev); + enum vfio_device_mig_state next_state; + struct file *res = NULL; + int ret; + + mutex_lock(&qat_vdev->state_mutex); + while (new_state != qat_vdev->mig_state) { + ret = vfio_mig_get_next_state(vdev, qat_vdev->mig_state, + new_state, &next_state); + if (ret) { + res = ERR_PTR(ret); + break; + } + res = qat_vf_pci_step_device_state(qat_vdev, next_state); + if (IS_ERR(res)) + break; + qat_vdev->mig_state = next_state; + if (WARN_ON(res && new_state != qat_vdev->mig_state)) { + fput(res); + res = ERR_PTR(-EINVAL); + break; + } + } + mutex_unlock(&qat_vdev->state_mutex); + + return res; +} + +static int qat_vf_pci_get_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state *curr_state) +{ + struct qat_vf_core_device *qat_vdev = container_of(vdev, + struct qat_vf_core_device, core_device.vdev); + + mutex_lock(&qat_vdev->state_mutex); + *curr_state = qat_vdev->mig_state; + mutex_unlock(&qat_vdev->state_mutex); + + return 0; +} + +static int qat_vf_pci_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + struct qat_vf_core_device *qat_vdev = container_of(vdev, + struct qat_vf_core_device, core_device.vdev); + + mutex_lock(&qat_vdev->state_mutex); + *stop_copy_length = qat_vdev->mdev->state_size; + mutex_unlock(&qat_vdev->state_mutex); + + return 0; +} + +static const struct vfio_migration_ops qat_vf_pci_mig_ops = { + .migration_set_state = qat_vf_pci_set_device_state, + .migration_get_state = qat_vf_pci_get_device_state, + .migration_get_data_size = qat_vf_pci_get_data_size, +}; + +static void qat_vf_pci_release_dev(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = container_of(core_vdev, + struct qat_vf_core_device, core_device.vdev); + + qat_vfmig_cleanup(qat_vdev->mdev); + qat_vfmig_destroy(qat_vdev->mdev); + mutex_destroy(&qat_vdev->state_mutex); + vfio_pci_core_release_dev(core_vdev); +} + +static int qat_vf_pci_init_dev(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = container_of(core_vdev, + struct qat_vf_core_device, core_device.vdev); + struct qat_mig_dev *mdev; + struct pci_dev *parent; + int ret, vf_id; + + core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + core_vdev->mig_ops = &qat_vf_pci_mig_ops; + + ret = vfio_pci_core_init_dev(core_vdev); + if (ret) + return ret; + + mutex_init(&qat_vdev->state_mutex); + + parent = pci_physfn(qat_vdev->core_device.pdev); + vf_id = pci_iov_vf_id(qat_vdev->core_device.pdev); + if (vf_id < 0) { + ret = -ENODEV; + goto err_rel; + } + + mdev = qat_vfmig_create(parent, vf_id); + if (IS_ERR(mdev)) { + ret = PTR_ERR(mdev); + goto |
