summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/driver-api/vfio-mediated-device.rst108
-rw-r--r--Documentation/driver-api/vfio.rst82
-rw-r--r--Documentation/s390/vfio-ap.rst1
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/s390/Kconfig8
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--drivers/gpu/drm/i915/Kconfig3
-rw-r--r--drivers/vfio/container.c7
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc.c2
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c4
-rw-r--r--drivers/vfio/group.c46
-rw-r--r--drivers/vfio/mdev/Kconfig8
-rw-r--r--drivers/vfio/mdev/mdev_sysfs.c2
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c4
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c79
-rw-r--r--drivers/vfio/pci/mlx5/cmd.h28
-rw-r--r--drivers/vfio/pci/mlx5/main.c261
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c6
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c7
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c10
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c2
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c12
-rw-r--r--drivers/vfio/platform/vfio_platform_irq.c8
-rw-r--r--drivers/vfio/vfio.h25
-rw-r--r--drivers/vfio/vfio_iommu_type1.c248
-rw-r--r--drivers/vfio/vfio_main.c70
-rw-r--r--drivers/vfio/virqfd.c2
-rw-r--r--include/linux/vfio.h6
-rw-r--r--include/uapi/linux/vfio.h15
-rw-r--r--samples/Kconfig19
-rw-r--r--samples/vfio-mdev/README.rst100
33 files changed, 756 insertions, 422 deletions
diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst
index fdf7d69378ec..bbd548b66b42 100644
--- a/Documentation/driver-api/vfio-mediated-device.rst
+++ b/Documentation/driver-api/vfio-mediated-device.rst
@@ -60,7 +60,7 @@ devices as examples, as these devices are the first devices to use this module::
| mdev.ko |
| +-----------+ | mdev_register_parent() +--------------+
| | | +<------------------------+ |
- | | | | | nvidia.ko |<-> physical
+ | | | | | ccw_device.ko|<-> physical
| | | +------------------------>+ | device
| | | | callbacks +--------------+
| | Physical | |
@@ -69,12 +69,6 @@ devices as examples, as these devices are the first devices to use this module::
| | | | | i915.ko |<-> physical
| | | +------------------------>+ | device
| | | | callbacks +--------------+
- | | | |
- | | | | mdev_register_parent() +--------------+
- | | | +<------------------------+ |
- | | | | | ccw_device.ko|<-> physical
- | | | +------------------------>+ | device
- | | | | callbacks +--------------+
| +-----------+ |
+---------------+
@@ -270,106 +264,6 @@ these callbacks are supported in the TYPE1 IOMMU module. To enable them for
other IOMMU backend modules, such as PPC64 sPAPR module, they need to provide
these two callback functions.
-Using the Sample Code
-=====================
-
-mtty.c in samples/vfio-mdev/ directory is a sample driver program to
-demonstrate how to use the mediated device framework.
-
-The sample driver creates an mdev device that simulates a serial port over a PCI
-card.
-
-1. Build and load the mtty.ko module.
-
- This step creates a dummy device, /sys/devices/virtual/mtty/mtty/
-
- Files in this device directory in sysfs are similar to the following::
-
- # tree /sys/devices/virtual/mtty/mtty/
- /sys/devices/virtual/mtty/mtty/
- |-- mdev_supported_types
- | |-- mtty-1
- | | |-- available_instances
- | | |-- create
- | | |-- device_api
- | | |-- devices
- | | `-- name
- | `-- mtty-2
- | |-- available_instances
- | |-- create
- | |-- device_api
- | |-- devices
- | `-- name
- |-- mtty_dev
- | `-- sample_mtty_dev
- |-- power
- | |-- autosuspend_delay_ms
- | |-- control
- | |-- runtime_active_time
- | |-- runtime_status
- | `-- runtime_suspended_time
- |-- subsystem -> ../../../../class/mtty
- `-- uevent
-
-2. Create a mediated device by using the dummy device that you created in the
- previous step::
-
- # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \
- /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create
-
-3. Add parameters to qemu-kvm::
-
- -device vfio-pci,\
- sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
-
-4. Boot the VM.
-
- In the Linux guest VM, with no hardware on the host, the device appears
- as follows::
-
- # lspci -s 00:05.0 -xxvv
- 00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
- Subsystem: Device 4348:3253
- Physical Slot: 5
- Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
- Stepping- SERR- FastB2B- DisINTx-
- Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
- <TAbort- <MAbort- >SERR- <PERR- INTx-
- Interrupt: pin A routed to IRQ 10
- Region 0: I/O ports at c150 [size=8]
- Region 1: I/O ports at c158 [size=8]
- Kernel driver in use: serial
- 00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
- 10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
- 20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
- 30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
-
- In the Linux guest VM, dmesg output for the device is as follows:
-
- serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
- 0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
- 0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
-
-
-5. In the Linux guest VM, check the serial ports::
-
- # setserial -g /dev/ttyS*
- /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
- /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
- /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
-
-6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
- /dev/ttyS2 with hardware flow control disabled.
-
-7. Type data on the minicom terminal or send data to the terminal emulation
- program and read the data.
-
- Data is loop backed from hosts mtty driver.
-
-8. Destroy the mediated device that you created::
-
- # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
-
References
==========
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
index c663b6f97825..50b690f7f663 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -249,19 +249,21 @@ VFIO bus driver API
VFIO bus drivers, such as vfio-pci make use of only a few interfaces
into VFIO core. When devices are bound and unbound to the driver,
-the driver should call vfio_register_group_dev() and
-vfio_unregister_group_dev() respectively::
+Following interfaces are called when devices are bound to and
+unbound from the driver::
- void vfio_init_group_dev(struct vfio_device *device,
- struct device *dev,
- const struct vfio_device_ops *ops);
- void vfio_uninit_group_dev(struct vfio_device *device);
int vfio_register_group_dev(struct vfio_device *device);
+ int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
-The driver should embed the vfio_device in its own structure and call
-vfio_init_group_dev() to pre-configure it before going to registration
-and call vfio_uninit_group_dev() after completing the un-registration.
+The driver should embed the vfio_device in its own structure and use
+vfio_alloc_device() to allocate the structure, and can register
+@init/@release callbacks to manage any private state wrapping the
+vfio_device::
+
+ vfio_alloc_device(dev_struct, member, dev, ops);
+ void vfio_put_device(struct vfio_device *device);
+
vfio_register_group_dev() indicates to the core to begin tracking the
iommu_group of the specified dev and register the dev as owned by a VFIO bus
driver. Once vfio_register_group_dev() returns it is possible for userspace to
@@ -270,28 +272,64 @@ ready before calling it. The driver provides an ops structure for callbacks
similar to a file operations structure::
struct vfio_device_ops {
- int (*open)(struct vfio_device *vdev);
+ char *name;
+ int (*init)(struct vfio_device *vdev);
void (*release)(struct vfio_device *vdev);
+ int (*bind_iommufd)(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+ void (*unbind_iommufd)(struct vfio_device *vdev);
+ int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
+ int (*open_device)(struct vfio_device *vdev);
+ void (*close_device)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos);
- ssize_t (*write)(struct vfio_device *vdev,
- const char __user *buf,
- size_t size, loff_t *ppos);
+ ssize_t (*write)(struct vfio_device *vdev, const char __user *buf,
+ size_t count, loff_t *size);
long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
- int (*mmap)(struct vfio_device *vdev,
- struct vm_area_struct *vma);
+ int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
+ void (*request)(struct vfio_device *vdev, unsigned int count);
+ int (*match)(struct vfio_device *vdev, char *buf);
+ void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
+ int (*device_feature)(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz);
};
Each function is passed the vdev that was originally registered
-in the vfio_register_group_dev() call above. This allows the bus driver
-to obtain its private data using container_of(). The open/release
-callbacks are issued when a new file descriptor is created for a
-device (via VFIO_GROUP_GET_DEVICE_FD). The ioctl interface provides
-a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap
-interfaces implement the device region access defined by the device's
-own VFIO_DEVICE_GET_REGION_INFO ioctl.
+in the vfio_register_group_dev() or vfio_register_emulated_iommu_dev()
+call above. This allows the bus driver to obtain its private data using
+container_of().
+
+::
+
+ - The init/release callbacks are issued when vfio_device is initialized
+ and released.
+
+ - The open/close device callbacks are issued when the first
+ instance of a file descriptor for the device is created (eg.
+ via VFIO_GROUP_GET_DEVICE_FD) for a user session.
+
+ - The ioctl callback provides a direct pass through for some VFIO_DEVICE_*
+ ioctls.
+
+ - The [un]bind_iommufd callbacks are issued when the device is bound to
+ and unbound from iommufd.
+
+ - The attach_ioas callback is issued when the device is attached to an
+ IOAS managed by the bound iommufd. The attached IOAS is automatically
+ detached when the device is unbound from iommufd.
+
+ - The read/write/mmap callbacks implement the device region access defined
+ by the device's own VFIO_DEVICE_GET_REGION_INFO ioctl.
+
+ - The request callback is issued when device is going to be unregistered,
+ such as when trying to unbind the device from the vfio bus driver.
+ - The dma_unmap callback is issued when a range of iovas are unmapped
+ in the container or IOAS attached by the device. Drivers which make
+ use of the vfio page pinning interface must implement this callback in
+ order to unpin pages within the dma_unmap range. Drivers must tolerate
+ this callback even before calls to open_device().
PPC64 sPAPR implementation note
-------------------------------
diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/s390/vfio-ap.rst
index 00f4a04f6d4c..d46e98c7c1ec 100644
--- a/Documentation/s390/vfio-ap.rst
+++ b/Documentation/s390/vfio-ap.rst
@@ -553,7 +553,6 @@ These are the steps:
* ZCRYPT
* S390_AP_IOMMU
* VFIO
- * VFIO_MDEV
* KVM
If using make menuconfig select the following to build the vfio_ap module::
diff --git a/MAINTAINERS b/MAINTAINERS
index a3d7c8945762..84bd678b8af9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21882,7 +21882,6 @@ F: tools/testing/selftests/filesystems/fat/
VFIO DRIVER
M: Alex Williamson <alex.williamson@redhat.com>
-R: Cornelia Huck <cohuck@redhat.com>
L: kvm@vger.kernel.org
S: Maintained
T: git https://github.com/awilliam/linux-vfio.git
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 933771b0b07a..078cd1a773a3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -714,7 +714,9 @@ config EADM_SCH
config VFIO_CCW
def_tristate n
prompt "Support for VFIO-CCW subchannels"
- depends on S390_CCW_IOMMU && VFIO_MDEV
+ depends on S390_CCW_IOMMU
+ depends on VFIO
+ select VFIO_MDEV
help
This driver allows usage of I/O subchannels via VFIO-CCW.
@@ -724,8 +726,10 @@ config VFIO_CCW
config VFIO_AP
def_tristate n
prompt "VFIO support for AP devices"
- depends on S390_AP_IOMMU && VFIO_MDEV && KVM
+ depends on S390_AP_IOMMU && KVM
+ depends on VFIO
depends on ZCRYPT
+ select VFIO_MDEV
help
This driver grants access to Adjunct Processor (AP) devices
via the VFIO mediated device interface.
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 74b35ec2ad28..3c68fe49042c 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -594,7 +594,6 @@ CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
-CONFIG_VFIO_MDEV=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index cec71268e3bc..9ab91632f74c 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -583,7 +583,6 @@ CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
-CONFIG_VFIO_MDEV=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 9c0990c0ec87..3d1cd04ac5fa 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -127,9 +127,10 @@ config DRM_I915_GVT_KVMGT
depends on X86
depends on 64BIT
depends on KVM
- depends on VFIO_MDEV
+ depends on VFIO
select DRM_I915_GVT
select KVM_EXTERNAL_WRITE_TRACKING
+ select VFIO_MDEV
help
Choose this option if you want to enable Intel GVT-g graphics
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index 89f10becf962..d53d08f16973 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -360,7 +360,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
{
struct vfio_container *container;
- container = kzalloc(sizeof(*container), GFP_KERNEL);
+ container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
if (!container)
return -ENOMEM;
@@ -376,11 +376,6 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver = container->iommu_driver;
-
- if (driver && driver->ops->notify)
- driver->ops->notify(container->iommu_data,
- VFIO_IOMMU_CONTAINER_CLOSE);
filep->private_data = NULL;
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index defeb8510ace..c89a047a4cd8 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -28,7 +28,7 @@ static int vfio_fsl_mc_open_device(struct vfio_device *core_vdev)
int i;
vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!vdev->regions)
return -ENOMEM;
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
index 64d01f3fb13d..c51229fccbd6 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
@@ -29,7 +29,7 @@ static int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev)
irq_count = mc_dev->obj_desc.irq_count;
- mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL);
+ mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL_ACCOUNT);
if (!mc_irq)
return -ENOMEM;
@@ -77,7 +77,7 @@ static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev,
if (fd < 0) /* Disable only */
return 0;
- irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
+ irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)",
hwirq, dev_name(&vdev->mc_dev->dev));
if (!irq->name)
return -ENOMEM;
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index e166ad7ce6e7..27d5ba7cf9dc 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -140,7 +140,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
ret = iommufd_vfio_compat_ioas_create(iommufd);
if (ret) {
- iommufd_ctx_put(group->iommufd);
+ iommufd_ctx_put(iommufd);
goto out_unlock;
}
@@ -157,6 +157,18 @@ out_unlock:
return ret;
}
+static void vfio_device_group_get_kvm_safe(struct vfio_device *device)
+{
+ spin_lock(&device->group->kvm_ref_lock);
+ if (!device->group->kvm)
+ goto unlock;
+
+ _vfio_device_get_kvm_safe(device, device->group->kvm);
+
+unlock:
+ spin_unlock(&device->group->kvm_ref_lock);
+}
+
static int vfio_device_group_open(struct vfio_device *device)
{
int ret;
@@ -167,13 +179,23 @@ static int vfio_device_group_open(struct vfio_device *device)
goto out_unlock;
}
+ mutex_lock(&device->dev_set->lock);
+
/*
- * Here we pass the KVM pointer with the group under the lock. If the
- * device driver will use it, it must obtain a reference and release it
- * during close_device.
+ * Before the first device open, get the KVM pointer currently
+ * associated with the group (if there is one) and obtain a reference
+ * now that will be held until the open_count reaches 0 again. Save
+ * the pointer in the device for use by drivers.
*/
- ret = vfio_device_open(device, device->group->iommufd,
- device->group->kvm);
+ if (device->open_count == 0)
+ vfio_device_group_get_kvm_safe(device);
+
+ ret = vfio_device_open(device, device->group->iommufd);
+
+ if (device->open_count == 0)
+ vfio_device_put_kvm(device);
+
+ mutex_unlock(&device->dev_set->lock);
out_unlock:
mutex_unlock(&device->group->group_lock);
@@ -183,7 +205,14 @@ out_unlock:
void vfio_device_group_close(struct vfio_device *device)
{
mutex_lock(&device->group->group_lock);
+ mutex_lock(&device->dev_set->lock);
+
vfio_device_close(device, device->group->iommufd);
+
+ if (device->open_count == 0)
+ vfio_device_put_kvm(device);
+
+ mutex_unlock(&device->dev_set->lock);
mutex_unlock(&device->group->group_lock);
}
@@ -453,6 +482,7 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
refcount_set(&group->drivers, 1);
mutex_init(&group->group_lock);
+ spin_lock_init(&group->kvm_ref_lock);
INIT_LIST_HEAD(&group->device_list);
mutex_init(&group->device_lock);
group->iommu_group = iommu_group;
@@ -806,9 +836,9 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
if (!vfio_file_is_group(file))
return;
- mutex_lock(&group->group_lock);
+ spin_lock(&group->kvm_ref_lock);
group->kvm = kvm;
- mutex_unlock(&group->group_lock);
+ spin_unlock(&group->kvm_ref_lock);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
index 646dbed44eb2..e5fb84e07965 100644
--- a/drivers/vfio/mdev/Kconfig
+++ b/drivers/vfio/mdev/Kconfig
@@ -1,10 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
config VFIO_MDEV
- tristate "Mediated device driver framework"
- default n
- help
- Provides a framework to virtualize devices.
- See Documentation/driver-api/vfio-mediated-device.rst for more details.
-
- If you don't know what do here, say N.
+ tristate
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index abe3359dd477..e4490639d383 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -96,7 +96,7 @@ static MDEV_TYPE_ATTR_RO(device_api);
static ssize_t name_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
- return sprintf(buf, "%s\n",
+ return sysfs_emit(buf, "%s\n",
mtype->pretty_name ? mtype->pretty_name : mtype->sysfs_name);
}
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 0bba3b05c6c7..a117eaf21c14 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -744,7 +744,7 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev)
{
struct hisi_acc_vf_migration_file *migf;
- migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
if (!migf)
return ERR_PTR(-ENOMEM);
@@ -863,7 +863,7 @@ hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev)
struct hisi_acc_vf_migration_file *migf;
int ret;
- migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
if (!migf)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 64e68d13cb98..deed156e6165 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -7,6 +7,29 @@
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id)
+{
+ int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_cap = NULL, *cap;
+ int ret;
+
+ query_cap = kzalloc(query_sz, GFP_KERNEL);
+ if (!query_cap)
+ return -ENOMEM;
+
+ ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap,
+ MLX5_CAP_GENERAL_2);
+ if (ret)
+ goto out;
+
+ cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+ if (!MLX5_GET(cmd_hca_cap_2, cap, migratable))
+ ret = -EOPNOTSUPP;
+out:
+ kfree(query_cap);
+ return ret;
+}
+
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id);
static void
@@ -195,6 +218,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
if (mvdev->vf_id < 0)
goto end;
+ ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1);
+ if (ret)
+ goto end;
+
if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
&mvdev->vhca_id))
goto end;
@@ -373,7 +400,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *buf;
int ret;
- buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
if (!buf)
return ERR_PTR(-ENOMEM);
@@ -473,7 +500,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
}
static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
- size_t image_size)
+ size_t image_size, bool initial_pre_copy)
{
struct mlx5_vf_migration_file *migf = header_buf->migf;
struct mlx5_vf_migration_header header = {};
@@ -481,7 +508,9 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
struct page *page;
u8 *to_buff;
- header.image_size = cpu_to_le64(image_size);
+ header.record_size = cpu_to_le64(image_size);
+ header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY);
+ header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA);
page = mlx5vf_get_migration_page(header_buf, 0);
if (!page)
return -EINVAL;
@@ -489,12 +518,13 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
memcpy(to_buff, &header, sizeof(header));
kunmap_local(to_buff);
header_buf->length = sizeof(header);
- header_buf->header_image_size = image_size;
header_buf->start_pos = header_buf->migf->max_pos;
migf->max_pos += header_buf->length;
spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&header_buf->buf_elm, &migf->buf_list);
spin_unlock_irqrestore(&migf->list_lock, flags);
+ if (initial_pre_copy)
+ migf->pre_copy_initial_bytes += sizeof(header);
return 0;
}
@@ -508,11 +538,14 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
if (!status) {
size_t image_size;
unsigned long flags;
+ bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
+ !async_data->last_chunk;
image_size = MLX5_GET(save_vhca_state_out, async_data->out,
actual_image_size);
if (async_data->header_buf) {
- status = add_buf_header(async_data->header_buf, image_size);
+ status = add_buf_header(async_data->header_buf, image_size,
+ initial_pre_copy);
if (status)
goto err;
}
@@ -522,6 +555,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
spin_unlock_irqrestore(&migf->list_lock, flags);
+ if (initial_pre_copy)
+ migf->pre_copy_initial_bytes += image_size;
migf->state = async_data->last_chunk ?
MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
wake_up_interruptible(&migf->poll_wait);
@@ -583,11 +618,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
}
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
- header_buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
- if (IS_ERR(header_buf)) {
- err = PTR_ERR(header_buf);
- goto err_free;
+ if (async_data->last_chunk && migf->buf_header) {
+ header_buf = migf->buf_header;
+ migf->buf_header = NULL;
+ } else {
+ header_buf = mlx5vf_get_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(header_buf)) {
+ err = PTR_ERR(header_buf);
+ goto err_free;
+ }
}
}
@@ -790,7 +830,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
for (i = 0; i < num_ranges; i++) {
void *addr_range_i_base = range_list_ptr + record_size * i;
- unsigned long length = node->last - node->start;
+ unsigned long length = node->last - node->start + 1;
MLX5_SET64(page_track_range, addr_range_i_base, start_address,
node->start);
@@ -800,7 +840,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
}
WARN_ON(node);
- log_addr_space_size = ilog2(total_ranges_len);
+ log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len));
if (log_addr_space_size <
(MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
log_addr_space_size >
@@ -1032,18 +1072,18 @@ mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
void *in;
int err;
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL_ACCOUNT);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
- log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
- log_rq_sz = ilog2(qp->rq.wqe_cnt);
err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
if (err)
goto err_free;
if (max_recv_wr) {
+ qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
+ log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
+ log_rq_sz = ilog2(qp->rq.wqe_cnt);
err = mlx5_frag_buf_alloc_node(mdev,
wq_get_byte_sz(log_rq_sz, log_rq_stride),
&qp->buf, mdev->priv.numa_node);
@@ -1213,12 +1253,13 @@ static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
int i;
recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!recv_buf->page_list)
return -ENOMEM;
for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
+ filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT,
+ npages - done,
recv_buf->page_list + done);
if (!filled)
goto err;
@@ -1248,7 +1289,7