diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/i915/Kconfig | 3 | ||||
-rw-r--r-- | drivers/vfio/container.c | 7 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc.c | 2 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 4 | ||||
-rw-r--r-- | drivers/vfio/group.c | 46 | ||||
-rw-r--r-- | drivers/vfio/mdev/Kconfig | 8 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_sysfs.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.c | 79 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.h | 28 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/main.c | 261 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 6 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 7 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_igd.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 10 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 2 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_common.c | 12 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_irq.c | 8 | ||||
-rw-r--r-- | drivers/vfio/vfio.h | 25 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 248 | ||||
-rw-r--r-- | drivers/vfio/vfio_main.c | 70 | ||||
-rw-r--r-- | drivers/vfio/virqfd.c | 2 |
22 files changed, 564 insertions, 272 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 9c0990c0ec87..3d1cd04ac5fa 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -127,9 +127,10 @@ config DRM_I915_GVT_KVMGT depends on X86 depends on 64BIT depends on KVM - depends on VFIO_MDEV + depends on VFIO select DRM_I915_GVT select KVM_EXTERNAL_WRITE_TRACKING + select VFIO_MDEV help Choose this option if you want to enable Intel GVT-g graphics diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index 89f10becf962..d53d08f16973 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -360,7 +360,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) { struct vfio_container *container; - container = kzalloc(sizeof(*container), GFP_KERNEL); + container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); if (!container) return -ENOMEM; @@ -376,11 +376,6 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) static int vfio_fops_release(struct inode *inode, struct file *filep) { struct vfio_container *container = filep->private_data; - struct vfio_iommu_driver *driver = container->iommu_driver; - - if (driver && driver->ops->notify) - driver->ops->notify(container->iommu_data, - VFIO_IOMMU_CONTAINER_CLOSE); filep->private_data = NULL; diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index defeb8510ace..c89a047a4cd8 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -28,7 +28,7 @@ static int vfio_fsl_mc_open_device(struct vfio_device *core_vdev) int i; vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!vdev->regions) return -ENOMEM; diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 64d01f3fb13d..c51229fccbd6 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -29,7 +29,7 @@ static int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev) irq_count = mc_dev->obj_desc.irq_count; - mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL); + mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL_ACCOUNT); if (!mc_irq) return -ENOMEM; @@ -77,7 +77,7 @@ static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev, if (fd < 0) /* Disable only */ return 0; - irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", + irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)", hwirq, dev_name(&vdev->mc_dev->dev)); if (!irq->name) return -ENOMEM; diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index e166ad7ce6e7..27d5ba7cf9dc 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -140,7 +140,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, ret = iommufd_vfio_compat_ioas_create(iommufd); if (ret) { - iommufd_ctx_put(group->iommufd); + iommufd_ctx_put(iommufd); goto out_unlock; } @@ -157,6 +157,18 @@ out_unlock: return ret; } +static void vfio_device_group_get_kvm_safe(struct vfio_device *device) +{ + spin_lock(&device->group->kvm_ref_lock); + if (!device->group->kvm) + goto unlock; + + _vfio_device_get_kvm_safe(device, device->group->kvm); + +unlock: + spin_unlock(&device->group->kvm_ref_lock); +} + static int vfio_device_group_open(struct vfio_device *device) { int ret; @@ -167,13 +179,23 @@ static int vfio_device_group_open(struct vfio_device *device) goto out_unlock; } + mutex_lock(&device->dev_set->lock); + /* - * Here we pass the KVM pointer with the group under the lock. If the - * device driver will use it, it must obtain a reference and release it - * during close_device. + * Before the first device open, get the KVM pointer currently + * associated with the group (if there is one) and obtain a reference + * now that will be held until the open_count reaches 0 again. Save + * the pointer in the device for use by drivers. */ - ret = vfio_device_open(device, device->group->iommufd, - device->group->kvm); + if (device->open_count == 0) + vfio_device_group_get_kvm_safe(device); + + ret = vfio_device_open(device, device->group->iommufd); + + if (device->open_count == 0) + vfio_device_put_kvm(device); + + mutex_unlock(&device->dev_set->lock); out_unlock: mutex_unlock(&device->group->group_lock); @@ -183,7 +205,14 @@ out_unlock: void vfio_device_group_close(struct vfio_device *device) { mutex_lock(&device->group->group_lock); + mutex_lock(&device->dev_set->lock); + vfio_device_close(device, device->group->iommufd); + + if (device->open_count == 0) + vfio_device_put_kvm(device); + + mutex_unlock(&device->dev_set->lock); mutex_unlock(&device->group->group_lock); } @@ -453,6 +482,7 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, refcount_set(&group->drivers, 1); mutex_init(&group->group_lock); + spin_lock_init(&group->kvm_ref_lock); INIT_LIST_HEAD(&group->device_list); mutex_init(&group->device_lock); group->iommu_group = iommu_group; @@ -806,9 +836,9 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) if (!vfio_file_is_group(file)) return; - mutex_lock(&group->group_lock); + spin_lock(&group->kvm_ref_lock); group->kvm = kvm; - mutex_unlock(&group->group_lock); + spin_unlock(&group->kvm_ref_lock); } EXPORT_SYMBOL_GPL(vfio_file_set_kvm); diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig index 646dbed44eb2..e5fb84e07965 100644 --- a/drivers/vfio/mdev/Kconfig +++ b/drivers/vfio/mdev/Kconfig @@ -1,10 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_MDEV - tristate "Mediated device driver framework" - default n - help - Provides a framework to virtualize devices. - See Documentation/driver-api/vfio-mediated-device.rst for more details. - - If you don't know what do here, say N. + tristate diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index abe3359dd477..e4490639d383 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -96,7 +96,7 @@ static MDEV_TYPE_ATTR_RO(device_api); static ssize_t name_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, char *buf) { - return sprintf(buf, "%s\n", + return sysfs_emit(buf, "%s\n", mtype->pretty_name ? mtype->pretty_name : mtype->sysfs_name); } diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 0bba3b05c6c7..a117eaf21c14 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -744,7 +744,7 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev) { struct hisi_acc_vf_migration_file *migf; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); @@ -863,7 +863,7 @@ hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev) struct hisi_acc_vf_migration_file *migf; int ret; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 64e68d13cb98..deed156e6165 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -7,6 +7,29 @@ enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; +static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_cap = NULL, *cap; + int ret; + + query_cap = kzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap, + MLX5_CAP_GENERAL_2); + if (ret) + goto out; + + cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(cmd_hca_cap_2, cap, migratable)) + ret = -EOPNOTSUPP; +out: + kfree(query_cap); + return ret; +} + static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, u16 *vhca_id); static void @@ -195,6 +218,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (mvdev->vf_id < 0) goto end; + ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1); + if (ret) + goto end; + if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1, &mvdev->vhca_id)) goto end; @@ -373,7 +400,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *buf; int ret; - buf = kzalloc(sizeof(*buf), GFP_KERNEL); + buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); if (!buf) return ERR_PTR(-ENOMEM); @@ -473,7 +500,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) } static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, - size_t image_size) + size_t image_size, bool initial_pre_copy) { struct mlx5_vf_migration_file *migf = header_buf->migf; struct mlx5_vf_migration_header header = {}; @@ -481,7 +508,9 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, struct page *page; u8 *to_buff; - header.image_size = cpu_to_le64(image_size); + header.record_size = cpu_to_le64(image_size); + header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY); + header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA); page = mlx5vf_get_migration_page(header_buf, 0); if (!page) return -EINVAL; @@ -489,12 +518,13 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, memcpy(to_buff, &header, sizeof(header)); kunmap_local(to_buff); header_buf->length = sizeof(header); - header_buf->header_image_size = image_size; header_buf->start_pos = header_buf->migf->max_pos; migf->max_pos += header_buf->length; spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&header_buf->buf_elm, &migf->buf_list); spin_unlock_irqrestore(&migf->list_lock, flags); + if (initial_pre_copy) + migf->pre_copy_initial_bytes += sizeof(header); return 0; } @@ -508,11 +538,14 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) if (!status) { size_t image_size; unsigned long flags; + bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY && + !async_data->last_chunk; image_size = MLX5_GET(save_vhca_state_out, async_data->out, actual_image_size); if (async_data->header_buf) { - status = add_buf_header(async_data->header_buf, image_size); + status = add_buf_header(async_data->header_buf, image_size, + initial_pre_copy); if (status) goto err; } @@ -522,6 +555,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); spin_unlock_irqrestore(&migf->list_lock, flags); + if (initial_pre_copy) + migf->pre_copy_initial_bytes += image_size; migf->state = async_data->last_chunk ? MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY; wake_up_interruptible(&migf->poll_wait); @@ -583,11 +618,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, } if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - header_buf = mlx5vf_get_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(header_buf)) { - err = PTR_ERR(header_buf); - goto err_free; + if (async_data->last_chunk && migf->buf_header) { + header_buf = migf->buf_header; + migf->buf_header = NULL; + } else { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; + } } } @@ -790,7 +830,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev, node = interval_tree_iter_first(ranges, 0, ULONG_MAX); for (i = 0; i < num_ranges; i++) { void *addr_range_i_base = range_list_ptr + record_size * i; - unsigned long length = node->last - node->start; + unsigned long length = node->last - node->start + 1; MLX5_SET64(page_track_range, addr_range_i_base, start_address, node->start); @@ -800,7 +840,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev, } WARN_ON(node); - log_addr_space_size = ilog2(total_ranges_len); + log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len)); if (log_addr_space_size < (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) || log_addr_space_size > @@ -1032,18 +1072,18 @@ mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev, void *in; int err; - qp = kzalloc(sizeof(*qp), GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL_ACCOUNT); if (!qp) return ERR_PTR(-ENOMEM); - qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr); - log_rq_stride = ilog2(MLX5_SEND_WQE_DS); - log_rq_sz = ilog2(qp->rq.wqe_cnt); err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node); if (err) goto err_free; if (max_recv_wr) { + qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr); + log_rq_stride = ilog2(MLX5_SEND_WQE_DS); + log_rq_sz = ilog2(qp->rq.wqe_cnt); err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_rq_sz, log_rq_stride), &qp->buf, mdev->priv.numa_node); @@ -1213,12 +1253,13 @@ static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf, int i; recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!recv_buf->page_list) return -ENOMEM; for (;;) { - filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done, + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, + npages - done, recv_buf->page_list + done); if (!filled) goto err; @@ -1248,7 +1289,7 @@ static int register_dma_recv_pages(struct mlx5_core_dev *mdev, recv_buf->dma_addrs = kvcalloc(recv_buf->npages, sizeof(*recv_buf->dma_addrs), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!recv_buf->dma_addrs) return -ENOMEM; diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 5483171d57ad..aec4c69dd6c1 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/vfio_pci_core.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> @@ -26,15 +27,33 @@ enum mlx5_vf_migf_state { enum mlx5_vf_load_state { MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, MLX5_VF_LOAD_STATE_READ_HEADER, + MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, + MLX5_VF_LOAD_STATE_READ_HEADER_DATA, MLX5_VF_LOAD_STATE_PREP_IMAGE, MLX5_VF_LOAD_STATE_READ_IMAGE, MLX5_VF_LOAD_STATE_LOAD_IMAGE, }; +struct mlx5_vf_migration_tag_stop_copy_data { + __le64 stop_copy_size; +}; + +enum mlx5_vf_migf_header_flags { + MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0, + MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL = 1 << 0, +}; + +enum mlx5_vf_migf_header_tag { + MLX5_MIGF_HEADER_TAG_FW_DATA = 0, + MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE = 1 << 0, +}; + struct mlx5_vf_migration_header { - __le64 image_size; + __le64 record_size; /* For future use in case we may need to change the kernel protocol */ - __le64 flags; + __le32 flags; /* Use mlx5_vf_migf_header_flags */ + __le32 tag; /* Use mlx5_vf_migf_header_tag */ + __u8 data[]; /* Its size is given in the record_size */ }; struct mlx5_vhca_data_buffer { @@ -42,7 +61,6 @@ struct mlx5_vhca_data_buffer { loff_t start_pos; u64 length; u64 allocated_length; - u64 header_image_size; u32 mkey; enum dma_data_direction dma_dir; u8 dmaed:1; @@ -72,6 +90,10 @@ struct mlx5_vf_migration_file { enum mlx5_vf_load_state load_state; u32 pdn; loff_t max_pos; + u64 record_size; + u32 record_tag; + u64 stop_copy_prep_size; + u64 pre_copy_initial_bytes; struct mlx5_vhca_data_buffer *buf; struct mlx5_vhca_data_buffer *buf_header; spinlock_t list_lock; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 9feb89c6d939..e897537a9e8a 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -21,8 +21,8 @@ #include "cmd.h" -/* Arbitrary to prevent userspace from consuming endless memory */ -#define MAX_MIGRATION_SIZE (512*1024*1024) +/* Device specification max LOAD size */ +#define MAX_LOAD_SIZE (BIT_ULL(__mlx5_bit_sz(load_vhca_state_in, size)) - 1) static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev) { @@ -73,12 +73,13 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, int ret; to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); if (!page_list) return -ENOMEM; do { - filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list); + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, + page_list); if (!filled) { ret = -ENOMEM; goto err; @@ -87,7 +88,7 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, ret = sg_alloc_append_table_from_pages( &buf->table, page_list, filled, 0, filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (ret) goto err; @@ -303,6 +304,87 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf) wake_up_interruptible(&migf->poll_wait); } +static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf) +{ + size_t size = sizeof(struct mlx5_vf_migration_header) + + sizeof(struct mlx5_vf_migration_tag_stop_copy_data); + struct mlx5_vf_migration_tag_stop_copy_data data = {}; + struct mlx5_vhca_data_buffer *header_buf = NULL; + struct mlx5_vf_migration_header header = {}; + unsigned long flags; + struct page *page; + u8 *to_buff; + int ret; + + header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE); + if (IS_ERR(header_buf)) + return PTR_ERR(header_buf); + + header.record_size = cpu_to_le64(sizeof(data)); + header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL); + header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE); + page = mlx5vf_get_migration_page(header_buf, 0); + if (!page) { + ret = -EINVAL; + goto err; + } + to_buff = kmap_local_page(page); + memcpy(to_buff, &header, sizeof(header)); + header_buf->length = sizeof(header); + data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length); + memcpy(to_buff + sizeof(header), &data, sizeof(data)); + header_buf->length += sizeof(data); + kunmap_local(to_buff); + header_buf->start_pos = header_buf->migf->max_pos; + migf->max_pos += header_buf->length; + spin_lock_irqsave(&migf->list_lock, flags); + list_add_tail(&header_buf->buf_elm, &migf->buf_list); + spin_unlock_irqrestore(&migf->list_lock, flags); + migf->pre_copy_initial_bytes = size; + return 0; +err: + mlx5vf_put_data_buffer(header_buf); + return ret; +} + +static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf, + size_t state_size) +{ + struct mlx5_vhca_data_buffer *buf; + size_t inc_state_size; + int ret; + + /* let's be ready for stop_copy size that might grow by 10 percents */ + if (check_add_overflow(state_size, state_size / 10, &inc_state_size)) + inc_state_size = state_size; + + buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + migf->buf = buf; + buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; + } + + migf->buf_header = buf; + ret = mlx5vf_add_stop_copy_header(migf); + if (ret) + goto err_header; + return 0; + +err_header: + mlx5vf_put_data_buffer(migf->buf_header); + migf->buf_header = NULL; +err: + mlx5vf_put_data_buffer(migf->buf); + migf->buf = NULL; + return ret; +} + static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -313,7 +395,7 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, loff_t *pos = &filp->f_pos; unsigned long minsz; size_t inc_length = 0; - bool end_of_data; + bool end_of_data = false; int ret; if (cmd != VFIO_MIG_GET_PRECOPY_INFO) @@ -357,25 +439,19 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, goto err_migf_unlock; } - buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); - if (buf) { - if (buf->start_pos == 0) { - info.initial_bytes = buf->header_image_size - *pos; - } else if (buf->start_pos == - sizeof(struct mlx5_vf_migration_header)) { - /* First data buffer following the header */ - info.initial_bytes = buf->start_pos + - buf->length - *pos; - } else { - info.dirty_bytes = buf->start_pos + buf->length - *pos; - } + if (migf->pre_copy_initial_bytes > *pos) { + info.initial_bytes = migf->pre_copy_initial_bytes - *pos; } else { - if (!end_of_data) { - ret = -EINVAL; - goto err_migf_unlock; + buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); + if (buf) { + info.dirty_bytes = buf->start_pos + buf->length - *pos; + } else { + if (!end_of_data) { + ret = -EINVAL; + goto err_migf_unlock; + } + info.dirty_bytes = inc_length; } - - info.dirty_bytes = inc_length; } if (!end_of_data || !inc_length) { @@ -440,10 +516,16 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) if (ret) goto err; - buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - goto err; + /* Checking whether we have a matching pre-allocated buffer that can fit */ + if (migf->buf && migf->buf->allocated_length >= length) { + buf = migf->buf; + migf->buf = NULL; + } else { + buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; + } } ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false); @@ -467,7 +549,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) size_t length; int ret; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); @@ -502,6 +584,12 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) if (ret) goto out_pd; + if (track) { + ret = mlx5vf_prep_stop_copy(migf, length); + if (ret) + goto out_pd; + } + buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE); if (IS_ERR(buf)) { ret = PTR_ERR(buf); @@ -515,7 +603,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) out_save: mlx5vf_free_data_buffer(buf); out_pd: - mlx5vf_cmd_dealloc_pd(migf); + mlx5fv_cmd_clean_migf_resources(migf); out_free: fput(migf->filp); end: @@ -564,7 +652,7 @@ mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, { int ret; - if (requested_length > MAX_MIGRATION_SIZE) + if (requested_length > MAX_LOAD_SIZE) return -ENOMEM; if (vhca_buf->allocated_length < requested_length) { @@ -616,6 +704,56 @@ mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, } static int +mlx5vf_resume_read_header_data(struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *vhca_buf, + const char __user **buf, size_t *len, + loff_t *pos, ssize_t *done) +{ + size_t copy_len, to_copy; + size_t required_data; + u8 *to_buff; + int ret; + + required_data = migf->record_size - vhca_buf->length; + to_copy = min_t(size_t, *len, required_data); + copy_len = to_copy; + while (to_copy) { + ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos, + done); + if (ret) + return ret; + } + + *len -= copy_len; + if (vhca_buf->length == migf->record_size) { + switch (migf->record_tag) { + case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE: + { + struct page *page; + + page = mlx5vf_get_migration_page(vhca_buf, 0); + if (!page) + return -EINVAL; + to_buff = kmap_local_page(page); + migf->stop_copy_prep_size = min_t(u64, + le64_to_cpup((__le64 *)to_buff), MAX_LOAD_SIZE); + kunmap_local(to_buff); + break; + } + default: + /* Optional tag */ + break; + } + + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + migf->max_pos += migf->record_size; + vhca_buf->length = 0; + } + + return 0; +} + +static int mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *vhca_buf, const char __user **buf, @@ -645,23 +783,38 @@ mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf, *len -= copy_len; vhca_buf->length += copy_len; if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) { - u64 flags; + u64 record_size; + u32 flags; - vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff); - if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) { + record_size = le64_to_cpup((__le64 *)to_buff); + if (record_size > MAX_LOAD_SIZE) { ret = -ENOMEM; goto end; } - flags = le64_to_cpup((__le64 *)(to_buff + + migf->record_size = record_size; + flags = le32_to_cpup((__le32 *)(to_buff + offsetof(struct mlx5_vf_migration_header, flags))); - if (flags) { - ret = -EOPNOTSUPP; - goto end; + migf->record_tag = le32_to_cpup((__le32 *)(to_buff + + offsetof(struct mlx5_vf_migration_header, tag))); + switch (migf->record_tag) { + case MLX5_MIGF_HEADER_TAG_FW_DATA: + migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE; + break; + case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE: + migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA; + break; + default: + if (!(flags & MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) { + ret = -EOPNOTSUPP; + goto end; + } + /* We may read and skip this optional record data */ + migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA; } - migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE; migf->max_pos += vhca_buf->length; + vhca_buf->length = 0; *has_work = true; } end: @@ -705,9 +858,34 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, if (ret) goto out_unlock; break; + case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA: + if (vhca_buf_header->allocated_length < migf->record_size) { + mlx5vf_free_data_buffer(vhca_buf_header); + + migf->buf_header = mlx5vf_alloc_data_buffer(migf, + migf->record_size, DMA_NONE); + if (IS_ERR(migf->buf_header)) { + ret = PTR_ERR(migf->buf_header); + migf->buf_header = NULL; + goto out_unlock; + } + + vhca_buf_header = migf->buf_header; + } + + vhca_buf_header->start_pos = migf->max_pos; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA; + break; + case MLX5_VF_LOAD_STATE_READ_HEADER_DATA: + ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header, + &buf, &len, pos, &done); + if (ret) |