From 2202844e4468c7539dba0c0b06577c93735af952 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Mon, 6 Nov 2023 15:22:23 +0800 Subject: vfio/migration: Add debugfs to live migration driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are multiple devices, software and operational steps involved in the process of live migration. An error occurred on any node may cause the live migration operation to fail. This complex process makes it very difficult to locate and analyze the cause when the function fails. In order to quickly locate the cause of the problem when the live migration fails, I added a set of debugfs to the vfio live migration driver. +-------------------------------------------+ | | | | | QEMU | | | | | +---+----------------------------+----------+ | ^ | ^ | | | | | | | | v | v | +---------+--+ +---------+--+ |src vfio_dev| |dst vfio_dev| +--+---------+ +--+---------+ | ^ | ^ | | | | v | | | +-----------+----+ +-----------+----+ |src dev debugfs | |dst dev debugfs | +----------------+ +----------------+ The entire debugfs directory will be based on the definition of the CONFIG_DEBUG_FS macro. If this macro is not enabled, the interfaces in vfio.h will be empty definitions, and the creation and initialization of the debugfs directory will not be executed. vfio | +--- | +---migration | +--state | +--- +---migration +--state debugfs will create a public root directory "vfio" file. then create a dev_name() file for each live migration device. First, create a unified state acquisition file of "migration" in this device directory. Then, create a public live migration state lookup file "state". Signed-off-by: Longfang Liu Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/r/20231106072225.28577-2-liulongfang@huawei.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 10 ++++++ drivers/vfio/Makefile | 1 + drivers/vfio/debugfs.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio.h | 14 ++++++++ drivers/vfio/vfio_main.c | 4 +++ include/linux/vfio.h | 7 ++++ include/uapi/linux/vfio.h | 1 + 7 files changed, 129 insertions(+) create mode 100644 drivers/vfio/debugfs.c diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 6bda6dbb4878..ceae52fd7586 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -80,6 +80,16 @@ config VFIO_VIRQFD select EVENTFD default n +config VFIO_DEBUGFS + bool "Export VFIO internals in DebugFS" + depends on DEBUG_FS + help + Allows exposure of VFIO device internals. This option enables + the use of debugfs by VFIO drivers as required. The device can + cause the VFIO code create a top-level debug/vfio directory + during initialization, and then populate a subdirectory with + entries as required. + source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 68c05705200f..b2fc9fb499d8 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -7,6 +7,7 @@ vfio-$(CONFIG_VFIO_GROUP) += group.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o +vfio-$(CONFIG_VFIO_DEBUGFS) += debugfs.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o diff --git a/drivers/vfio/debugfs.c b/drivers/vfio/debugfs.c new file mode 100644 index 000000000000..298bd866f157 --- /dev/null +++ b/drivers/vfio/debugfs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, HiSilicon Ltd. + */ + +#include +#include +#include +#include +#include "vfio.h" + +static struct dentry *vfio_debugfs_root; + +static int vfio_device_state_read(struct seq_file *seq, void *data) +{ + struct device *vf_dev = seq->private; + struct vfio_device *vdev = container_of(vf_dev, + struct vfio_device, device); + enum vfio_device_mig_state state; + int ret; + + BUILD_BUG_ON(VFIO_DEVICE_STATE_NR != + VFIO_DEVICE_STATE_PRE_COPY_P2P + 1); + + ret = vdev->mig_ops->migration_get_state(vdev, &state); + if (ret) + return -EINVAL; + + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + seq_puts(seq, "ERROR\n"); + break; + case VFIO_DEVICE_STATE_STOP: + seq_puts(seq, "STOP\n"); + break; + case VFIO_DEVICE_STATE_RUNNING: + seq_puts(seq, "RUNNING\n"); + break; + case VFIO_DEVICE_STATE_STOP_COPY: + seq_puts(seq, "STOP_COPY\n"); + break; + case VFIO_DEVICE_STATE_RESUMING: + seq_puts(seq, "RESUMING\n"); + break; + case VFIO_DEVICE_STATE_RUNNING_P2P: + seq_puts(seq, "RUNNING_P2P\n"); + break; + case VFIO_DEVICE_STATE_PRE_COPY: + seq_puts(seq, "PRE_COPY\n"); + break; + case VFIO_DEVICE_STATE_PRE_COPY_P2P: + seq_puts(seq, "PRE_COPY_P2P\n"); + break; + default: + seq_puts(seq, "Invalid\n"); + } + + return 0; +} + +void vfio_device_debugfs_init(struct vfio_device *vdev) +{ + struct device *dev = &vdev->device; + + vdev->debug_root = debugfs_create_dir(dev_name(vdev->dev), + vfio_debugfs_root); + + if (vdev->mig_ops) { + struct dentry *vfio_dev_migration = NULL; + + vfio_dev_migration = debugfs_create_dir("migration", + vdev->debug_root); + debugfs_create_devm_seqfile(dev, "state", vfio_dev_migration, + vfio_device_state_read); + } +} + +void vfio_device_debugfs_exit(struct vfio_device *vdev) +{ + debugfs_remove_recursive(vdev->debug_root); +} + +void vfio_debugfs_create_root(void) +{ + vfio_debugfs_root = debugfs_create_dir("vfio", NULL); +} + +void vfio_debugfs_remove_root(void) +{ + debugfs_remove_recursive(vfio_debugfs_root); + vfio_debugfs_root = NULL; +} diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 307e3f29b527..bde84ad344e5 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -448,4 +448,18 @@ static inline void vfio_device_put_kvm(struct vfio_device *device) } #endif +#ifdef CONFIG_VFIO_DEBUGFS +void vfio_debugfs_create_root(void); +void vfio_debugfs_remove_root(void); + +void vfio_device_debugfs_init(struct vfio_device *vdev); +void vfio_device_debugfs_exit(struct vfio_device *vdev); +#else +static inline void vfio_debugfs_create_root(void) { } +static inline void vfio_debugfs_remove_root(void) { } + +static inline void vfio_device_debugfs_init(struct vfio_device *vdev) { } +static inline void vfio_device_debugfs_exit(struct vfio_device *vdev) { } +#endif /* CONFIG_VFIO_DEBUGFS */ + #endif diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 8d4995ada74a..1cc93aac99a2 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -311,6 +311,7 @@ static int __vfio_register_dev(struct vfio_device *device, refcount_set(&device->refcount, 1); vfio_device_group_register(device); + vfio_device_debugfs_init(device); return 0; err_out: @@ -378,6 +379,7 @@ void vfio_unregister_group_dev(struct vfio_device *device) } } + vfio_device_debugfs_exit(device); /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } @@ -1676,6 +1678,7 @@ static int __init vfio_init(void) if (ret) goto err_alloc_dev_chrdev; + vfio_debugfs_create_root(); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; @@ -1691,6 +1694,7 @@ err_virqfd: static void __exit vfio_cleanup(void) { + vfio_debugfs_remove_root(); ida_destroy(&vfio.device_ida); vfio_cdev_cleanup(); class_destroy(vfio.device_class); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a65b2513f8cd..89b265bc6ec3 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -69,6 +69,13 @@ struct vfio_device { u8 iommufd_attached:1; #endif u8 cdev_opened:1; +#ifdef CONFIG_DEBUG_FS + /* + * debug_root is a static property of the vfio_device + * which must be set prior to registering the vfio_device. + */ + struct dentry *debug_root; +#endif }; /** diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 7f5fb010226d..2b68e6cdf190 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1219,6 +1219,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, VFIO_DEVICE_STATE_PRE_COPY = 6, VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, + VFIO_DEVICE_STATE_NR, }; /** -- cgit v1.2.3 From 7b994177805f4ef05913c3f242840b262ecf2fa1 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Mon, 6 Nov 2023 15:22:24 +0800 Subject: Documentation: add debugfs description for vfio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an debugfs document description file to help users understand how to use the accelerator live migration driver's debugfs. Signed-off-by: Longfang Liu Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/r/20231106072225.28577-3-liulongfang@huawei.com Signed-off-by: Alex Williamson --- Documentation/ABI/testing/debugfs-vfio | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 Documentation/ABI/testing/debugfs-vfio diff --git a/Documentation/ABI/testing/debugfs-vfio b/Documentation/ABI/testing/debugfs-vfio new file mode 100644 index 000000000000..90f7c262f591 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-vfio @@ -0,0 +1,25 @@ +What: /sys/kernel/debug/vfio +Date: December 2023 +KernelVersion: 6.8 +Contact: Longfang Liu +Description: This debugfs file directory is used for debugging + of vfio devices, it's a common directory for all vfio devices. + Vfio core will create a device subdirectory under this + directory. + +What: /sys/kernel/debug/vfio//migration +Date: December 2023 +KernelVersion: 6.8 +Contact: Longfang Liu +Description: This debugfs file directory is used for debugging + of vfio devices that support live migration. + The debugfs of each vfio device that supports live migration + could be created under this directory. + +What: /sys/kernel/debug/vfio//migration/state +Date: December 2023 +KernelVersion: 6.8 +Contact: Longfang Liu +Description: Read the live migration status of the vfio device. + The contents of the state file reflects the migration state + relative to those defined in the vfio_device_mig_state enum -- cgit v1.2.3 From 0d9bacfa615310e88bae385d64ca065953cb95ac Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Mon, 6 Nov 2023 15:22:25 +0800 Subject: MAINTAINERS: Add vfio debugfs interface doc link After adding the debugfs function to the vfio driver, a new debugfs-vfio file was added. Signed-off-by: Longfang Liu Link: https://lore.kernel.org/r/20231106072225.28577-4-liulongfang@huawei.com Signed-off-by: Alex Williamson --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 788be9ab5b73..d924ef5f5699 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22837,6 +22837,7 @@ M: Alex Williamson L: kvm@vger.kernel.org S: Maintained T: git https://github.com/awilliam/linux-vfio.git +F: Documentation/ABI/testing/debugfs-vfio F: Documentation/ABI/testing/sysfs-devices-vfio-dev F: Documentation/driver-api/vfio.rst F: drivers/vfio/ -- cgit v1.2.3 From 4004497cec3093d7b0087bc70709b45969fa07b6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:02 -0800 Subject: vfio/pds: Fix calculations in pds_vfio_dirty_sync The incorrect check is being done for comparing the iova/length being requested to sync. This can cause the dirty sync operation to fail. Fix this by making sure the iova offset added to the requested sync length doesn't exceed the region_size. Also, the region_start is assumed to always be at 0. This can cause dirty tracking to fail because the device/driver bitmap offset always starts at 0, however, the region_start/iova may not. Fix this by determining the iova offset from region_start to determine the bitmap offset. Fixes: f232836a9152 ("vfio/pds: Add support for dirty page tracking") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-2-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index c937aa6f3954..27607d7b9030 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -478,8 +478,7 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size, pages, bitmap_size); - if (!length || ((dirty->region_start + iova + length) > - (dirty->region_start + dirty->region_size))) { + if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; @@ -496,7 +495,8 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP(iova / dirty->region_page_size, sizeof(u64)); + bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) / + dirty->region_page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", -- cgit v1.2.3 From 3b8f7a24d1fe79cc3fa1da9be5610b9c3aedbc2a Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:03 -0800 Subject: vfio/pds: Only use a single SGL for both seq and ack Since the seq/ack operations never happen in parallel there is no need for multiple scatter gather lists per region. The current implementation is wasting memory. Fix this by only using a single scatter-gather list for both the seq and ack operations. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-3-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 68 ++++++++++++++++---------------------------- drivers/vfio/pci/pds/dirty.h | 6 ++-- 2 files changed, 28 insertions(+), 46 deletions(-) diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 27607d7b9030..4462f6edb0ed 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -100,35 +100,35 @@ static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info) + struct pds_vfio_dirty *dirty) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, bmp_info->sgl_addr, - bmp_info->num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, dirty->sgl_addr, + dirty->num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(bmp_info->sgl); + kfree(dirty->sgl); - bmp_info->num_sge = 0; - bmp_info->sgl = NULL; - bmp_info->sgl_addr = 0; + dirty->num_sge = 0; + dirty->sgl = NULL; + dirty->sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { - if (pds_vfio->dirty.host_seq.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_seq); - if (pds_vfio->dirty.host_ack.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_ack); + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + + if (dirty->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, dirty); } -static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info, - u32 page_count) +static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, + u32 page_count) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; size_t sgl_size; @@ -147,30 +147,9 @@ static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - bmp_info->sgl = sgl; - bmp_info->num_sge = max_sge; - bmp_info->sgl_addr = sgl_addr; - - return 0; -} - -static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, - u32 page_count) -{ - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - int err; - - err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_seq, - page_count); - if (err) - return err; - - err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_ack, - page_count); - if (err) { - __pds_vfio_dirty_free_sgl(pds_vfio, &dirty->host_seq); - return err; - } + dirty->sgl = sgl; + dirty->num_sge = max_sge; + dirty->sgl_addr = sgl_addr; return 0; } @@ -328,6 +307,8 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_lm_sg_elem *sgl; unsigned long long npages; struct sg_table sg_table; struct scatterlist *sg; @@ -374,8 +355,9 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, if (err) goto out_free_sg_table; + sgl = pds_vfio->dirty.sgl; for_each_sgtable_dma_sg(&sg_table, sg, i) { - struct pds_lm_sg_elem *sg_elem = &bmp_info->sgl[i]; + struct pds_lm_sg_elem *sg_elem = &sgl[i]; sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); sg_elem->len = cpu_to_le32(sg_dma_len(sg)); @@ -383,15 +365,15 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); - dma_sync_single_for_device(pdsc_dev, bmp_info->sgl_addr, size, dma_dir); - err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, bmp_info->sgl_addr, num_sge, + dma_sync_single_for_device(pdsc_dev, dirty->sgl_addr, size, dma_dir); + err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, dirty->sgl_addr, num_sge, offset, bmp_bytes, read_seq); if (err) dev_err(&pdev->dev, "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", bmp_type_str, offset, bmp_bytes, - num_sge, bmp_info->sgl_addr, ERR_PTR(err)); - dma_sync_single_for_cpu(pdsc_dev, bmp_info->sgl_addr, size, dma_dir); + num_sge, dirty->sgl_addr, ERR_PTR(err)); + dma_sync_single_for_cpu(pdsc_dev, dirty->sgl_addr, size, dma_dir); dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); out_free_sg_table: diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index f78da25d75ca..9de5aac58190 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -7,9 +7,6 @@ struct pds_vfio_bmp_info { unsigned long *bmp; u32 bmp_bytes; - struct pds_lm_sg_elem *sgl; - dma_addr_t sgl_addr; - u16 num_sge; }; struct pds_vfio_dirty { @@ -18,6 +15,9 @@ struct pds_vfio_dirty { u64 region_size; u64 region_start; u64 region_page_size; + struct pds_lm_sg_elem *sgl; + dma_addr_t sgl_addr; + u16 num_sge; bool is_enabled; }; -- cgit v1.2.3 From 3f5898133a706402180908b671d5e781fc48f548 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:04 -0800 Subject: vfio/pds: Move and rename region specific info An upcoming change in this series will add support for multiple regions. To prepare for that, move region specific information into struct pds_vfio_region and rename the members for readability. This will reduce the size of the patch that actually implements multiple region support. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-4-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 83 ++++++++++++++++++++++---------------------- drivers/vfio/pci/pds/dirty.h | 12 ++++--- 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 4462f6edb0ed..8a7f9eed4e59 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -85,18 +85,18 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->host_seq.bmp = host_seq_bmp; - dirty->host_ack.bmp = host_ack_bmp; + dirty->region.host_seq.bmp = host_seq_bmp; + dirty->region.host_ack.bmp = host_ack_bmp; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->host_seq.bmp); - vfree(dirty->host_ack.bmp); - dirty->host_seq.bmp = NULL; - dirty->host_ack.bmp = NULL; + vfree(dirty->region.host_seq.bmp); + vfree(dirty->region.host_ack.bmp); + dirty->region.host_seq.bmp = NULL; + dirty->region.host_ack.bmp = NULL; } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -105,21 +105,21 @@ static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, dirty->sgl_addr, - dirty->num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, dirty->region.sgl_addr, + dirty->region.num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(dirty->sgl); + kfree(dirty->region.sgl); - dirty->num_sge = 0; - dirty->sgl = NULL; - dirty->sgl_addr = 0; + dirty->region.num_sge = 0; + dirty->region.sgl = NULL; + dirty->region.sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - if (dirty->sgl) + if (dirty->region.sgl) __pds_vfio_dirty_free_sgl(pds_vfio, dirty); } @@ -147,9 +147,9 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - dirty->sgl = sgl; - dirty->num_sge = max_sge; - dirty->sgl_addr = sgl_addr; + dirty->region.sgl = sgl; + dirty->region.num_sge = max_sge; + dirty->region.sgl_addr = sgl_addr; return 0; } @@ -267,9 +267,9 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_bitmaps; } - dirty->region_start = region_start; - dirty->region_size = region_size; - dirty->region_page_size = region_page_size; + dirty->region.start = region_start; + dirty->region.size = region_size; + dirty->region.page_size = region_page_size; pds_vfio_dirty_set_enabled(pds_vfio); pds_vfio_print_guest_region_info(pds_vfio, max_regions); @@ -304,11 +304,10 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; + struct pds_vfio_region *region = &pds_vfio->dirty.region; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - struct pds_lm_sg_elem *sgl; unsigned long long npages; struct sg_table sg_table; struct scatterlist *sg; @@ -355,9 +354,8 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, if (err) goto out_free_sg_table; - sgl = pds_vfio->dirty.sgl; for_each_sgtable_dma_sg(&sg_table, sg, i) { - struct pds_lm_sg_elem *sg_elem = &sgl[i]; + struct pds_lm_sg_elem *sg_elem = ®ion->sgl[i]; sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); sg_elem->len = cpu_to_le32(sg_dma_len(sg)); @@ -365,15 +363,15 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); - dma_sync_single_for_device(pdsc_dev, dirty->sgl_addr, size, dma_dir); - err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, dirty->sgl_addr, num_sge, + dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir); + err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge, offset, bmp_bytes, read_seq); if (err) dev_err(&pdev->dev, "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", bmp_type_str, offset, bmp_bytes, - num_sge, dirty->sgl_addr, ERR_PTR(err)); - dma_sync_single_for_cpu(pdsc_dev, dirty->sgl_addr, size, dma_dir); + num_sge, region->sgl_addr, ERR_PTR(err)); + dma_sync_single_for_cpu(pdsc_dev, region->sgl_addr, size, dma_dir); dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); out_free_sg_table: @@ -387,14 +385,18 @@ out_free_pages: static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack, + struct pds_vfio_region *region = &pds_vfio->dirty.region; + + return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_ack, offset, len, WRITE_ACK); } static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq, + struct pds_vfio_region *region = &pds_vfio->dirty.region; + + return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_seq, offset, len, READ_SEQ); } @@ -402,15 +404,15 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, struct iova_bitmap *dirty_bitmap, u32 bmp_offset, u32 len_bytes) { - u64 page_size = pds_vfio->dirty.region_page_size; - u64 region_start = pds_vfio->dirty.region_start; + u64 page_size = pds_vfio->dirty.region.page_size; + u64 region_start = pds_vfio->dirty.region.start; u32 bmp_offset_bit; __le64 *seq, *ack; int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)pds_vfio->dirty.region.host_seq.bmp + bmp_offset); + ack = (__le64 *)((u64)pds_vfio->dirty.region.host_ack.bmp + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -451,25 +453,24 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size); + pages = DIV_ROUND_UP(length, pds_vfio->dirty.region.page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; dev_dbg(dev, "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", - pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size, + pds_vfio->vf_id, iova, length, pds_vfio->dirty.region.page_size, pages, bitmap_size); - if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) { + if (!length || ((iova - dirty->region.start + length) > dirty->region.size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; } /* bitmap is modified in 64 bit chunks */ - bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size, - sizeof(u64)), - sizeof(u64)); + bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region.page_size, + sizeof(u64)), sizeof(u64)); if (bmp_bytes != bitmap_size) { dev_err(dev, "Calculated bitmap bytes %llu not equal to bitmap size %llu\n", @@ -477,8 +478,8 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) / - dirty->region_page_size, sizeof(u64)); + bmp_offset = DIV_ROUND_UP((iova - dirty->region.start) / + dirty->region.page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index 9de5aac58190..07662d369e7c 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -9,15 +9,19 @@ struct pds_vfio_bmp_info { u32 bmp_bytes; }; -struct pds_vfio_dirty { +struct pds_vfio_region { struct pds_vfio_bmp_info host_seq; struct pds_vfio_bmp_info host_ack; - u64 region_size; - u64 region_start; - u64 region_page_size; + u64 size; + u64 start; + u64 page_size; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; u16 num_sge; +}; + +struct pds_vfio_dirty { + struct pds_vfio_region region; bool is_enabled; }; -- cgit v1.2.3 From 87bdf9807ed70f5d3a0f852f9d04ceb1c99ad9f6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:05 -0800 Subject: vfio/pds: Pass region info to relevant functions A later patch in the series implements multi-region support. That will require specific regions to be passed to relevant functions. Prepare for that change by passing the region structure to relevant functions. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-5-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 71 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 8a7f9eed4e59..09fed7c1771a 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -100,35 +100,35 @@ static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_dirty *dirty) + struct pds_vfio_region *region) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, dirty->region.sgl_addr, - dirty->region.num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, region->sgl_addr, + region->num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(dirty->region.sgl); + kfree(region->sgl); - dirty->region.num_sge = 0; - dirty->region.sgl = NULL; - dirty->region.sgl_addr = 0; + region->num_sge = 0; + region->sgl = NULL; + region->sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_vfio_region *region = &pds_vfio->dirty.region; - if (dirty->region.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, dirty); + if (region->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, region); } static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 page_count) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; size_t sgl_size; @@ -147,9 +147,9 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - dirty->region.sgl = sgl; - dirty->region.num_sge = max_sge; - dirty->region.sgl_addr = sgl_addr; + region->sgl = sgl; + region->num_sge = max_sge; + region->sgl_addr = sgl_addr; return 0; } @@ -260,7 +260,7 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_region_info; } - err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count); + err = pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->region, page_count); if (err) { dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", ERR_PTR(err)); @@ -300,11 +300,11 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) } static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, struct pds_vfio_bmp_info *bmp_info, u32 offset, u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; - struct pds_vfio_region *region = &pds_vfio->dirty.region; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; @@ -383,36 +383,36 @@ out_free_pages: } static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 offset, u32 len) { - struct pds_vfio_region *region = &pds_vfio->dirty.region; - return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_ack, + return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_ack, offset, len, WRITE_ACK); } static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 offset, u32 len) { - struct pds_vfio_region *region = &pds_vfio->dirty.region; - - return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_seq, + return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_seq, offset, len, READ_SEQ); } static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, struct iova_bitmap *dirty_bitmap, u32 bmp_offset, u32 len_bytes) { - u64 page_size = pds_vfio->dirty.region.page_size; - u64 region_start = pds_vfio->dirty.region.start; + u64 page_size = region->page_size; + u64 region_start = region->start; u32 bmp_offset_bit; __le64 *seq, *ack; int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)pds_vfio->dirty.region.host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)pds_vfio->dirty.region.host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)region->host_seq.bmp + bmp_offset); + ack = (__le64 *)((u64)region->host_ack.bmp + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -441,6 +441,7 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, { struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_vfio_region *region = &dirty->region; u64 bmp_offset, bmp_bytes; u64 bitmap_size, pages; int err; @@ -453,23 +454,23 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - pages = DIV_ROUND_UP(length, pds_vfio->dirty.region.page_size); + pages = DIV_ROUND_UP(length, region->page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; dev_dbg(dev, "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", - pds_vfio->vf_id, iova, length, pds_vfio->dirty.region.page_size, + pds_vfio->vf_id, iova, length, region->page_size, pages, bitmap_size); - if (!length || ((iova - dirty->region.start + length) > dirty->region.size)) { + if (!length || ((iova - region->start + length) > region->size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; } /* bitmap is modified in 64 bit chunks */ - bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region.page_size, + bmp_bytes = ALIGN(DIV_ROUND_UP(length / region->page_size, sizeof(u64)), sizeof(u64)); if (bmp_bytes != bitmap_size) { dev_err(dev, @@ -478,23 +479,23 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP((iova - dirty->region.start) / - dirty->region.page_size, sizeof(u64)); + bmp_offset = DIV_ROUND_UP((iova - region->start) / + region->page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", iova, length, bmp_offset, bmp_bytes); - err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes); + err = pds_vfio_dirty_read_seq(pds_vfio, region, bmp_offset, bmp_bytes); if (err) return err; - err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset, - bmp_bytes); + err = pds_vfio_dirty_process_bitmaps(pds_vfio, region, dirty_bitmap, + bmp_offset, bmp_bytes); if (err) return err; - err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes); + err = pds_vfio_dirty_write_ack(pds_vfio, region, bmp_offset, bmp_bytes); if (err) return err; -- cgit v1.2.3 From 0c320f223ee6892a413428051f35bebf85fe83d3 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:06 -0800 Subject: vfio/pds: Move seq/ack bitmaps into region struct Since the host seq/ack bitmaps are part of a region move them into struct pds_vfio_region. Also, make use of the bmp_bytes value for validation purposes. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-6-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 35 ++++++++++++++++++++++------------- drivers/vfio/pci/pds/dirty.h | 10 +++------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 09fed7c1771a..4824cdfe01ed 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -85,18 +85,20 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->region.host_seq.bmp = host_seq_bmp; - dirty->region.host_ack.bmp = host_ack_bmp; + dirty->region.host_seq = host_seq_bmp; + dirty->region.host_ack = host_ack_bmp; + dirty->region.bmp_bytes = bytes; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->region.host_seq.bmp); - vfree(dirty->region.host_ack.bmp); - dirty->region.host_seq.bmp = NULL; - dirty->region.host_ack.bmp = NULL; + vfree(dirty->region.host_seq); + vfree(dirty->region.host_ack); + dirty->region.host_seq = NULL; + dirty->region.host_ack = NULL; + dirty->region.bmp_bytes = 0; } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -301,8 +303,8 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, struct pds_vfio_region *region, - struct pds_vfio_bmp_info *bmp_info, - u32 offset, u32 bmp_bytes, bool read_seq) + unsigned long *seq_ack_bmp, u32 offset, + u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; @@ -319,7 +321,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, int err; int i; - bmp = (void *)((u64)bmp_info->bmp + offset); + bmp = (void *)((u64)seq_ack_bmp + offset); page_offset = offset_in_page(bmp); bmp -= page_offset; @@ -387,7 +389,7 @@ static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_ack, + return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_ack, offset, len, WRITE_ACK); } @@ -395,7 +397,7 @@ static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, struct pds_vfio_region *region, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_seq, + return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_seq, offset, len, READ_SEQ); } @@ -411,8 +413,8 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)region->host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)region->host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)region->host_seq + bmp_offset); + ack = (__le64 *)((u64)region->host_ack + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -479,6 +481,13 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } + if (bmp_bytes > region->bmp_bytes) { + dev_err(dev, + "Calculated bitmap bytes %llu larger than region's cached bmp_bytes %llu\n", + bmp_bytes, region->bmp_bytes); + return -EINVAL; + } + bmp_offset = DIV_ROUND_UP((iova - region->start) / region->page_size, sizeof(u64)); diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index 07662d369e7c..a1f6d894f913 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -4,14 +4,10 @@ #ifndef _DIRTY_H_ #define _DIRTY_H_ -struct pds_vfio_bmp_info { - unsigned long *bmp; - u32 bmp_bytes; -}; - struct pds_vfio_region { - struct pds_vfio_bmp_info host_seq; - struct pds_vfio_bmp_info host_ack; + unsigned long *host_seq; + unsigned long *host_ack; + u64 bmp_bytes; u64 size; u64 start; u64 page_size; -- cgit v1.2.3 From 2e7c6feb4ef5257189819359e754625ccf5a1d6c Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:07 -0800 Subject: vfio/pds: Add multi-region support Only supporting a single region/range is limiting, wasteful, and in some cases broken (i.e. when there are large gaps in the iova memory ranges). Fix this by adding support for multiple regions based on what the device tells the driver it can support. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-7-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 220 ++++++++++++++++++++++++++++++------------- drivers/vfio/pci/pds/dirty.h | 4 +- 2 files changed, 156 insertions(+), 68 deletions(-) diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 4824cdfe01ed..8ddf4346fcd5 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -70,7 +70,7 @@ out_free_region_info: kfree(region_info); } -static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, +static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region, unsigned long bytes) { unsigned long *host_seq_bmp, *host_ack_bmp; @@ -85,20 +85,27 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->region.host_seq = host_seq_bmp; - dirty->region.host_ack = host_ack_bmp; - dirty->region.bmp_bytes = bytes; + region->host_seq = host_seq_bmp; + region->host_ack = host_ack_bmp; + region->bmp_bytes = bytes; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->region.host_seq); - vfree(dirty->region.host_ack); - dirty->region.host_seq = NULL; - dirty->region.host_ack = NULL; - dirty->region.bmp_bytes = 0; + if (!dirty->regions) + return; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + vfree(region->host_seq); + vfree(region->host_ack); + region->host_seq = NULL; + region->host_ack = NULL; + region->bmp_bytes = 0; + } } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -119,10 +126,17 @@ static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { - struct pds_vfio_region *region = &pds_vfio->dirty.region; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - if (region->sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, region); + if (!dirty->regions) + return; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + if (region->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, region); + } } static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -156,22 +170,90 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return 0; } +static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty) +{ + vfree(dirty->regions); + dirty->regions = NULL; + dirty->num_regions = 0; +} + +static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio, + struct pds_lm_dirty_region_info *region_info, + u64 region_page_size, u8 num_regions) +{ + struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + u32 dev_bmp_offset_byte = 0; + int err; + + dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region)); + if (!dirty->regions) + return -ENOMEM; + dirty->num_regions = num_regions; + + for (int i = 0; i < num_regions; i++) { + struct pds_lm_dirty_region_info *ri = ®ion_info[i]; + struct pds_vfio_region *region = &dirty->regions[i]; + u64 region_size, region_start; + u32 page_count; + + /* page_count might be adjusted by the device */ + page_count = le32_to_cpu(ri->page_count); + region_start = le64_to_cpu(ri->dma_base); + region_size = page_count * region_page_size; + + err = pds_vfio_dirty_alloc_bitmaps(region, + page_count / BITS_PER_BYTE); + if (err) { + dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", + ERR_PTR(err)); + goto out_free_regions; + } + + err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count); + if (err) { + dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", + ERR_PTR(err)); + goto out_free_regions; + } + + region->size = region_size; + region->start = region_start; + region->page_size = region_page_size; + region->dev_bmp_offset_start_byte = dev_bmp_offset_byte; + + dev_bmp_offset_byte += page_count / BITS_PER_BYTE; + if (dev_bmp_offset_byte % BITS_PER_BYTE) { + dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n"); + err = -EINVAL; + goto out_free_regions; + } + } + + return 0; + +out_free_regions: + pds_vfio_dirty_free_bitmaps(dirty); + pds_vfio_dirty_free_sgl(pds_vfio); + pds_vfio_dirty_free_regions(dirty); + + return err; +} + static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, struct rb_root_cached *ranges, u32 nnodes, u64 *page_size) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - u64 region_start, region_size, region_page_size; struct pds_lm_dirty_region_info *region_info; struct interval_tree_node *node = NULL; + u64 region_page_size = *page_size; u8 max_regions = 0, num_regions; dma_addr_t regions_dma = 0; u32 num_ranges = nnodes; - u32 page_count; - u16 len; int err; + u16 len; dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n", pds_vfio->vf_id); @@ -198,39 +280,38 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, return -EOPNOTSUPP; } - /* - * Only support 1 region for now. If there are any large gaps in the - * VM's address regions, then this would be a waste of memory as we are - * generating 2 bitmaps (ack/seq) from the min address to the max - * address of the VM's address regions. In the future, if we support - * more than one region in the device/driver we can split the bitmaps - * on the largest address region gaps. We can do this split up to the - * max_regions times returned from the dirty_status command. - */ - max_regions = 1; if (num_ranges > max_regions) { vfio_combine_iova_ranges(ranges, nnodes, max_regions); num_ranges = max_regions; } + region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL); + if (!region_info) + return -ENOMEM; + len = num_ranges * sizeof(*region_info); + node = interval_tree_iter_first(ranges, 0, ULONG_MAX); if (!node) return -EINVAL; + for (int i = 0; i < num_ranges; i++) { + struct pds_lm_dirty_region_info *ri = ®ion_info[i]; + u64 region_size = node->last - node->start + 1; + u64 region_start = node->start; + u32 page_count; - region_size = node->last - node->start + 1; - region_start = node->start; - region_page_size = *page_size; + page_count = DIV_ROUND_UP(region_size, region_page_size); - len = sizeof(*region_info); - region_info = kzalloc(len, GFP_KERNEL); - if (!region_info) - return -ENOMEM; + ri->dma_base = cpu_to_le64(region_start); + ri->page_count = cpu_to_le32(page_count); + ri->page_size_log2 = ilog2(region_page_size); - page_count = DIV_ROUND_UP(region_size, region_page_size); + dev_dbg(&pdev->dev, + "region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n", + i, region_start, node->last, region_size, page_count, + region_page_size); - region_info->dma_base = cpu_to_le64(region_start); - region_info->page_count = cpu_to_le32(page_count); - region_info->page_size_log2 = ilog2(region_page_size); + node = interval_tree_iter_next(node, 0, ULONG_MAX); + } regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len, DMA_BIDIRECTIONAL); @@ -239,39 +320,20 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_region_info; } - err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions); + err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges); dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL); if (err) goto out_free_region_info; - /* - * page_count might be adjusted by the device, - * update it before freeing region_info DMA - */ - page_count = le32_to_cpu(region_info->page_count); - - dev_dbg(&pdev->dev, - "region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n", - regions_dma, region_start, page_count, - (u8)ilog2(region_page_size)); - - err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE); - if (err) { - dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", - ERR_PTR(err)); - goto out_free_region_info; - } - - err = pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->region, page_count); + err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info, + region_page_size, num_ranges); if (err) { - dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", - ERR_PTR(err)); - goto out_free_bitmaps; + dev_err(&pdev->dev, + "Failed to allocate %d regions for tracking dirty regions: %pe\n", + num_regions, ERR_PTR(err)); + goto out_dirty_disable; } - dirty->region.start = region_start; - dirty->region.size = region_size; - dirty->region.page_size = region_page_size; pds_vfio_dirty_set_enabled(pds_vfio); pds_vfio_print_guest_region_info(pds_vfio, max_regions); @@ -280,8 +342,8 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, return 0; -out_free_bitmaps: - pds_vfio_dirty_free_bitmaps(dirty); +out_dirty_disable: + pds_vfio_dirty_disable_cmd(pds_vfio); out_free_region_info: kfree(region_info); return err; @@ -295,6 +357,7 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) pds_vfio_dirty_disable_cmd(pds_vfio); pds_vfio_dirty_free_sgl(pds_vfio); pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty); + pds_vfio_dirty_free_regions(&pds_vfio->dirty); } if (send_cmd) @@ -365,6 +428,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); + offset += region->dev_bmp_offset_start_byte; dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir); err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge, offset, bmp_bytes, read_seq); @@ -437,13 +501,28 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, return 0; } +static struct pds_vfio_region * +pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova) +{ + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + if (iova >= region->start && + iova < (region->start + region->size)) + return region; + } + + return NULL; +} + static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, struct iova_bitmap *dirty_bitmap, unsigned long iova, unsigned long length) { struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - struct pds_vfio_region *region = &dirty->region; + struct pds_vfio_region *region; u64 bmp_offset, bmp_bytes; u64 bitmap_size, pages; int err; @@ -456,6 +535,13 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } + region = pds_vfio_get_region(pds_vfio, iova); + if (!region) { + dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n", + pds_vfio->vf_id, iova, length); + return -EINVAL; + } + pages = DIV_ROUND_UP(length, region->page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index a1f6d894f913..c8e23018b801 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -13,11 +13,13 @@ struct pds_vfio_region { u64 page_size; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; + u32 dev_bmp_offset_start_byte; u16 num_sge; }; struct pds_vfio_dirty { - struct pds_vfio_region region; + struct pds_vfio_region *regions; + u8 num_regions; bool is_enabled; }; -- cgit v1.2.3 From 160912fc3d4ae492954c25cd78b90083869f0011 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Nov 2023 20:09:00 +0000 Subject: vfio/type1: account iommu allocations iommu allocations should be accounted in order to allow admins to monitor and limit the amount of iommu memory. Signed-off-by: Pasha Tatashin Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231130200900.2320829-1-pasha.tatashin@soleen.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index eacd6ec04de5..b2854d7939ce 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1436,7 +1436,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, npage << PAGE_SHIFT, prot | IOMMU_CACHE, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (ret) goto unwind; @@ -1750,7 +1750,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } ret = iommu_map(domain->domain, iova, phys, size, - dma->prot | IOMMU_CACHE, GFP_KERNEL); + dma->prot | IOMMU_CACHE, + GFP_KERNEL_ACCOUNT); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1845,7 +1846,8 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head * continue; ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, + GFP_KERNEL_ACCOUNT); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); -- cgit v1.2.3 From 838bebb4c926a573839ff1bfe1b654a6ed0f9779 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 19 Dec 2023 11:32:39 +0200 Subject: virtio: Define feature bit for administration virtqueue Introduce VIRTIO_F_ADMIN_VQ which is used for administration virtqueue support. Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231219093247.170936-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_config.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 8881aea60f6f..2445f365bce7 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 41 +#define VIRTIO_TRANSPORT_F_END 42 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -114,4 +114,10 @@ * This feature indicates that the driver can reset a queue individually. */ #define VIRTIO_F_RING_RESET 40 + +/* + * This feature indicates that the device support administration virtqueues. + */ +#define VIRTIO_F_ADMIN_VQ 41 + #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From fd27ef6b44bec26915c5b2b22c13856d9f0ba17a Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 19 Dec 2023 11:32:40 +0200 Subject: virtio-pci: Introduce admin virtqueue Introduce support for the admin virtqueue. By negotiating VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates one administration virtqueue. Administration virtqueue implementation in virtio pci generic layer, enables multiple types of upper layer drivers such as vfio, net, blk to utilize it. Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231219093247.170936-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/virtio/virtio.c | 37 +++++++++++++++-- drivers/virtio/virtio_pci_common.c | 3 ++ drivers/virtio/virtio_pci_common.h | 15 ++++++- drivers/virtio/virtio_pci_modern.c | 75 +++++++++++++++++++++++++++++++++- drivers/virtio/virtio_pci_modern_dev.c | 24 ++++++++++- include/linux/virtio_config.h | 4 ++ include/linux/virtio_pci_modern.h | 2 + include/uapi/linux/virtio_pci.h | 5 +++ 8 files changed, 157 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 3893dc29eb26..f4080692b351 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d) if (err) goto err; + if (dev->config->create_avq) { + err = dev->config->create_avq(dev); + if (err) + goto err; + } + err = drv->probe(dev); if (err) - goto err; + goto err_probe; /* If probe didn't do it, mark device DRIVER_OK ourselves. */ if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -316,6 +322,10 @@ static int virtio_dev_probe(struct device *_d) virtio_config_enable(dev); return 0; + +err_probe: + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -331,6 +341,9 @@ static void virtio_dev_remove(struct device *_d) drv->remove(dev); + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); + /* Driver should have reset device. */ WARN_ON_ONCE(dev->config->get_status(dev)); @@ -489,13 +502,20 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device); int virtio_device_freeze(struct virtio_device *dev) { struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + int ret; virtio_config_disable(dev); dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; - if (drv && drv->freeze) - return drv->freeze(dev); + if (drv && drv->freeze) { + ret = drv->freeze(dev); + if (ret) + return ret; + } + + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); return 0; } @@ -532,10 +552,16 @@ int virtio_device_restore(struct virtio_device *dev) if (ret) goto err; + if (dev->config->create_avq) { + ret = dev->config->create_avq(dev); + if (ret) + goto err; + } + if (drv->restore) { ret = drv->restore(dev); if (ret) - goto err; + goto err_restore; } /* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@ -546,6 +572,9 @@ int virtio_device_restore(struct virtio_device *dev) return 0; +err_restore: + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7a5593997e0e..fafd13d0e4d4 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -236,6 +236,9 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + if (vp_dev->is_avq(vdev, vq->index)) + continue; + if (vp_dev->per_vq_vectors) { int v = vp_dev->vqs[vq->index]->msix_vector; diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 4b773bd7c58c..7306128e63e9 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -41,6 +41,14 @@ struct virtio_pci_vq_info { unsigned int msix_vector; }; +struct virtio_pci_admin_vq { + /* Virtqueue info associated with this admin queue. */ + struct virtio_pci_vq_info info; + /* Name of the admin queue: avq.$vq_index. */ + char name[10]; + u16 vq_index; +}; + /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -58,9 +66,13 @@ struct virtio_pci_device { spinlock_t lock; struct list_head virtqueues; - /* array of all queues for house-keeping */ + /* Array of all virtqueues reported in the + * PCI common config num_queues field + */ struct virtio_pci_vq_info **vqs; + struct virtio_pci_admin_vq admin_vq; + /* MSI-X support */ int msix_enabled; int intx_enabled; @@ -86,6 +98,7 @@ struct virtio_pci_device { void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); + bool (*is_avq)(struct virtio_device *vdev, unsigned int index); }; /* Constants for MSI-X */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index ee6a386d250b..ce915018b5b0 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -19,6 +19,8 @@ #define VIRTIO_RING_NO_LEGACY #include "virtio_pci_common.h" +#define VIRTIO_AVQ_SGS_MAX 4 + static u64 vp_get_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -26,6 +28,16 @@ static u64 vp_get_features(struct virtio_device *vdev) return vp_modern_get_features(&vp_dev->mdev); } +static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return false; + + return index == vp_dev->admin_vq.vq_index; +} + static void vp_transport_features(struct virtio_device *vdev, u64 features) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -37,6 +49,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) if (features & BIT_ULL(VIRTIO_F_RING_RESET)) __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); + + if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ)) + __virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ); } static int __vp_check_common_size_one_feature(struct virtio_device *vdev, u32 fbit, @@ -69,6 +84,9 @@ static int vp_check_common_size(struct virtio_device *vdev) if (vp_check_common_size_one_feature(vdev, VIRTIO_F_RING_RESET, queue_reset)) return -EINVAL; + if (vp_check_common_size_one_feature(vdev, VIRTIO_F_ADMIN_VQ, admin_queue_num)) + return -EINVAL; + return 0; } @@ -345,6 +363,7 @@ static