Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin: "Several new features here: - virtio-balloon supports new stats - vdpa supports setting mac address - vdpa/mlx5 suspend/resume as well as MKEY ops are now faster - virtio_fs supports new sysfs entries for queue info - virtio/vsock performance has been improved And fixes, cleanups all over the place" * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (34 commits) vsock/virtio: avoid queuing packets when intermediate queue is empty vsock/virtio: refactor virtio_transport_send_pkt_work fw_cfg: Constify struct kobj_type vdpa/mlx5: Postpone MR deletion vdpa/mlx5: Introduce init/destroy for MR resources vdpa/mlx5: Rename mr_mtx -> lock vdpa/mlx5: Extract mr members in own resource struct vdpa/mlx5: Rename function vdpa/mlx5: Delete direct MKEYs in parallel vdpa/mlx5: Create direct MKEYs in parallel MAINTAINERS: add virtio-vsock driver in the VIRTIO CORE section virtio_fs: add sysfs entries for queue information virtio_fs: introduce virtio_fs_put_locked helper vdpa: Remove unused declarations vdpa/mlx5: Parallelize VQ suspend/resume for CVQ MQ command vdpa/mlx5: Small improvement for change_num_qps() vdpa/mlx5: Keep notifiers during suspend but ignore vdpa/mlx5: Parallelize device resume vdpa/mlx5: Parallelize device suspend vdpa/mlx5: Use async API for vq modify commands ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-09-26 08:43:17 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-09-26 08:43:17 -0700
commit: 0181f8c809d6116a8347d8beb25a8c35ed22f7d7 (patch)
tree: 73760081feebcfe93ef3c1bc78c778659266e7d4
parent: 11a299a7933e03c83818b431e6a1c53ad387423d (diff)
parent: efcd71af38be403fa52223092f79ada446e121ba (diff)
download: linux-0181f8c809d6116a8347d8beb25a8c35ed22f7d7.tar.gz
linux-0181f8c809d6116a8347d8beb25a8c35ed22f7d7.tar.bz2
linux-0181f8c809d6116a8347d8beb25a8c35ed22f7d7.zip
20 files changed, 1092 insertions, 294 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 4c37285a4747..9b899a1327a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24464,6 +24464,7 @@ F:	include/linux/vdpa.h
 F:	include/linux/virtio*.h
 F:	include/linux/vringh.h
 F:	include/uapi/linux/virtio_*.h
+F:	net/vmw_vsock/virtio*
 F:	tools/virtio/
 F:	tools/testing/selftests/drivers/net/virtio_net/
 
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5f43dfa22f79..85c525745b31 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -452,7 +452,7 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
 }
 
 /* kobj_type: ties together all properties required to register an entry */
-static struct kobj_type fw_cfg_sysfs_entry_ktype = {
+static const struct kobj_type fw_cfg_sysfs_entry_ktype = {
 	.default_groups = fw_cfg_sysfs_entry_groups,
 	.sysfs_ops = &fw_cfg_sysfs_attr_ops,
 	.release = fw_cfg_sysfs_release_entry,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 9af8ddb4a78f..a64d96effb9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1887,10 +1887,12 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 
 	throttle_op = mlx5_cmd_is_throttle_opcode(opcode);
 	if (throttle_op) {
-		/* atomic context may not sleep */
-		if (callback)
-			return -EINVAL;
-		down(&dev->cmd.vars.throttle_sem);
+		if (callback) {
+			if (down_trylock(&dev->cmd.vars.throttle_sem))
+				return -EBUSY;
+		} else {
+			down(&dev->cmd.vars.throttle_sem);
+		}
 	}
 
 	pages_queue = is_manage_pages(in);
@@ -2096,10 +2098,19 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
 {
 	struct mlx5_async_work *work = _work;
 	struct mlx5_async_ctx *ctx;
+	struct mlx5_core_dev *dev;
+	u16 opcode;
 
 	ctx = work->ctx;
-	status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out);
+	dev = ctx->dev;
+	opcode = work->opcode;
+	status = cmd_status_err(dev, status, work->opcode, work->op_mod, work->out);
 	work->user_callback(status, work);
+	/* Can't access "work" from this point on. It could have been freed in
+	 * the callback.
+	 */
+	if (mlx5_cmd_is_throttle_opcode(opcode))
+		up(&dev->cmd.vars.throttle_sem);
 	if (atomic_dec_and_test(&ctx->num_inflight))
 		complete(&ctx->inflight_done);
 }
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 35c8fbbba10e..f55d60922b87 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -44,6 +44,15 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
 	unsigned long flags;
 	int err, err1;
 
+	/*
+	 * Don't bother to submit the request to the device if the device is
+	 * not activated.
+	 */
+	if (vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_NEEDS_RESET) {
+		dev_info(&vdev->dev, "virtio pmem device needs a reset\n");
+		return -EIO;
+	}
+
 	might_sleep();
 	req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
 	if (!req_data)
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 0f347717021a..aa36de361c10 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -112,15 +112,12 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
 			    const void *src, int length);
 u8 ifcvf_get_status(struct ifcvf_hw *hw);
 void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
-void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
 void ifcvf_reset(struct ifcvf_hw *hw);
 u64 ifcvf_get_dev_features(struct ifcvf_hw *hw);
 u64 ifcvf_get_hw_features(struct ifcvf_hw *hw);
 int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features);
 u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
 int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
-struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
-int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
 u32 ifcvf_get_config_size(struct ifcvf_hw *hw);
 u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector);
 u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector);
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 50aac8fe57ef..2cedf7e2dbc4 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -83,10 +83,28 @@ enum {
 	MLX5_VDPA_NUM_AS = 2
 };
 
+struct mlx5_vdpa_mr_resources {
+	struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
+	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
+
+	/* Pre-deletion mr list */
+	struct list_head mr_list_head;
+
+	/* Deferred mr list */
+	struct list_head mr_gc_list_head;
+	struct workqueue_struct *wq_gc;
+	struct delayed_work gc_dwork_ent;
+
+	struct mutex lock;
+
+	atomic_t shutdown;
+};
+
 struct mlx5_vdpa_dev {
 	struct vdpa_device vdev;
 	struct mlx5_core_dev *mdev;
 	struct mlx5_vdpa_resources res;
+	struct mlx5_vdpa_mr_resources mres;
 
 	u64 mlx_features;
 	u64 actual_features;
@@ -95,14 +113,23 @@ struct mlx5_vdpa_dev {
 	u16 max_idx;
 	u32 generation;
 
-	struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
-	struct list_head mr_list_head;
-	/* serialize mr access */
-	struct mutex mr_mtx;
 	struct mlx5_control_vq cvq;
 	struct workqueue_struct *wq;
-	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
 	bool suspended;
+
+	struct mlx5_async_ctx async_ctx;
+};
+
+struct mlx5_vdpa_async_cmd {
+	int err;
+	struct mlx5_async_work cb_work;
+	struct completion cmd_done;
+
+	void *in;
+	size_t inlen;
+
+	void *out;
+	size_t outlen;
 };
 
 int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
@@ -121,7 +148,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
 int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
 struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
 					 struct vhost_iotlb *iotlb);
+int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
 		      struct mlx5_vdpa_mr *mr);
 void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
@@ -134,6 +163,14 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
 				unsigned int asid);
 int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev);
 int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+			      struct mlx5_vdpa_async_cmd *cmds,
+			      int num_cmds);
+
+#define mlx5_vdpa_err(__dev, format, ...)                                                          \
+	dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__,        \
+		 current->pid, ##__VA_ARGS__)
+
 
 #define mlx5_vdpa_warn(__dev, format, ...)                                                         \
 	dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__,     \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 4758914ccf86..2dd21e0b399e 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -49,17 +49,23 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
 	}
 }
 
-static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
-{
-	int inlen;
+struct mlx5_create_mkey_mem {
+	u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
+	u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+	__be64 mtt[];
+};
+
+struct mlx5_destroy_mkey_mem {
+	u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
+	u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
+};
+
+static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
+				  struct mlx5_vdpa_direct_mr *mr,
+				  struct mlx5_create_mkey_mem *mem)
+{
+	void *in = &mem->in;
 	void *mkc;
-	void *in;
-	int err;
-
-	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
-	in = kvzalloc(inlen, GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
 
 	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -76,18 +82,36 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
 	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
 		 get_octo_len(mr->end - mr->start, mr->log_size));
 	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
-	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
-	kvfree(in);
-	if (err) {
-		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
-		return err;
-	}
 
-	return 0;
+	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
+}
+
+static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
+				 struct mlx5_vdpa_direct_mr *mr,
+				 struct mlx5_create_mkey_mem *mem)
+{
+	u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
+
+	mr->mr = mlx5_idx_to_mkey(mkey_index);
+}
+
+static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
+				   struct mlx5_vdpa_direct_mr *mr,
+				   struct mlx5_destroy_mkey_mem *mem)
+{
+	void *in = &mem->in;
+
+	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
+	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
 }
 
 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
 {
+	if (!mr->mr)
+		return;
+
 	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
 }
 
@@ -179,6 +203,123 @@ static int klm_byte_size(int nklms)
 	return 16 * ALIGN(nklms, 4);
 }
 
+#define MLX5_VDPA_MTT_ALIGN 16
+
+static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
+{
+	struct mlx5_vdpa_async_cmd *cmds;
+	struct mlx5_vdpa_direct_mr *dmr;
+	int err = 0;
+	int i = 0;
+
+	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
+	if (!cmds)
+		return -ENOMEM;
+
+	list_for_each_entry(dmr, &mr->head, list) {
+		struct mlx5_create_mkey_mem *cmd_mem;
+		int mttlen, mttcount;
+
+		mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
+		mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
+		cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
+		if (!cmd_mem) {
+			err = -ENOMEM;
+			goto done;
+		}
+
+		cmds[i].out = cmd_mem->out;
+		cmds[i].outlen = sizeof(cmd_mem->out);
+		cmds[i].in = cmd_mem->in;
+		cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);
+
+		fill_create_direct_mr(mvdev, dmr, cmd_mem);
+
+		i++;
+	}
+
+	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
+	if (err) {
+
+		mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
+		goto done;
+	}
+
+	i = 0;
+	list_for_each_entry(dmr, &mr->head, list) {
+		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
+		struct mlx5_create_mkey_mem *cmd_mem;
+
+		cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);
+
+		if (!cmd->err) {
+			create_direct_mr_end(mvdev, dmr, cmd_mem);
+		} else {
+			err = err ? err : cmd->err;
+			mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
+				dmr->start, dmr->end, cmd->err);
+		}
+	}
+
+done:
+	for (i = i-1; i >= 0; i--) {
+		struct mlx5_create_mkey_mem *cmd_mem;
+
+		cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
+		kvfree(cmd_mem);
+	}
+
+	kvfree(cmds);
+	return err;
+}
+
+DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
+DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))
+
+static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
+{
+	struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
+	struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
+	struct mlx5_vdpa_direct_mr *dmr;
+	int err = 0;
+	int i = 0;
+
+	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
+	cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
+	if (!cmds || !cmd_mem)
+		return -ENOMEM;
+
+	list_for_each_entry(dmr, &mr->head, list) {
+		cmds[i].out = cmd_mem[i].out;
+		cmds[i].outlen = sizeof(cmd_mem[i].out);
+		cmds[i].in = cmd_mem[i].in;
+		cmds[i].inlen = sizeof(cmd_mem[i].in);
+		fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
+		i++;
+	}
+
+	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
+	if (err) {
+
+		mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
+		return err;
+	}
+
+	i = 0;
+	list_for_each_entry(dmr, &mr->head, list) {
+		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
+
+		dmr->mr = 0;
+		if (cmd->err) {
+			err = err ? err : cmd->err;
+			mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
+				dmr->start, dmr->end, cmd->err);
+		}
+	}
+
+	return err;
+}
+
 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
 {
 	int inlen;
@@ -279,14 +420,8 @@ done:
 		goto err_map;
 	}
 
-	err = create_direct_mr(mvdev, mr);
-	if (err)
-		goto err_direct;
-
 	return 0;
 
-err_direct:
-	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
 err_map:
 	sg_free_table(&mr->sg_head);
 	return err;
@@ -401,6 +536,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
 	if (err)
 		goto err_chain;
 
+	err = create_direct_keys(mvdev, mr);
+	if (err)
+		goto err_chain;
+
 	/* Create the memory key that defines the guests's address space. This
 	 * memory key refers to the direct keys that contain the MTT
 	 * translations
@@ -489,6 +628,7 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
 	struct mlx5_vdpa_direct_mr *n;
 
 	destroy_indirect_key(mvdev, mr);
+	destroy_direct_keys(mvdev, mr);
 	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
 		list_del_init(&dmr->list);
 		unmap_direct_mr(mvdev, dmr);
@@ -513,22 +653,58 @@ static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_
 	kfree(mr);
 }
 
+/* There can be multiple .set_map() operations in quick succession.
+ * This large delay is a simple way to prevent the MR cleanup from blocking
+ * .set_map() MR creation in this scenario.
+ */
+#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000
+
+static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
+{
+	struct mlx5_vdpa_mr_resources *mres;
+	struct mlx5_vdpa_mr *mr, *tmp;
+	struct mlx5_vdpa_dev *mvdev;
+
+	mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);
+
+	if (atomic_read(&mres->shutdown)) {
+		mutex_lock(&mres->lock);
+	} else if (!mutex_trylock(&mres->lock)) {
+		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
+				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
+		return;
+	}
+
+	mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);
+
+	list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
+		_mlx5_vdpa_destroy_mr(mvdev, mr);
+	}
+
+	mutex_unlock(&mres->lock);
+}
+
 static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
 			      struct mlx5_vdpa_mr *mr)
 {
+	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
+
 	if (!mr)
 		return;
 
-	if (refcount_dec_and_test(&mr->refcount))
-		_mlx5_vdpa_destroy_mr(mvdev, mr);
+	if (refcount_dec_and_test(&mr->refcount)) {
+		list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
+		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
+				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
+	}
 }
 
 void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
 		      struct mlx5_vdpa_mr *mr)
 {
-	mutex_lock(&mvdev->mr_mtx);
+	mutex_lock(&mvdev->mres.lock);
 	_mlx5_vdpa_put_mr(mvdev, mr);
-	mutex_unlock(&mvdev->mr_mtx);
+	mutex_unlock(&mvdev->mres.lock);
 }
 
 static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
@@ -543,44 +719,47 @@ static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
 void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
 		      struct mlx5_vdpa_mr *mr)
 {
-	mutex_lock(&mvdev->mr_mtx);
+	mutex_lock(&mvdev->mres.lock);
 	_mlx5_vdpa_get_mr(mvdev, mr);
-	mutex_unlock(&mvdev->mr_mtx);
+	mutex_unlock(&mvdev->mres.lock);
 }
 
 void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
 			 struct mlx5_vdpa_mr *new_mr,
 			 unsigned int asid)
 {
-	struct mlx5_vdpa_mr *old_mr = mvdev->mr[asid];
+	struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];
 
-	mutex_lock(&mvdev->mr_mtx);
+	mutex_lock(&mvdev->mres.lock);
 
 	_mlx5_vdpa_put_mr(mvdev, old_mr);
-	mvdev->mr[asid] = new_mr;
+	mvdev->mres.mr[asid] = new_mr;
 
-	mutex_unlock(&mvdev->mr_mtx);
+	mutex_unlock(&mvdev->mres.lock);
 }
 
 static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
 {
 	struct mlx5_vdpa_mr *mr;
 
-	mutex_lock(&mvdev->mr_mtx);
+	mutex_lock(&mvdev->mres.lock);
 
-	list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) {
+	list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
 
 		mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
 				      "mr: %p, mkey: 0x%x, refcount: %u\n",
 				       mr, mr->mkey, refcount_read(&mr->refcount));
 	}
 
-	mutex_unlock(&mvdev->mr_mtx);
+	mutex_unlock(&mvdev->mres.lock);
 
 }
 
-void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
+void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
 {
+	if (!mvdev->res.valid)
+		return;
+
 	for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
 		mlx5_vdpa_update_mr(mvdev, NULL, i);
 
@@ -613,7 +792,7 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
 	if (err)
 		goto err_iotlb;
 
-	list_add_tail(&mr->mr_list, &mvdev->mr_list_head);
+	list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);
 
 	return 0;
 
@@ -639,9 +818,9 @@ struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mutex_lock(&mvdev->mr_mtx);
+	mutex_lock(&mvdev->mres.lock);
 	err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
-	mutex_unlock(&mvdev->mr_mtx);
+	mutex_unlock(&mvdev->mres.lock);
 
 	if (err)
 		goto out_err;
@@ -661,7 +840,7 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
 {
 	int err;
 
-	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+	if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
 		return 0;
 
 	spin_lock(&mvdev->cvq.iommu_lock);
@@ -703,3 +882,33 @@ int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
 
 	return 0;
 }
+
+int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
+{
+	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
+
+	mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
+	if (!mres->wq_gc)
+		return -ENOMEM;
+
+	INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);
+
+	mutex_init(&mres->lock);
+
+	INIT_LIST_HEAD(&mres->mr_list_head);
+	INIT_LIST_HEAD(&mres->mr_gc_list_head);
+
+	return 0;
+}
+
+void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
+{
+	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
+
+	atomic_set(&mres->shutdown, 1);
+
+	flush_delayed_work(&mres->gc_dwork_ent);
+	destroy_workqueue(mres->wq_gc);
+	mres->wq_gc = NULL;
+	mutex_destroy(&mres->lock);
+}
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 5c5a41b64bfc..aeae31d0cefa 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -256,7 +256,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
 		mlx5_vdpa_warn(mvdev, "resources already allocated\n");
 		return -EINVAL;
 	}
-	mutex_init(&mvdev->mr_mtx);
 	res->uar = mlx5_get_uars_page(mdev);
 	if (IS_ERR(res->uar)) {
 		err = PTR_ERR(res->uar);
@@ -301,7 +300,6 @@ err_pd:
 err_uctx:
 	mlx5_put_uars_page(mdev, res->uar);
 err_uars:
-	mutex_destroy(&mvdev->mr_mtx);
 	return err;
 }
 
@@ -318,6 +316,78 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
 	dealloc_pd(mvdev, res->pdn, res->uid);
 	destroy_uctx(mvdev, res->uid);
 	mlx5_put_uars_page(mvdev->mdev, res->uar);
-	mutex_destroy(&mvdev->mr_mtx);
 	res->valid = false;
 }
+
+static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context)
+{
+	struct mlx5_vdpa_async_cmd *cmd =
+		container_of(context, struct mlx5_vdpa_async_cmd, cb_work);
+
+	cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out);
+	complete(&cmd->cmd_done);
+}
+
+static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev,
+			   struct mlx5_vdpa_async_cmd *cmds,
+			   int issued,
+			   int *completed)
+
+{
+	struct mlx5_vdpa_async_cmd *cmd = &cmds[issued];
+	int err;
+
+retry:
+	err = mlx5_cmd_exec_cb(&mvdev->async_ctx,
+			       cmd->in, cmd->inlen,
+			       cmd->out, cmd->outlen,
+			       virtqueue_cmd_callback,
+			       &cmd->cb_work);
+	if (err == -EBUSY) {
+		if (*completed < issued) {
+			/* Throttled by own commands: wait for oldest completion. */
+			wait_for_completion(&cmds[*completed].cmd_done);
+			(*completed)++;
+
+			goto retry;
+		} else {
+			/* Throttled by external commands: switch to sync api. */
+			err = mlx5_cmd_exec(mvdev->mdev,
+					    cmd->in, cmd->inlen,
+					    cmd->out, cmd->outlen);
+			if (!err)
+				(*completed)++;
+		}
+	}
+
+	return err;
+}
+
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+			      struct mlx5_vdpa_async_cmd *cmds,
+			      int num_cmds)
+{
+	int completed = 0;
+	int issued = 0;
+	int err = 0;
+
+	for (int i = 0; i < num_cmds; i++)
+		init_completion(&cmds[i].cmd_done);
+
+	while (issued < num_cmds) {
+
+		err = issue_async_cmd(mvdev, cmds, issued, &completed);
+		if (err) {
+			mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n",
+				      issued, num_cmds, err);
+			break;
+		}
+
+		issued++;
+	}
+
+	while (completed < issued)
+		wait_for_completion(&cmds[completed++].cmd_done);
+
+	return err;
+}
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index fa78e8288ebb..dee019977716 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -941,11 +941,11 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev,
 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
 
-		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
+		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
 		if (vq_mr)
 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
 
-		vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
+		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
 		if (vq_desc_mr &&
 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
@@ -953,11 +953,11 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev,
 		/* If there is no mr update, make sure that the existing ones are set
 		 * modify to ready.
 		 */
-		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
+		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
 		if (vq_mr)
 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
 
-		vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
+		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
 		if (vq_desc_mr)
 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
 	}
@@ -1184,40 +1184,92 @@ struct mlx5_virtq_attr {
 	u16 used_index;
 };
 
-static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
-			   struct mlx5_virtq_attr *attr)
-{
-	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
-	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
-	void *out;
-	void *obj_context;
-	void *cmd_hdr;
-	int err;
+struct mlx5_virtqueue_query_mem {
+	u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
+	u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
+};
 
-	out = kzalloc(outlen, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
+struct mlx5_virtqueue_modify_mem {
+	u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
+	u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
+};
 
-	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
+static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
+				     struct mlx5_vdpa_virtqueue *mvq,
+				     struct mlx5_virtqueue_query_mem *cmd)
+{
+	void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
 
 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
-	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
-	if (err)
-		goto err_cmd;
+}
+
+static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
+				struct mlx5_virtqueue_query_mem *cmd,
+				struct mlx5_virtq_attr *attr)
+{
+	void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
 
-	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
 	memset(attr, 0, sizeof(*attr));
 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
-	kfree(out);
-	return 0;
+}
 
-err_cmd:
-	kfree(out);
+static int query_virtqueues(struct mlx5_vdpa_net *ndev,
+			    int start_vq,
+			    int num_vqs,
+			    struct mlx5_virtq_attr *attrs)
+{
+	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+	struct mlx5_virtqueue_query_mem *cmd_mem;
+	struct mlx5_vdpa_async_cmd *cmds;
+	int err = 0;
+
+	WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n",
+	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
+
+	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
+	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
+	if (!cmds || !cmd_mem) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (int i = 0; i < num_vqs; i++) {
+		cmds[i].in = &cmd_mem[i].in;
+		cmds[i].inlen = sizeof(cmd_mem[i].in);
+		cmds[i].out = &cmd_mem[i].out;
+		cmds[i].outlen = sizeof(cmd_mem[i].out);
+		fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
+	}
+
+	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
+	if (err) {
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-26 08:43:17 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-26 08:43:17 -0700
commit	0181f8c809d6116a8347d8beb25a8c35ed22f7d7 (patch)
tree	73760081feebcfe93ef3c1bc78c778659266e7d4
parent	11a299a7933e03c83818b431e6a1c53ad387423d (diff)
parent	efcd71af38be403fa52223092f79ada446e121ba (diff)
download	linux-0181f8c809d6116a8347d8beb25a8c35ed22f7d7.tar.gz linux-0181f8c809d6116a8347d8beb25a8c35ed22f7d7.tar.bz2 linux-0181f8c809d6116a8347d8beb25a8c35ed22f7d7.zip