author    Linus Torvalds <torvalds@linux-foundation.org>  2023-04-27 17:05:34 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2023-04-27 17:05:34 -0700
commit    8ccd54fe45713cd458015b5b08d6098545e70543
tree      12a034b2ee42d681b2e915815c4529d6f246bcc4
parent    0835b5ee8704aef4e19b369237a762c52c7b6fb1
parent    c82729e06644f4e087f5ff0f91b8fb15e03b8890
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
 "virtio,vhost,vdpa: features, fixes, and cleanups:

  - reduction in interrupt rate in virtio

  - perf improvement for VDUSE

  - scalability for vhost-scsi

  - non power of 2 ring support for packed rings

  - better management for mlx5 vdpa

  - suspend for snet

  - VIRTIO_F_NOTIFICATION_DATA

  - shared backend with vdpa-sim-blk

  - user VA support in vdpa-sim

  - better struct packing for virtio

  and fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (52 commits)
  vhost_vdpa: fix unmap process in no-batch mode
  MAINTAINERS: make me a reviewer of VIRTIO CORE AND NET DRIVERS
  tools/virtio: fix build caused by virtio_ring changes
  virtio_ring: add a struct device forward declaration
  vdpa_sim_blk: support shared backend
  vdpa_sim: move buffer allocation in the devices
  vdpa/snet: use likely/unlikely macros in hot functions
  vdpa/snet: implement kick_vq_with_data callback
  virtio-vdpa: add VIRTIO_F_NOTIFICATION_DATA feature support
  virtio: add VIRTIO_F_NOTIFICATION_DATA feature support
  vdpa/snet: support the suspend vDPA callback
  vdpa/snet: support getting and setting VQ state
  MAINTAINERS: add vringh.h to Virtio Core and Net Drivers
  vringh: address kdoc warnings
  vdpa: address kdoc warnings
  virtio_ring: don't update event idx on get_buf
  vdpa_sim: add support for user VA
  vdpa_sim: replace the spinlock with a mutex to protect the state
  vdpa_sim: use kthread worker
  vdpa_sim: make devices agnostic for work management
  ...
-rw-r--r--  MAINTAINERS                           |   2
-rw-r--r--  drivers/s390/virtio/virtio_ccw.c      |  22
-rw-r--r--  drivers/vdpa/mlx5/net/mlx5_vnet.c     | 261
-rw-r--r--  drivers/vdpa/solidrun/Makefile        |   1
-rw-r--r--  drivers/vdpa/solidrun/snet_ctrl.c     | 330
-rw-r--r--  drivers/vdpa/solidrun/snet_hwmon.c    |   2
-rw-r--r--  drivers/vdpa/solidrun/snet_main.c     | 146
-rw-r--r--  drivers/vdpa/solidrun/snet_vdpa.h     |  20
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim.c      | 168
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim.h      |  14
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_blk.c  |  93
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_net.c  |  38
-rw-r--r--  drivers/vdpa/vdpa_user/vduse_dev.c    | 414
-rw-r--r--  drivers/vhost/scsi.c                  | 102
-rw-r--r--  drivers/vhost/vdpa.c                  |  44
-rw-r--r--  drivers/vhost/vhost.c                 |   6
-rw-r--r--  drivers/vhost/vringh.c                | 191
-rw-r--r--  drivers/virtio/virtio_mmio.c          |  18
-rw-r--r--  drivers/virtio/virtio_pci_modern.c    |  22
-rw-r--r--  drivers/virtio/virtio_ring.c          |  89
-rw-r--r--  drivers/virtio/virtio_vdpa.c          | 120
-rw-r--r--  include/linux/vdpa.h                  |  52
-rw-r--r--  include/linux/virtio.h                |  16
-rw-r--r--  include/linux/virtio_ring.h           |   3
-rw-r--r--  include/linux/vringh.h                |  26
-rw-r--r--  include/uapi/linux/virtio_config.h    |   6
-rw-r--r--  lib/group_cpus.c                      |   1
-rw-r--r--  tools/include/linux/types.h           |   5
-rw-r--r--  tools/virtio/linux/compiler.h         |   2
-rw-r--r--  tools/virtio/linux/kernel.h           |   5
-rw-r--r--  tools/virtio/linux/uaccess.h          |  11
-rw-r--r--  tools/virtio/virtio_test.c            |  12
32 files changed, 1761 insertions(+), 481 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index a53a1a29b10c..23d628857400 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22212,6 +22212,7 @@ F: include/uapi/linux/virtio_console.h
VIRTIO CORE AND NET DRIVERS
M: "Michael S. Tsirkin" <mst@redhat.com>
M: Jason Wang <jasowang@redhat.com>
+R: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
F: Documentation/ABI/testing/sysfs-bus-vdpa
@@ -22225,6 +22226,7 @@ F: drivers/vdpa/
F: drivers/virtio/
F: include/linux/vdpa.h
F: include/linux/virtio*.h
+F: include/linux/vringh.h
F: include/uapi/linux/virtio_*.h
F: tools/virtio/
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 954fc31b4bc7..02922768b129 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -391,7 +391,7 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
}
-static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
+static inline bool virtio_ccw_do_kvm_notify(struct virtqueue *vq, u32 data)
{
struct virtio_ccw_vq_info *info = vq->priv;
struct virtio_ccw_device *vcdev;
@@ -402,12 +402,22 @@ static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
BUILD_BUG_ON(sizeof(struct subchannel_id) != sizeof(unsigned int));
info->cookie = kvm_hypercall3(KVM_S390_VIRTIO_CCW_NOTIFY,
*((unsigned int *)&schid),
- vq->index, info->cookie);
+ data, info->cookie);
if (info->cookie < 0)
return false;
return true;
}
+static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
+{
+ return virtio_ccw_do_kvm_notify(vq, vq->index);
+}
+
+static bool virtio_ccw_kvm_notify_with_data(struct virtqueue *vq)
+{
+ return virtio_ccw_do_kvm_notify(vq, vring_notification_data(vq));
+}
+
static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
struct ccw1 *ccw, int index)
{
@@ -495,6 +505,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
struct ccw1 *ccw)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
+ bool (*notify)(struct virtqueue *vq);
int err;
struct virtqueue *vq = NULL;
struct virtio_ccw_vq_info *info;
@@ -502,6 +513,11 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
unsigned long flags;
bool may_reduce;
+ if (__virtio_test_bit(vdev, VIRTIO_F_NOTIFICATION_DATA))
+ notify = virtio_ccw_kvm_notify_with_data;
+ else
+ notify = virtio_ccw_kvm_notify;
+
/* Allocate queue. */
info = kzalloc(sizeof(struct virtio_ccw_vq_info), GFP_KERNEL);
if (!info) {
@@ -524,7 +540,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
may_reduce = vcdev->revision > 0;
vq = vring_create_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN,
vdev, true, may_reduce, ctx,
- virtio_ccw_kvm_notify, callback, name);
+ notify, callback, name);
if (!vq) {
/* For now, we fail if we can't get the requested size. */
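The ccw change above follows the same pattern the series applies to the MMIO and modern PCI transports: when VIRTIO_F_NOTIFICATION_DATA is negotiated, the driver notifies with the value built by vring_notification_data() instead of the bare queue index. Per the virtio 1.1 spec, for a split ring that value carries the vq index in the low 16 bits and the next available index in the high 16 bits, so the device learns how far the driver has advanced without reading the avail ring. A standalone sketch of the split-ring layout (illustration only; this is not the kernel implementation, and the helper name here is made up):

	#include <stdint.h>
	#include <stdio.h>

	/* Split-ring notification value: vq index in bits 0..15, next available
	 * index in bits 16..31. Packed rings additionally fold a wrap counter
	 * into the top bit, which is omitted here. */
	static uint32_t notification_data_split(uint16_t vq_index, uint16_t next_avail)
	{
		return ((uint32_t)next_avail << 16) | vq_index;
	}

	int main(void)
	{
		/* vq 3, descriptors made available up to index 7 */
		printf("notify value = 0x%08x\n",
		       (unsigned)notification_data_split(3, 7)); /* 0x00070003 */
		return 0;
	}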
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 195963b82b63..e29e32b306ad 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -778,12 +778,28 @@ static bool vq_is_tx(u16 idx)
return idx % 2;
}
-static u16 get_features_12_3(u64 features)
+enum {
+ MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
+ MLX5_VIRTIO_NET_F_HOST_ECN = 4,
+ MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
+ MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
+ MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
+ MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
+ MLX5_VIRTIO_NET_F_CSUM = 10,
+ MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
+ MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
+};
+
+static u16 get_features(u64 features)
{
- return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
+ return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
@@ -797,6 +813,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
void *obj_context;
+ u16 mlx_features;
void *cmd_hdr;
void *vq_ctx;
void *in;
@@ -812,6 +829,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
goto err_alloc;
}
+ mlx_features = get_features(ndev->mvdev.actual_features);
cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
@@ -822,7 +840,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
- get_features_12_3(ndev->mvdev.actual_features));
+ mlx_features >> 3);
+ MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
+ mlx_features & 7);
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
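A quick worked example of the split performed above (a sketch, not device-facing code): get_features() now builds one mask using the MLX5_VIRTIO_NET_F_* bit positions 2..12, and create_virtqueue() carves it into the two firmware fields, with queue_feature_bit_mask_12_3 taking bits 12..3 and queue_feature_bit_mask_2_0 taking bits 2..0:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Suppose MRG_RXBUF (bit 2) and HOST_TSO4 (bit 12) are negotiated */
		uint16_t mlx_features = (1u << 2) | (1u << 12);

		uint16_t mask_12_3 = mlx_features >> 3; /* bits 12..3 of the mask */
		uint16_t mask_2_0  = mlx_features & 7;  /* bits 2..0 of the mask  */

		assert(mask_2_0 == 0x4);        /* MRG_RXBUF sits in the low field   */
		assert(mask_12_3 == (1u << 9)); /* HOST_TSO4 at offset 12 - 3 = 9    */
		return 0;
	}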
@@ -2171,23 +2191,27 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
return MLX5_VDPA_DATAVQ_GROUP;
}
-enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
- MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
- MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
- MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
-};
-
static u64 mlx_to_vritio_features(u16 dev_features)
{
u64 result = 0;
- if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
+ result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
return result;
@@ -2298,6 +2322,113 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
}
}
+static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+ int err;
+
+ MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+ if (err)
+ return 0;
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
+{
+ if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
+ VPORT_STATE_UP)
+ return true;
+
+ return false;
+}
+
+static void update_carrier(struct work_struct *work)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+ struct mlx5_vdpa_dev *mvdev;
+ struct mlx5_vdpa_net *ndev;
+
+ wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
+ mvdev = wqent->mvdev;
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+ if (get_link_state(mvdev))
+ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ else
+ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+
+ if (ndev->config_cb.callback)
+ ndev->config_cb.callback(ndev->config_cb.private);
+
+ kfree(wqent);
+}
+
+static int queue_link_work(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+
+ wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+ if (!wqent)
+ return -ENOMEM;
+
+ wqent->mvdev = &ndev->mvdev;
+ INIT_WORK(&wqent->work, update_carrier);
+ queue_work(ndev->mvdev.wq, &wqent->work);
+ return 0;
+}
+
+static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
+{
+ struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
+ struct mlx5_eqe *eqe = param;
+ int ret = NOTIFY_DONE;
+
+ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ if (queue_link_work(ndev))
+ return NOTIFY_DONE;
+
+ ret = NOTIFY_OK;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return ret;
+ }
+ return ret;
+}
+
+static void register_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
+ return;
+
+ ndev->nb.notifier_call = event_handler;
+ mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
+ ndev->nb_registered = true;
+ queue_link_work(ndev);
+}
+
+static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!ndev->nb_registered)
+ return;
+
+ ndev->nb_registered = false;
+ mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
+ if (ndev->mvdev.wq)
+ flush_workqueue(ndev->mvdev.wq);
+}
+
static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
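The new event_handler() recovers its mlx5_vdpa_net from the embedded notifier_block via container_of, the standard kernel idiom for callbacks that only receive a pointer to the embedded member; the work item is allocated with GFP_ATOMIC because mlx5 notifiers may run in atomic context. A self-contained demonstration of the recovery step (plain C; the kernel macro and structs are stubbed here for illustration):

	#include <stddef.h>
	#include <stdio.h>

	/* Stub of the kernel's container_of(): recover the enclosing structure
	 * from a pointer to one of its members. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct notifier_block { /* stand-in for the kernel struct */
		int (*notifier_call)(struct notifier_block *nb, unsigned long event);
	};

	struct mlx5_vdpa_net_demo { /* stand-in for struct mlx5_vdpa_net */
		int link_up;
		struct notifier_block nb;
	};

	static int demo_handler(struct notifier_block *nb, unsigned long event)
	{
		/* Same recovery step as event_handler() in the hunk above */
		struct mlx5_vdpa_net_demo *ndev =
			container_of(nb, struct mlx5_vdpa_net_demo, nb);
		ndev->link_up = (event != 0);
		return 0;
	}

	int main(void)
	{
		struct mlx5_vdpa_net_demo ndev = {
			.nb = { .notifier_call = demo_handler },
		};

		ndev.nb.notifier_call(&ndev.nb, 1); /* simulate a port-change event */
		printf("link_up = %d\n", ndev.link_up);
		return 0;
	}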
@@ -2567,10 +2698,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
goto err_setup;
}
+ register_link_notifier(ndev);
err = setup_driver(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
- goto err_setup;
+ goto err_driver;
}
} else {
mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
@@ -2582,6 +2714,8 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
up_write(&ndev->reslock);
return;
+err_driver:
+ unregister_link_notifier(ndev);
err_setup:
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
@@ -2607,6 +2741,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
mlx5_vdpa_info(mvdev, "performing device reset\n");
down_write(&ndev->reslock);
+ unregister_link_notifier(ndev);
teardown_driver(ndev);
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev);
@@ -2861,9 +2996,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
mlx5_vdpa_info(mvdev, "suspending device\n");
down_write(&ndev->reslock);
- ndev->nb_registered = false;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- flush_workqueue(ndev->mvdev.wq);
+ unregister_link_notifier(ndev);
for (i = 0; i < ndev->cur_num_vqs; i++) {
mvq = &ndev->vqs[i];
suspend_vq(ndev, mvq);
@@ -3000,84 +3133,6 @@ struct mlx5_vdpa_mgmtdev {
struct mlx5_vdpa_net *ndev;
};
-static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
- u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
- int err;
-
- MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
- MLX5_SET(query_vport_state_in, in, op_mod, opmod);
- MLX5_SET(query_vport_state_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_vport_state_in, in, other_vport, 1);
-
- err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
- if (err)
- return 0;
-
- return MLX5_GET(query_vport_state_out, out, state);
-}
-
-static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
-{
- if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
- VPORT_STATE_UP)
- return true;
-
- return false;
-}
-
-static void update_carrier(struct work_struct *work)
-{
- struct mlx5_vdpa_wq_ent *wqent;
- struct mlx5_vdpa_dev *mvdev;
- struct mlx5_vdpa_net *ndev;
-
- wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
- mvdev = wqent->mvdev;
- ndev = to_mlx5_vdpa_ndev(mvdev);
- if (get_link_state(mvdev))
- ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
- else
- ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
-
- if (ndev->nb_registered && ndev->config_cb.callback)
- ndev->config_cb.callback(ndev->config_cb.private);
-
- kfree(wqent);
-}
-
-static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
-{
- struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
- struct mlx5_eqe *eqe = param;
- int ret = NOTIFY_DONE;
- struct mlx5_vdpa_wq_ent *wqent;
-
- if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
- if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
- return NOTIFY_DONE;
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
- if (!wqent)
- return NOTIFY_DONE;
-
- wqent->mvdev = &ndev->mvdev;
- INIT_WORK(&wqent->work, update_carrier);
- queue_work(ndev->mvdev.wq, &wqent->work);
- ret = NOTIFY_OK;
- break;
- default:
- return NOTIFY_DONE;
- }
- return ret;
- }
- return ret;
-}
-
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
@@ -3127,6 +3182,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
return -EINVAL;
}
device_features &= add_config->device_features;
+ } else {
+ device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
}
if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
@@ -3258,9 +3315,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
goto err_res2;
}
- ndev->nb.notifier_call = event_handler;
- mlx5_notifier_register(mdev, &ndev->nb);
- ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
@@ -3294,10 +3348,7 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
mlx5_vdpa_remove_debugfs(ndev->debugfs);
ndev->debugfs = NULL;
- if (ndev->nb_registered) {
- ndev->nb_registered = false;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- }
+ unregister_link_notifier(ndev);
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
diff --git a/drivers/vdpa/solidrun/Makefile b/drivers/vdpa/solidrun/Makefile
index c0aa3415bf7b..9116252cd5fa 100644
--- a/drivers/vdpa/solidrun/Makefile
+++ b/drivers/vdpa/solidrun/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SNET_VDPA) += snet_vdpa.o
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_main.o
+snet_vdpa-$(CONFIG_SNET_VDPA) += snet_ctrl.o
ifdef CONFIG_HWMON
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_hwmon.o
endif
diff --git a/drivers/vdpa/solidrun/snet_ctrl.c b/drivers/vdpa/solidrun/snet_ctrl.c
new file mode 100644
index 000000000000..3858738643b4
--- /dev/null
+++ b/drivers/vdpa/solidrun/snet_ctrl.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SolidRun DPU driver for control plane
+ *
+ * Copyright (C) 2022-2023 SolidRun
+ *
+ * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
+ *
+ */
+
+#include <linux/iopoll.h>
+
+#include "snet_vdpa.h"
+
+enum snet_ctrl_opcodes {
+ SNET_CTRL_OP_DESTROY = 1,
+ SNET_CTRL_OP_READ_VQ_STATE,
+ SNET_CTRL_OP_SUSPEND,
+};
+
+#define SNET_CTRL_TIMEOUT 2000000
+
+#define SNET_CTRL_DATA_SIZE_MASK 0x0000FFFF
+#define SNET_CTRL_IN_PROCESS_MASK 0x00010000
+#define SNET_CTRL_CHUNK_RDY_MASK 0x00020000
+#define SNET_CTRL_ERROR_MASK 0x0FFC0000
+
+#define SNET_VAL_TO_ERR(val) (-(((val) & SNET_CTRL_ERROR_MASK) >> 18))
+#define SNET_EMPTY_CTRL(val) (((val) & SNET_CTRL_ERROR_MASK) || \
+ !((val) & SNET_CTRL_IN_PROCESS_MASK))
+#define SNET_DATA_READY(val) ((val) & (SNET_CTRL_ERROR_MASK | SNET_CTRL_CHUNK_RDY_MASK))
+
+/* Control register used to read data from the DPU */
+struct snet_ctrl_reg_ctrl {
+ /* Chunk size in 4B words */
+ u16 data_size;
+ /* We are in the middle of a command */
+ u16 in_process:1;
+ /* A data chunk is ready and can be consumed */
+ u16 chunk_ready:1;
+ /* Error code */
+ u16 error:10;
+ /* Saved for future usage */
+ u16 rsvd:4;
+};
+
+/* Opcode register */
+struct snet_ctrl_reg_op {
+ u16 opcode;
+ /* Only if VQ index is relevant for the command */
+ u16 vq_idx;
+};
+
+struct snet_ctrl_regs {
+ struct snet_ctrl_reg_op op;
+ struct snet_ctrl_reg_ctrl ctrl;
+ u32 rsvd;
+ u32 data[];
+};
+
+static struct snet_ctrl_regs __iomem *snet_get_ctrl(struct snet *snet)
+{
+ return snet->bar + snet->psnet->cfg.ctrl_off;
+}
+
+static int snet_wait_for_empty_ctrl(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_EMPTY_CTRL(val), 10,
+ SNET_CTRL_TIMEOUT);
+}
+
+static int snet_wait_for_empty_op(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->op, val, !val, 10, SNET_CTRL_TIMEOUT);
+}
+
+static int snet_wait_for_data(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_DATA_READY(val), 10,
+ SNET_CTRL_TIMEOUT);
+}
+
+static u32 snet_read32_word(struct snet_ctrl_regs __iomem *ctrl_regs, u16 word_idx)
+{
+ return ioread32(&ctrl_regs->data[word_idx]);
+}
+
+static u32 snet_read_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs)
+{
+ return ioread32(&ctrl_regs->ctrl);
+}
+
+static void snet_write_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
+{
+ iowrite32(val, &ctrl_regs->ctrl);
+}
+
+static void snet_write_op(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
+{
+ iowrite32(val, &ctrl_regs->op);
+}
+
+static int snet_wait_for_dpu_completion(struct snet_ctrl_regs __iomem *ctrl_regs)
+{
+ /* Wait until the DPU finishes completely.
+ * It will clear the opcode register.
+ */
+ return snet_wait_for_empty_op(ctrl_regs);
+}
+
+/* Reading ctrl from the DPU:
+ * buf_size must be 4B aligned
+ *
+ * Steps:
+ *
+ * (1) Verify that the DPU is not in the middle of another operation by
+ * reading the in_process and error bits in the control register.
+ * (2) Write the request opcode and the VQ idx in the opcode register
+ * and write the buffer size in the control register.
+ * (3) Start reading chunks of data; the chunk_ready bit indicates that a
+ *     data chunk is available, and we signal that we have read the data by clearing the bit.
+ * (4) Detect that the transfer is completed when the in_process bit
+ *     in the control register is cleared or when an error appears.
+ */
+static int snet_ctrl_read_from_dpu(struct snet *snet, u16 opcode, u16 vq_idx, void *buffer,
+ u32 buf_size)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ u32 *bfr_ptr = (u32 *)buffer;
+ u32 val;
+ u16 buf_words;
+ int ret;
+ u16 words, i, tot_words = 0;
+
+ /* Supported for config 2+ */
+ if (!SNET_CFG_VER(snet, 2))
+ return -EOPNOTSUPP;
+
+ if (!IS_ALIGNED(buf_size, 4))
+ return -EINVAL;
+
+ mutex_lock(&snet->ctrl_lock);
+
+ buf_words = buf_size / 4;
+
+ /* Make sure control register is empty */
+ ret = snet_wait_for_empty_ctrl(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
+ goto exit;
+ }
+
+ /* We need to write the buffer size in the control register, and the opcode + vq index in
+ * the opcode register.
+ * We use a spinlock to serialize the writes.
+ */
+ spin_lock(&snet->ctrl_spinlock);
+
+ snet_write_ctrl(regs, buf_words);
+ snet_write_op(regs, opcode | (vq_idx << 16));
+
+ spin_unlock(&snet->ctrl_spinlock);
+
+ while (buf_words != tot_words) {
+ ret = snet_wait_for_data(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for control data\n");
+ goto exit;
+ }
+
+ val = snet_read_ctrl(regs);
+
+ /* Error? */
+ if (val & SNET_CTRL_ERROR_MASK) {
+ ret = SNET_VAL_TO_ERR(val);
+ SNET_WARN(pdev, "Error while reading control data from DPU, err %d\n", ret);
+ goto exit;
+ }
+
+ words = min_t(u16, val & SNET_CTRL_DATA_SIZE_MASK, buf_words - tot_words);
+
+ for (i = 0; i < words; i++) {
+ *bfr_ptr = snet_read32_word(regs, i);
+ bfr_ptr++;
+ }
+
+ tot_words += words;
+
+ /* Is the job completed? */
+ if (!(val & SNET_CTRL_IN_PROCESS_MASK))
+ break;
+
+ /* Clear the chunk ready bit and continue */
+ val &= ~SNET_CTRL_CHUNK_RDY_MASK;
+ snet_write_ctrl(regs, val);
+ }
+
+ ret = snet_wait_for_dpu_completion(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for the DPU to complete a control command\n");
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+/* Send a control message to the DPU using the old mechanism
+ * used with config version 1.
+ */
+static int snet_send_ctrl_msg_old(struct snet *snet, u32 opcode)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ int ret;
+
+ mutex_lock(&snet->ctrl_lock);
+
+ /* Old mechanism uses just 1 register, the opcode register.
+ * Make sure that the opcode register is empty, and that the DPU isn't
+ * processing an old message.
+ */
+ ret = snet_wait_for_empty_op(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control message to be ACKed\n");
+ goto exit;
+ }
+
+ /* Write the message */
+ snet_write_op(regs, opcode);
+
+ /* DPU ACKs the message by clearing the opcode register */
+ ret = snet_wait_for_empty_op(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for a control message to be ACKed\n");
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+/* Send a control message to the DPU.
+ * A control message is a message without payload.
+ */
+static int snet_send_ctrl_msg(struct snet *snet, u16 opcode, u16 vq_idx)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ u32 val;
+ int ret;
+
+ /* If config version is not 2+, use the old mechanism */
+ if (!SNET_CFG_VER(snet, 2))
+ return snet_send_ctrl_msg_old(snet, opcode);
+
+ mutex_lock(&snet->ctrl_lock);
+
+ /* Make sure control register is empty */
+ ret = snet_wait_for_empty_ctrl(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
+ goto exit;
+ }
+
+ /* We need to clear the control register and write the opcode + vq index in the opcode
+ * register.
+ * We use a spinlock to serialize the writes.
+ */
+ spin_lock(&snet->ctrl_spinlock);
+
+ snet_write_ctrl(regs, 0);
+ snet_write_op(regs, opcode | (vq_idx << 16));
+
+ spin_unlock(&snet->ctrl_spinlock);
+
+ /* The DPU ACKs control messages by setting the chunk ready bit
+ * without data.
+ */
+ ret = snet_wait_for_data(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for control message to be ACKed\n");
+ goto exit;
+ }
+
+ /* Check for errors */
+ val = snet_read_ctrl(regs);
+ ret = SNET_VAL_TO_ERR(val);
+
+ /* Clear the chunk ready bit */
+ val &= ~SNET_CTRL_CHUNK_RDY_MASK;
+ snet_write_ctrl(regs, val);
+
+ ret = snet_wait_for_dpu_completion(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for DPU to complete a control command, err %d\n",
+ ret);
+
+exit:
+	mutex_unlock(&snet->ctrl_lock);
+	return ret;
+}
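For reference, this is how a raw control-register readout decodes against the masks defined at the top of snet_ctrl.c (a standalone sketch; the register value is made up for illustration):

	#include <stdint.h>
	#include <stdio.h>

	#define SNET_CTRL_DATA_SIZE_MASK  0x0000FFFF
	#define SNET_CTRL_IN_PROCESS_MASK 0x00010000
	#define SNET_CTRL_CHUNK_RDY_MASK  0x00020000
	#define SNET_CTRL_ERROR_MASK      0x0FFC0000

	int main(void)
	{
		uint32_t val = 0x00030008; /* hypothetical readout */

		printf("data_size  = %u words\n", val & SNET_CTRL_DATA_SIZE_MASK); /* 8 */
		printf("in_process = %u\n", !!(val & SNET_CTRL_IN_PROCESS_MASK));  /* 1 */
		printf("chunk_rdy  = %u\n", !!(val & SNET_CTRL_CHUNK_RDY_MASK));   /* 1 */
		printf("error      = %d\n",
		       -(int)((val & SNET_CTRL_ERROR_MASK) >> 18));               /* 0 */
		return 0;
	}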