diff options
Diffstat (limited to 'drivers/vdpa/mlx5/net/mlx5_vnet.c')
-rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.c | 1974 |
1 files changed, 1974 insertions, 0 deletions
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c new file mode 100644 index 000000000000..9df69d5efe8c --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -0,0 +1,1974 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/vdpa.h> +#include <uapi/linux/virtio_ids.h> +#include <linux/virtio_config.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/device.h> +#include "mlx5_vnet.h" +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vdpa.h" + +#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) + +#define VALID_FEATURES_MASK \ + (BIT(VIRTIO_NET_F_CSUM) | BIT(VIRTIO_NET_F_GUEST_CSUM) | \ + BIT(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT(VIRTIO_NET_F_MTU) | BIT(VIRTIO_NET_F_MAC) | \ + BIT(VIRTIO_NET_F_GUEST_TSO4) | BIT(VIRTIO_NET_F_GUEST_TSO6) | \ + BIT(VIRTIO_NET_F_GUEST_ECN) | BIT(VIRTIO_NET_F_GUEST_UFO) | BIT(VIRTIO_NET_F_HOST_TSO4) | \ + BIT(VIRTIO_NET_F_HOST_TSO6) | BIT(VIRTIO_NET_F_HOST_ECN) | BIT(VIRTIO_NET_F_HOST_UFO) | \ + BIT(VIRTIO_NET_F_MRG_RXBUF) | BIT(VIRTIO_NET_F_STATUS) | BIT(VIRTIO_NET_F_CTRL_VQ) | \ + BIT(VIRTIO_NET_F_CTRL_RX) | BIT(VIRTIO_NET_F_CTRL_VLAN) | \ + BIT(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + BIT(VIRTIO_NET_F_MQ) | BIT(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT(VIRTIO_NET_F_HASH_REPORT) | \ + BIT(VIRTIO_NET_F_RSS) | BIT(VIRTIO_NET_F_RSC_EXT) | BIT(VIRTIO_NET_F_STANDBY) | \ + BIT(VIRTIO_NET_F_SPEED_DUPLEX) | BIT(VIRTIO_F_NOTIFY_ON_EMPTY) | \ + BIT(VIRTIO_F_ANY_LAYOUT) | BIT(VIRTIO_F_VERSION_1) | BIT(VIRTIO_F_ACCESS_PLATFORM) | \ + BIT(VIRTIO_F_RING_PACKED) | BIT(VIRTIO_F_ORDER_PLATFORM) | BIT(VIRTIO_F_SR_IOV)) + +#define VALID_STATUS_MASK \ + (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ + VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) + +struct mlx5_vdpa_net_resources { + u32 tisn; + u32 tdn; + u32 tirn; + u32 rqtn; + bool valid; +}; + +struct mlx5_vdpa_cq_buf { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int cqe_size; + int nent; +}; + +struct mlx5_vdpa_cq { + struct mlx5_core_cq mcq; + struct mlx5_vdpa_cq_buf buf; + struct mlx5_db db; + int cqe; +}; + +struct mlx5_vdpa_umem { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int size; + u32 id; +}; + +struct mlx5_vdpa_qp { + struct mlx5_core_qp mqp; + struct mlx5_frag_buf frag_buf; + struct mlx5_db db; + u16 head; + bool fw; +}; + +struct mlx5_vq_restore_info { + u32 num_ent; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u16 avail_index; + bool ready; + struct vdpa_callback cb; + bool restore; +}; + +struct mlx5_vdpa_virtqueue { + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num_ent; + struct vdpa_callback event_cb; + + /* Resources for implementing the notification channel from the device + * to the driver. fwqp is the firmware end of an RC connection; the + * other end is vqqp used by the driver. cq is is where completions are + * reported. + */ + struct mlx5_vdpa_cq cq; + struct mlx5_vdpa_qp fwqp; + struct mlx5_vdpa_qp vqqp; + + /* umem resources are required for the virtqueue operation. They're use + * is internal and they must be provided by the driver. + */ + struct mlx5_vdpa_umem umem1; + struct mlx5_vdpa_umem umem2; + struct mlx5_vdpa_umem umem3; + + bool initialized; + int index; + u32 virtq_id; + struct mlx5_vdpa_net *ndev; + u16 avail_idx; + int fw_state; + + /* keep last in the struct */ + struct mlx5_vq_restore_info ri; +}; + +/* We will remove this limitation once mlx5_vdpa_alloc_resources() + * provides for driver space allocation + */ +#define MLX5_MAX_SUPPORTED_VQS 16 + +struct mlx5_vdpa_net { + struct mlx5_vdpa_dev mvdev; + struct mlx5_vdpa_net_resources res; + struct virtio_net_config config; + struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; + + /* Serialize vq resources creation and destruction. This is required + * since memory map might change and we need to destroy and create + * resources while driver in operational. + */ + struct mutex reslock; + struct mlx5_flow_table *rxft; + struct mlx5_fc *rx_counter; + struct mlx5_flow_handle *rx_rule; + bool setup; + u16 mtu; +}; + +static void free_resources(struct mlx5_vdpa_net *ndev); +static void init_mvqs(struct mlx5_vdpa_net *ndev); +static int setup_driver(struct mlx5_vdpa_net *ndev); +static void teardown_driver(struct mlx5_vdpa_net *ndev); + +static bool mlx5_vdpa_debug; + +#define MLX5_LOG_VIO_FLAG(_feature) \ + do { \ + if (features & BIT(_feature)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_feature); \ + } while (0) + +#define MLX5_LOG_VIO_STAT(_status) \ + do { \ + if (status & (_status)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_status); \ + } while (0) + +static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) +{ + if (status & ~VALID_STATUS_MASK) + mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", + status & ~VALID_STATUS_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); + if (set && !status) { + mlx5_vdpa_info(mvdev, "driver resets the device\n"); + return; + } + + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); +} + +static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) +{ + if (features & ~VALID_FEATURES_MASK) + mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", + features & ~VALID_FEATURES_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads"); + if (!features) + mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); + + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); + MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); + MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); +} + +static int create_tis(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + int err; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); + err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); + if (err) + mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); + + return err; +} + +static void destroy_tis(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); +} + +#define MLX5_VDPA_CQE_SIZE 64 +#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) + +static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) +{ + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; + u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; + int err; + + err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, + ndev->mvdev.mdev->priv.numa_node); + if (err) + return err; + + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + + buf->cqe_size = MLX5_VDPA_CQE_SIZE; + buf->nent = nent; + + return 0; +} + +static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) +{ + struct mlx5_frag_buf *frag_buf = &umem->frag_buf; + + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); +} + +static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) +{ + return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); +} + +static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) +{ + struct mlx5_cqe64 *cqe64; + void *cqe; + int i; + + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe(vcq, i); + cqe64 = cqe; + cqe64->op_own = MLX5_CQE_INVALID << 4; + } +} + +static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) +{ + struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); + + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) + return cqe64; + + return NULL; +} + +static void rx_post(struct mlx5_vdpa_qp *vqp, int n) +{ + vqp->head += n; + vqp->db.db[0] = cpu_to_be32(vqp->head); +} + +static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, + struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) +{ + struct mlx5_vdpa_qp *vqp; + __be64 *pas; + void *qpc; + + vqp = fw ? &mvq->fwqp : &mvq->vqqp; + MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + if (vqp->fw) { + /* Firmware QP is allocated by the driver for the firmware's + * use so we can skip part of the params as they will be chosen by firmware + */ + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); + MLX5_SET(qpc, qpc, no_sq, 1); + return; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, no_sq, 1); + MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); + mlx5_fill_page_frag_array(&vqp->frag_buf, pas); +} + +static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) +{ + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, + num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); +} + +static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_vdpa_qp *vqp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + void *qpc; + void *in; + int err; + + if (!vqp->fw) { + vqp = &mvq->vqqp; + err = rq_buf_alloc(ndev, vqp, mvq->num_ent); + if (err) + return err; + + err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); + if (err) + goto err_db; + inlen += vqp->frag_buf.npages * sizeof(__be64); + } + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_kzalloc; + } + + qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + if (!vqp->fw) + MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (err) + goto err_kzalloc; + + vqp->mqp.uid = ndev->mvdev.res.uid; + vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); + + if (!vqp->fw) + rx_post(vqp, mvq->num_ent); + + return 0; + +err_kzalloc: + if (!vqp->fw) + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); +err_db: + if (!vqp->fw) + rq_buf_free(ndev, vqp); + + return err; +} + +static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); + MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); + if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) + mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); + if (!vqp->fw) { + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); + rq_buf_free(ndev, vqp); + } +} + +static void *next_cqe_sw(struct mlx5_vdpa_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) +{ + struct mlx5_cqe64 *cqe64; + + cqe64 = next_cqe_sw(vcq); + if (!cqe64) + return -EAGAIN; + + vcq->mcq.cons_index++; + return 0; +} + +static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) +{ + mlx5_cq_set_ci(&mvq->cq.mcq); + rx_post(&mvq->vqqp, num); + if (mvq->event_cb.callback) + mvq->event_cb.callback(mvq->event_cb.private); +} + +static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq); + struct mlx5_vdpa_net *ndev = mvq->ndev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + int num = 0; + + while (!mlx5_vdpa_poll_one(&mvq->cq)) { + num++; + if (num > mvq->num_ent / 2) { + /* If completions keep coming while we poll, we want to + * let the hardware know that we consumed them by + * updating the doorbell record. We also let vdpa core + * know about this so it passes it on the virtio driver + * on the guest. + */ + mlx5_vdpa_handle_completions(mvq, num); + num = 0; + } + } + + if (num) + mlx5_vdpa_handle_completions(mvq, num); + + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); +} + +static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + unsigned int irqn; + __be64 *pas; + int inlen; + void *cqc; + void *in; + int err; + int eqn; + + err = mlx5_db_alloc(mdev, &vcq->db); + if (err) + return err; + + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + vcq->mcq.cqe_sz = 64; + + err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); + if (err) + goto err_db; + + cq_frag_buf_init(vcq, &vcq->buf); + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_vzalloc; + } + + MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + /* Use vector 0 by default. Consider adding code to choose least used + * vector. + */ + err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + if (err) + goto err_vec; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); + MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); + + err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); + if (err) + goto err_vec; + + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); + kfree(in); + return 0; + +err_vec: + kfree(in); +err_vzalloc: + cq_frag_buf_free(ndev, &vcq->buf); +err_db: + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); + return err; +} + +static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + + if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); + return; + } + cq_frag_buf_free(ndev, &vcq->buf); + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); +} + +static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, + struct mlx5_vdpa_umem **umemp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int p_a; + int p_b; + + switch (num) { + case 1: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); + *umemp = &mvq->umem1; + break; + case 2: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); + *umemp = &mvq->umem2; + break; + case 3: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); + *umemp = &mvq->umem3; + break; + } + return p_a * mvq->num_ent + p_b; +} + +static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); +} + +static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) +{ + int inlen; + u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; + void *um; + void *in; + int err; + __be64 *pas; + int size; + struct mlx5_vdpa_umem *umem; + + size = umem_size(ndev, mvq, num, &umem); + if (size < 0) + return size; + + umem->size = size; + err = umem_frag_buf_alloc(ndev, umem, size); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); + MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); + um = MLX5_ADDR_OF(create_umem_in, in, umem); + MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); + + pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); + mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); + goto err_cmd; + } + + kfree(in); + umem->id = MLX5_GET(create_umem_out, out, umem_id); + + return 0; + +err_cmd: + kfree(in); +err_in: + umem_frag_buf_free(ndev, umem); + return err; +} + +static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) +{ + u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; + struct mlx5_vdpa_umem *umem; + + switch (num) { + case 1: + umem = &mvq->umem1; + break; + case 2: + umem = &mvq->umem2; + break; + case 3: + umem = &mvq->umem3; + break; + } + + MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, in, umem_id, umem->id); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) + return; + + umem_frag_buf_free(ndev, umem); +} + +static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int num; + int err; + + for (num = 1; num <= 3; num++) { + err = create_umem(ndev, mvq, num); + if (err) + goto err_umem; + } + return 0; + +err_umem: + for (num--; num > 0; num--) + umem_destroy(ndev, mvq, num); + + return err; +} + +static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int num; + + for (num = 3; num > 0; num--) + umem_destroy(ndev, mvq, num); +} + +static int get_queue_type(struct mlx5_vdpa_net *ndev) +{ + u32 type_mask; + + type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); + + /* prefer split queue */ + if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED) + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; + + WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)); + + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; +} + +static bool vq_is_tx(u16 idx) +{ + return idx % 2; +} + +static u16 get_features_12_3(u64 features) +{ + return (!!(features & BIT(VIRTIO_NET_F_HOST_TSO4)) << 9) | + (!!(features & BIT(VIRTIO_NET_F_HOST_TSO6)) << 8) | + (!!(features & BIT(VIRTIO_NET_F_CSUM)) << 7) | + (!!(features & BIT(VIRTIO_NET_F_GUEST_CSUM)) << 6); +} + +static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *vq_ctx; + void *in; + int err; + + err = umems_create(ndev, mvq); + if (err) + return err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_alloc; + } + + cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, + get_features_12_3(ndev->mvdev.actual_features)); + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); + MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); + + if (vq_is_tx(mvq->index)) + MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); + + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); + MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, + !!(ndev->mvdev.actual_features & VIRTIO_F_VERSION_1)); + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key); + MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); + MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); + MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); + MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); + if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_cmd; + + kfree(in); + mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; + +err_cmd: + kfree(in); +err_alloc: + umems_destroy(ndev, mvq); + return err; +} + +static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; + + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, + MLX5_OBJ_TYPE_VIRTIO_NET_Q); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); + return; + } + umems_destroy(ndev, mvq); +} + +static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) +{ + return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; +} + +static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) +{ + return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; +} + +static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, + int *outlen, u32 qpn, u32 rqpn) +{ + void *qpc; + void *pp; + + switch (cmd) { + case MLX5_CMD_OP_2RST_QP: + *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); + *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(*outlen, GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(qp_2rst_in, *in, opcode, cmd); + MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(qp_2rst_in, *in, qpn, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); + *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(rst2init_qp_in, *in, opcode, cmd); + MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(rst2init_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, remote_qpn, rqpn); + MLX5_SET(qpc, qpc, rwe, 1); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, vhca_port_num, 1); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); + *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); + MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, 30); + MLX5_SET(qpc, qpc, remote_qpn, rqpn); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, fl, 1); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); + *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); + MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, ack_timeout, 14); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); + break; + default: + goto outerr_nullify; + } + + return; + +outerr: + kfree(*in); + kfree(*out); +outerr_nullify: + *in = NULL; + *out = NULL; +} + +static void free_inout(void *in, void *out) +{ + kfree(in); + kfree(out); +} + +/* Two QPs are used by each virtqueue. One is used by the driver and one by + * firmware. The fw argument indicates whether the subjected QP is the one used + * by firmware. + */ +static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) +{ + int outlen; + int inlen; + void *out; + void *in; + int err; + + alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); + if (!in || !out) + return -ENOMEM; + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); + free_inout(in, out); + return err; +} + +static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); + if (err) + return err; + + return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); +} + +struct mlx5_virtq_attr { + u8 state; + u16 available_index; +}; + +static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtq_attr *attr) +{ + int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); + u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; + void *out; + void *obj_context; + void *cmd_hdr; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); + if (err) + goto err_cmd; + + obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); + memset(attr, 0, sizeof(*attr)); + attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); + attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); + kfree(out); + return 0; + +err_cmd: + kfree(out); + return err; +} + +static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, + MLX5_VIRTQ_MODIFY_MASK_STATE); + MLX5_SET(virtio_net_q_object, obj_context, state, state); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (!err) + mvq->fw_state = state; + + return err; +} + +static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u16 idx = mvq->index; + int err; + + if (!mvq->num_ent) + return 0; + + if (mvq->initialized) { + mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n"); + return -EINVAL; + } + + err = cq_create(ndev, idx, mvq->num_ent); + if (err) + return err; + + err = qp_create(ndev, mvq, &mvq->fwqp); |