diff options
| author | David S. Miller <davem@davemloft.net> | 2018-12-10 17:06:58 -0800 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2018-12-10 17:06:58 -0800 |
| commit | 93698321f72cb351ae6ab59185b611ba26e6e1d0 (patch) | |
| tree | 372527b855938f2336d8f03c4e9c3b3bc350694f | |
| parent | d8ed257f313f64e9835e61d1365dea95a0a1c9c6 (diff) | |
| parent | 69bd48404f251b9c45a15799fdcfc87a7ad6ab8a (diff) | |
| download | linux-93698321f72cb351ae6ab59185b611ba26e6e1d0.tar.gz linux-93698321f72cb351ae6ab59185b611ba26e6e1d0.tar.bz2 linux-93698321f72cb351ae6ab59185b611ba26e6e1d0.zip | |
Merge tag 'mlx5e-updates-2018-12-10' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed:
====================
mlx5e-updates-2018-12-10 (gre)
This patch set adds GRE offloading support to Mellanox ethernet driver.
Patches 1-5 replace the existing egdev mechanism with the new TC indirect
block binds mechanism that was introduced by Netronome:
7f76fa36754b ("net: sched: register callbacks for indirect tc block binds")
Patches 6-9 add GRE offloading support along with some required
refactoring work.
Patch 10, Add netif_is_gretap()/netif_is_ip6gretap()
- Changed the is_gretap_dev and is_ip6gretap_dev logic from structure
comparison to string comparison of the rtnl_link_ops kind field.
Patch 11, add GRE offloading support to mlx5.
Patch 12 removes the egdev mechanism from TC as it is no longer used by
any of the drivers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
69 files changed, 3563 insertions, 2924 deletions
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 676c1fd1119d..9608681224e6 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, flags, local_page_list, NULL, NULL); up_read(&owning_mm->mmap_sem); - if (npages < 0) + if (npages < 0) { + if (npages != -EAGAIN) + pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages); + else + pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages); break; + } bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); mutex_lock(&umem_odp->umem_mutex); @@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = ib_umem_odp_map_dma_single_page( umem_odp, k, local_page_list[j], access_mask, current_seq); - if (ret < 0) + if (ret < 0) { + if (ret != -EAGAIN) + pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); + else + pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); break; + } p = page_to_phys(local_page_list[j]); k++; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b8e4b15e2674..33f5adb14e4e 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ + srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ + cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7d769b5538b4..26ab9041f94a 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "mlx5_ib.h" +#include "srq.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { @@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { return cqe; } else { @@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(dev->mdev, - be32_to_cpu(cqe->srqn)); + msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); @@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } wc->byte_len = be32_to_cpu(cqe->byte_cnt); - switch (cqe->op_own >> 4) { + switch (get_cqe_opcode(cqe)) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; @@ -537,7 +537,7 @@ repoll: */ rmb(); - opcode = cqe64->op_own >> 4; + opcode = get_cqe_opcode(cqe64); if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { if (likely(cq->resize_buf)) { free_cq_buf(dev, &cq->buf); @@ -1295,7 +1295,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq) return -EINVAL; } - while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, (i + 1) & cq->resize_buf->nent); dcqe64 = dsize == 64 ? dcqe : dcqe + 64; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 584ff2ea7810..8a682d86d634 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -4,6 +4,7 @@ */ #include "ib_rep.h" +#include "srq.h" static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, @@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3569fda07e07..0eeefff09c1e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -60,6 +60,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "srq.h" #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> @@ -82,10 +83,13 @@ static char mlx5_version[] = struct mlx5_ib_event_work { struct work_struct work; - struct mlx5_core_dev *dev; - void *context; - enum mlx5_dev_event event; - unsigned long param; + union { + struct mlx5_ib_dev *dev; + struct mlx5_ib_multiport_info *mpi; + }; + bool is_slave; + unsigned int event; + void *param; }; enum { @@ -2669,11 +2673,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohs(ib_spec->gre.val.protocol)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.mask.key, sizeof(ib_spec->gre.mask.key)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.val.key, sizeof(ib_spec->gre.val.key)); break; @@ -4226,6 +4230,63 @@ static void delay_drop_handler(struct work_struct *work) mutex_unlock(&delay_drop->lock); } +static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + break; + default: /* do nothing */ + return; + } +} + +static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + u8 port = (eqe->data.port.port >> 4) & 0xf; + + ibev->element.port_num = port; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_LID: + ibev->event = IB_EVENT_LID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + ibev->event = IB_EVENT_PKEY_CHANGE; + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); + break; + + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + ibev->event = IB_EVENT_GID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + ibev->event = IB_EVENT_CLIENT_REREGISTER; + break; + default: + return -EINVAL; + } + + return 0; +} + static void mlx5_ib_handle_event(struct work_struct *_work) { struct mlx5_ib_event_work *work = @@ -4233,65 +4294,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = (u8)work->param; - if (mlx5_core_is_mp_slave(work->dev)) { - ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); + if (work->is_slave) { + ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); if (!ibdev) goto out; } else { - ibdev = work->context; + ibdev = work->dev; } switch (work->event) { case MLX5_DEV_EVENT_SYS_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + ibev.element.port_num = (u8)(unsigned long)work->param; fatal = true; break; - - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* In RoCE, port up/down events are handled in - * mlx5_netdev_event(). - */ - if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == - IB_LINK_LAYER_ETHERNET) + case MLX5_EVENT_TYPE_PORT_CHANGE: + if (handle_port_change(ibdev, work->param, &ibev)) goto out; - - ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; break; - - case MLX5_DEV_EVENT_LID_CHANGE: - ibev.event = IB_EVENT_LID_CHANGE; - break; - - case MLX5_DEV_EVENT_PKEY_CHANGE: - ibev.event = IB_EVENT_PKEY_CHANGE; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); - break; - - case MLX5_DEV_EVENT_GUID_CHANGE: - ibev.event = IB_EVENT_GID_CHANGE; - break; - - case MLX5_DEV_EVENT_CLIENT_REREG: - ibev.event = IB_EVENT_CLIENT_REREGISTER; - break; - case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: - schedule_work(&ibdev->delay_drop.delay_drop_work); - goto out; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + handle_general_event(ibdev, work->param, &ibev); + /* fall through */ default: goto out; } - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = port; + ibev.device = &ibdev->ib_dev; - if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { - mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num); goto out; } @@ -4304,22 +4337,43 @@ out: kfree(work); } -static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param) +static int mlx5_ib_event(struct notifier_block *nb, + unsigned long event, void *param) { struct mlx5_ib_event_work *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return; + return NOTIFY_DONE; INIT_WORK(&work->work, mlx5_ib_handle_event); - work->dev = dev; + work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events); + work->is_slave = false; work->param = param; - work->context = context; work->event = event; queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; +} + +static int mlx5_ib_event_slave_port(struct notifier_block *nb, + unsigned long event, void *param) +{ + struct mlx5_ib_event_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + + INIT_WORK(&work->work, mlx5_ib_handle_event); + work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events); + work->is_slave = true; + work->param = param; + work->event = event; + queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -5330,7 +5384,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); + return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector); } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5350,6 +5404,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, spin_unlock(&port->mp.mpi_lock); return; } + + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; + mpi->ibdev = NULL; spin_unlock(&port->mp.mpi_lock); @@ -5405,6 +5464,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, ibdev->port[port_num].mp.mpi = mpi; mpi->ibdev = ibdev; + mpi->mdev_events.notifier_call = NULL; spin_unlock(&ibdev->port[port_num].mp.mpi_lock); err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); @@ -5422,6 +5482,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, goto unbind; } + mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; + mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); + err = mlx5_ib_init_cong_debugfs(ibdev, port_num); if (err) goto unbind; @@ -5694,8 +5757,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = - dev->mdev->priv.eq_table.num_comp_vectors; + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = &mdev->pdev->dev; mutex_init(&dev->cap_mask_mutex); @@ -6034,6 +6096,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } +void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_odp_cleanup_one(dev); +} + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { @@ -6152,6 +6219,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_unregister_vport_reps(dev); } +static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) +{ + dev->mdev_events.notifier_call = mlx5_ib_event; + mlx5_notifier_register(dev->mdev, &dev->mdev_events); + return 0; +} + +static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); +} + +static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) +{ + int uid; + + uid = mlx5_ib_devx_create(dev); + if (uid > 0) + dev->devx_whitelist_uid = uid; + + return 0; +} +static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) +{ + if (dev->devx_whitelist_uid) + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -6163,8 +6258,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, profile->stage[stage].cleanup(dev); } - if (dev->devx_whitelist_uid) - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); ib_dealloc_device((struct ib_device *)dev); } @@ -6173,7 +6266,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, { int err; int i; - int uid; for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -6183,10 +6275,6 @@ void *__mlx |
