| author | David S. Miller <davem@davemloft.net> | 2022-05-11 12:12:27 +0100 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2022-05-11 12:12:27 +0100 |
| commit | dc3a2001f61611347c057fea422c382b9ce3cfcb (patch) | |
| tree | e9eae7cb5bdb0649b74896e8213c1a4884bba33e | |
| parent | 53a332f222c015cb82349fd4f6b58cb14f574e8d (diff) | |
| parent | 7f46a0b7327ae261f9981888708dbca22c283900 (diff) | |
| download | linux-dc3a2001f61611347c057fea422c382b9ce3cfcb.tar.gz linux-dc3a2001f61611347c057fea422c382b9ce3cfcb.tar.bz2 linux-dc3a2001f61611347c057fea422c382b9ce3cfcb.zip | |
Merge tag 'mlx5-updates-2022-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:
====================
mlx5-updates-2022-05-09
1) Gavin Li adds an exit route from waiting for FW init on device boot and
increases the FW init timeout on the health recovery flow
2) Support LAG mode on 4-port HCAs
Mark Bloch Says:
================
This series adds support for 4-port HCAs to the mlx5 drivers.
Starting with ConnectX-7, HCAs with 4 ports are possible.
As most driver parts aren't affected by such a configuration, most driver
code is unchanged.
Specifically, the only affected areas are:
- Lag
- Devcom
- Merged E-Switch
- Single FDB E-Switch
LAG was chosen to be converted first, creating a hardware LAG when all 4
ports are added to the same bond device.
Devcom, merged E-Switch and single FDB E-Switch are marked as supporting
only 2-port HCAs; future patches will add support for 4-port HCAs.
In order to activate the hardware LAG, a user can execute the following:
ip link add bond0 type bond
ip link set bond0 type bond miimon 100 mode 2
ip link set eth2 master bond0
ip link set eth3 master bond0
ip link set eth4 master bond0
ip link set eth5 master bond0
Where eth2, eth3, eth4 and eth5 are the PFs of the same HCA.
================
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
22 files changed, 720 insertions, 302 deletions
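As a minimal verification sketch for the activation steps quoted above: once the bond is created, the lag debugfs entries introduced in lag/debugfs.c (part of this series, see the diff below) can be read to confirm that the hardware LAG is active. The debugfs location and PCI address here are assumptions, not part of the patch; the per-device mlx5 debugfs directory is typically /sys/kernel/debug/mlx5/<PCI BDF>.
# 0000:08:00.0 is a hypothetical PCI address of one PF in the LAG
cat /sys/kernel/debug/mlx5/0000:08:00.0/lag/state    # "active" once the hardware LAG is up
cat /sys/kernel/debug/mlx5/0000:08:00.0/lag/type     # "roce", "switchdev" or "multipath"
cat /sys/kernel/debug/mlx5/0000:08:00.0/lag/members  # PCI device names of the PFs in the LAG
cat /sys/kernel/debug/mlx5/0000:08:00.0/lag/mapping  # port-to-port (or hash-based) mapping in use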
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 3ad8f637c589..b804f2dd5628 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -100,7 +100,7 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, port_type) == MLX5_CAP_PORT_TYPE_IB) num_qps = pd->device->attrs.max_pkeys; else if (dev->lag_active) - num_qps = MLX5_MAX_PORTS; + num_qps = dev->lag_ports; } gsi = &mqp->gsi; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 61aa196d6484..61a3b767262f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2991,6 +2991,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_ports = mlx5_lag_get_num_ports(mdev); dev->lag_active = true; return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4f04bb55c4c6..8b3c83c0b70a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1131,6 +1131,7 @@ struct mlx5_ib_dev { struct xarray sig_mrs; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u16 pkey_table_len; + u8 lag_ports; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3f467557d34e..fb8669c02546 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3907,7 +3907,7 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity; return (unsigned int)atomic_add_return(1, tx_port_affinity) % - MLX5_MAX_PORTS + 1; + (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1; } static bool qp_supports_affinity(struct mlx5_ib_qp *qp) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 81620c25c77e..7895ed7cc285 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ - fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \ + fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ fw_reset.o qos.o lib/tout.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ba6dad97e308..11f7c03ae81b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -555,12 +555,9 @@ static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) PCI_SLOT(dev->pdev->devfn)); } -static int next_phys_dev(struct device *dev, const void *data) +static int _next_phys_dev(struct mlx5_core_dev *mdev, + const struct mlx5_core_dev *curr) { - struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); - struct mlx5_core_dev *mdev = madev->mdev; - const struct mlx5_core_dev *curr = data; - if (!mlx5_core_is_pf(mdev)) return 0; @@ -574,8 +571,30 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* Must be called with intf_mutex held */ -struct 
mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +static int next_phys_dev(struct device *dev, const void *data) +{ + struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); + struct mlx5_core_dev *mdev = madev->mdev; + + return _next_phys_dev(mdev, data); +} + +static int next_phys_dev_lag(struct device *dev, const void *data) +{ + struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); + struct mlx5_core_dev *mdev = madev->mdev; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager) || + !MLX5_CAP_GEN(mdev, lag_master) || + (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(mdev, num_lag_ports) <= 1)) + return 0; + + return _next_phys_dev(mdev, data); +} + +static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, + int (*match)(struct device *dev, const void *data)) { struct auxiliary_device *adev; struct mlx5_adev *madev; @@ -583,7 +602,7 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return NULL; - adev = auxiliary_find_device(NULL, dev, &next_phys_dev); + adev = auxiliary_find_device(NULL, dev, match); if (!adev) return NULL; @@ -592,6 +611,20 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return madev->mdev; } +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev_lag); +} + void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e8789e6d7e7b..f85166e587f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -178,13 +178,13 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) break; /* On fw_activate action, also driver is reloaded and reinit performed */ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); default: /* Unsupported action should not get to this function */ WARN_ON(1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 25f2d2717aaa..719ef26d23c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1569,9 +1569,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ida_init(&esw->offloads.vport_metadata_ida); xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); - lockdep_register_key(&esw->mode_lock_key); init_rwsem(&esw->mode_lock); - lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key); refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; @@ -1615,7 +1613,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); WARN_ON(refcount_read(&esw->qos.refcnt)); - 
lockdep_unregister_key(&esw->mode_lock_key); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1893,17 +1890,6 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); -bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) -{ - if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && - dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || - (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && - dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) - return true; - - return false; -} - bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { @@ -2015,17 +2001,6 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw) } /** - * mlx5_esw_lock() - Take write lock on esw mode lock - * @esw: eswitch device. - */ -void mlx5_esw_lock(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_allowed(esw)) - return; - down_write(&esw->mode_lock); -} - -/** * mlx5_eswitch_get_total_vports - Get total vports of the eswitch * * @dev: Pointer to core device diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index bac5160837c5..2754a732914d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -331,7 +331,6 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; - struct lock_class_key mode_lock_key; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -518,8 +517,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } -bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1); bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); @@ -706,7 +703,6 @@ void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); -void mlx5_esw_lock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); @@ -724,7 +720,6 @@ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} -static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } @@ -733,9 +728,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; } -static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; } - static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ca1aba845dd6..84df0d56a2b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -148,7 +148,7 @@ 
static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_load_one(dev); + mlx5_load_one(dev, false); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c new file mode 100644 index 000000000000..443daf6e3d4b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lag.h" + +static char *get_str_mode_type(struct mlx5_lag *ldev) +{ + if (ldev->flags & MLX5_LAG_FLAG_ROCE) + return "roce"; + if (ldev->flags & MLX5_LAG_FLAG_SRIOV) + return "switchdev"; + if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH) + return "multipath"; + + return NULL; +} + +static int type_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + char *mode = NULL; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_mode_type(ldev); + mutex_unlock(&ldev->lock); + if (!mode) + return -EINVAL; + seq_printf(file, "%s\n", mode); + + return 0; +} + +static int port_sel_mode_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int ret = 0; + char *mode; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_port_sel_mode(ldev->flags); + else + ret = -EINVAL; + mutex_unlock(&ldev->lock); + if (ret || !mode) + return ret; + + seq_printf(file, "%s\n", mode); + return 0; +} + +static int state_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + seq_printf(file, "%s\n", active ? "active" : "disabled"); + return 0; +} + +static int flags_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool shared_fdb; + bool lag_active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) + shared_fdb = ldev->shared_fdb; + + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? 
"on" : "off"); + return 0; +} + +static int mapping_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + u8 ports[MLX5_MAX_PORTS] = {}; + struct mlx5_lag *ldev; + bool hash = false; + bool lag_active; + int num_ports; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) { + if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) { + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + &num_ports); + hash = true; + } else { + for (i = 0; i < ldev->ports; i++) + ports[i] = ldev->v2p_map[i]; + num_ports = ldev->ports; + } + } + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + for (i = 0; i < num_ports; i++) { + if (hash) + seq_printf(file, "%d\n", ports[i] + 1); + else + seq_printf(file, "%d:%d\n", i + 1, ports[i]); + } + + return 0; +} + +static int members_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + } + mutex_unlock(&ldev->lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(type); +DEFINE_SHOW_ATTRIBUTE(port_sel_mode); +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(flags); +DEFINE_SHOW_ATTRIBUTE(mapping); +DEFINE_SHOW_ATTRIBUTE(members); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) +{ + struct dentry *dbg; + + dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); + dev->priv.dbg.lag_debugfs = dbg; + + debugfs_create_file("type", 0444, dbg, dev, &type_fops); + debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops); + debugfs_create_file("state", 0444, dbg, dev, &state_fops); + debugfs_create_file("flags", 0444, dbg, dev, &flags_fops); + debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops); + debugfs_create_file("members", 0444, dbg, dev, &members_fops); +} + +void mlx5_ldev_remove_debugfs(struct dentry *dbg) +{ + debugfs_remove_recursive(dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 6cad3b72c133..b6dd9043061f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -53,8 +53,7 @@ enum { */ static DEFINE_SPINLOCK(lag_lock); -static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2, bool shared_fdb, u8 flags) +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, bool shared_fdb, u8 flags) { u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); @@ -63,8 +62,8 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) { - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); } else { MLX5_SET(lagc, lag_ctx, port_select_mode, MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT); @@ -73,8 +72,8 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, return mlx5_cmd_exec_in(dev, create_lag, in); } -static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) +static int 
mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, + u8 *ports) { u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); @@ -82,8 +81,8 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); MLX5_SET(modify_lag_in, in, field_select, 0x1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); return mlx5_cmd_exec_in(dev, modify_lag, in); } @@ -108,6 +107,75 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_disabled) +{ + int i; + + *num_disabled = 0; + for (i = 0; i < num_ports; i++) { + if (!tracker->netdev_state[i].tx_enabled || + !tracker->netdev_state[i].link_up) + ports[(*num_disabled)++] = i; + } +} + +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled) +{ + int i; + + *num_enabled = 0; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + ports[(*num_enabled)++] = i; + } + + if (*num_enabled == 0) + mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); +} + +static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, + struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) +{ + char buf[MLX5_MAX_PORTS * 10 + 1] = {}; + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + int written = 0; + int num_enabled; + int idx; + int err; + int i; + int j; + + if (flags & MLX5_LAG_FLAG_HASH_BASED) { + mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + &num_enabled); + for (i = 0; i < num_enabled; i++) { + err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); + if (err != 3) + return; + written += err; + } + buf[written - 2] = 0; + mlx5_core_info(dev, "lag map active ports: %s\n", buf); + } else { + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + err = scnprintf(buf + written, 10, + " port %d:%d", i + 1, ldev->v2p_map[idx]); + if (err != 9) + return; + written += err; + } + } + mlx5_core_info(dev, "lag map:%s\n", buf); + } +} + static int mlx5_lag_netdev_event(struct notifier_block *this, unsigned long event, void *ptr); static void mlx5_do_bond_work(struct work_struct *work); @@ -121,6 +189,7 @@ static void mlx5_ldev_free(struct kref *ref) mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); + mutex_destroy(&ldev->lock); kfree(ldev); } @@ -150,6 +219,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) } kref_init(&ldev->ref); + mutex_init(&ldev->lock); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); ldev->nb.notifier_call = mlx5_lag_netdev_event; @@ -162,6 +232,8 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) if (err) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); + ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); + ldev->buckets = 1; return ldev; } @@ -171,7 +243,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < ldev->ports; i++) if (ldev->pf[i].netdev == ndev) return i; @@ -188,39 +260,72 @@ static bool 
__mlx5_lag_is_sriov(struct mlx5_lag *ldev) return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); } +/* Create a mapping between steering slots and active ports. + * As we have ldev->buckets slots per port first assume the native + * mapping should be used. + * If there are ports that are disabled fill the relevant slots + * with mapping that points to active ports. + */ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 *port1, u8 *port2) + u8 num_ports, + u8 buckets, + u8 *ports) { - bool p1en; - bool p2en; + int disabled[MLX5_MAX_PORTS] = {}; + int enabled[MLX5_MAX_PORTS] = {}; + int disabled_ports_num = 0; + int enabled_ports_num = 0; + int idx; + u32 rand; + int i; + int j; - p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled && - tracker->netdev_state[MLX5_LAG_P1].link_up; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + enabled[enabled_ports_num++] = i; + else + disabled[disabled_ports_num++] = i; + } - p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && - tracker->netdev_state[MLX5_LAG_P2].link_up; + /* Use native mapping by default where each port's buckets + * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + */ + for (i = 0; i < num_ports; i++) + for (j = 0; j < buckets; j++) { + idx = i * buckets + j; + ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + } - *port1 = MLX5_LAG_EGRESS_PORT_1; - *port2 = MLX5_LAG_EGRESS_PORT_2; - if ((!p1en && !p2en) || (p1en && p2en)) + /* If all ports are disabled/enabled keep native mapping */ + if (enabled_ports_num == num_ports || + disabled_ports_num == num_ports) return; - if (p1en) - *port2 = MLX5_LAG_EGRESS_PORT_1; - else - *port1 = MLX5_LAG_EGRESS_PORT_2; + /* Go over the disabled ports and for each assign a random active port */ + for (i = 0; i < disabled_ports_num; i++) { + for (j = 0; j < buckets; j++) { + get_random_bytes(&rand, 4); + ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + } + } } static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) { - return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop; + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].has_drop) + return true; + return false; } static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].has_drop) continue; @@ -233,12 +338,12 @@ static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, struct lag_tracker *tracker) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - struct mlx5_core_dev *inactive; - u8 v2p_port1, v2p_port2; - int inactive_idx; + u8 disabled_ports[MLX5_MAX_PORTS] = {}; + struct mlx5_core_dev *dev; + int disabled_index; + int num_disabled; int err; + int i; /* First delete the current drop rule so there won't be any dropped * packets @@ -248,58 +353,60 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, if (!ldev->tracker.has_inactive) return; - mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2); + mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); - if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) { - inactive = dev1; - inactive_idx = MLX5_LAG_P2; - } else { - inactive = dev0; - inactive_idx = MLX5_LAG_P1; + for (i = 0; i < num_disabled; i++) { + disabled_index = disabled_ports[i]; + dev = ldev->pf[disabled_index].dev; + 
err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[disabled_index].has_drop = true; + else + mlx5_core_err(dev, + "Failed to create lag drop rule, error: %d", err); } - - err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch, - MLX5_VPORT_UPLINK); - if (!err) - ldev->pf[inactive_idx].has_drop = true; - else - mlx5_core_err(inactive, - "Failed to create lag drop rule, error: %d", err); } -static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2) +static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) - return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2); - return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + return mlx5_lag_port_sel_modify(ldev, ports); + return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); } void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { + u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - u8 v2p_port1, v2p_port2; + int idx; int err; + int i; + int j; - mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, - &v2p_port2); + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); - if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || - v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { - err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2); - if (err) { - mlx5_core_err(de |
