summaryrefslogtreecommitdiff
path: root/drivers/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-06-15 11:14:21 -0700
committerDavid S. Miller <davem@davemloft.net>2021-06-15 11:14:21 -0700
commitf0c227c7df657a3470cd3ea03d51a71941d1e98f (patch)
tree9b869fbaf8c3cac721207c53cae0f0fca17f614d /drivers/net
parent08ab4d74414585bddb451d934b2eeddec9460909 (diff)
parentc36326d38d933199014aba5a17d384cf52e4b558 (diff)
downloadlinux-f0c227c7df657a3470cd3ea03d51a71941d1e98f.tar.gz
linux-f0c227c7df657a3470cd3ea03d51a71941d1e98f.tar.bz2
linux-f0c227c7df657a3470cd3ea03d51a71941d1e98f.zip
Merge tag 'mlx5-updates-2021-06-14' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5-updates-2021-06-14 1) Trivial Lag refactroing in preparation for upcomming Single FDB lag feature - First 3 patches 2) Scalable IRQ distriburion for Sub-functions A subfunction (SF) is a lightweight function that has a parent PCI function (PF) on which it is deployed. Currently, mlx5 subfunction is sharing the IRQs (MSI-X) with their parent PCI function. Before this series the PF allocates enough IRQs to cover all the cores in a system, Newly created SFs will re-use all the IRQs that the PF has allocated for itself. Hence, the more SFs are created, there are more EQs per IRQs. Therefore, whenever we handle an interrupt, we need to pull all SFs EQs and PF EQs instead of PF EQs without SFs on the system. This leads to a hard impact on the performance of SFs and PF. For example, on machine with: Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz with 56 cores. PCI Express 3 with BW of 126 Gb/s. ConnectX-5 Ex; EDR IB (100Gb/s) and 100GbE; dual-port QSFP28; PCIe4.0 x16. test case: iperf TX BW single CPU, affinity of app and IRQ are the same. PF only: no SFs on the system, 56 IRQs. SF (before), 250 SFs Sharing the same 56 IRQs . SF (now), 250 SFs + 255 avaiable IRQs for the NIC. (please see IRQ spread scheme below). application SF-IRQ channel BW(Gb/sec) interrupts/sec iperf TX affinity PF only cpu={0} cpu={0} cpu={0} 79 8200 SF (before) cpu={0} cpu={0} cpu={0} 51.3 (-35%) 9500 SF (now) cpu={0} cpu={0} cpu={0} 78 (-2%) 8200 command: $ taskset -c 0 iperf -c 11.1.1.1 -P 3 -i 6 -t 30 | grep SUM The different between the SF examples is that before this series we allocate num_cpus (56) IRQs, and all of them were shared among the PF and the SFs. And after this series, we allocate 255 IRQs, and we spread the SFs among the above IRQs. This have significantly decreased the load on each IRQ and the number of EQs per IRQ is down by 95% (251->11). In this patchset the solution proposed is to have a dedicated IRQ pool for SFs to use. the pool will allocate a large number of IRQs for SFs to grab from in order to minimize irq sharing between the different SFs. IRQs will not be requested from the OS until they are 1st requested by an SF consumer, and will be eventually released when the last SF consumer releases them. For the detailed IRQ spread and allocation scheme please see last patch: ("net/mlx5: Round-Robin EQs over IRQs") ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c179
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c608
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c1
14 files changed, 785 insertions, 431 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 59ee28156603..930b225dfe77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5114,7 +5114,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_set_netdev_mtu_boundaries(priv);
mlx5e_set_dev_port_mtu(priv);
- mlx5_lag_add(mdev, netdev);
+ mlx5_lag_add_netdev(mdev, netdev);
mlx5e_enable_async_events(priv);
mlx5e_enable_blocking_events(priv);
@@ -5162,7 +5162,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
priv->en_trap = NULL;
}
mlx5e_disable_async_events(priv);
- mlx5_lag_remove(mdev);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
mlx5_vxlan_reset_to_default(mdev->vxlan);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8290e0086178..2d2cc5f3b03f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -976,7 +976,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
if (MLX5_CAP_GEN(mdev, uplink_follow))
mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
- mlx5_lag_add(mdev, netdev);
+ mlx5_lag_add_netdev(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event;
mlx5_notifier_register(mdev, &priv->events_nb);
mlx5e_dcbnl_initialize(priv);
@@ -1009,7 +1009,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
mlx5e_dcbnl_delete_app(priv);
mlx5_notifier_unregister(mdev, &priv->events_nb);
mlx5e_rep_tc_disable(priv);
- mlx5_lag_remove(mdev);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
}
static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 77c0ca655975..7e5b3826eae5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/interrupt.h>
@@ -45,6 +18,7 @@
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
+#include "mlx5_irq.h"
enum {
MLX5_EQE_OWNER_INIT_VAL = 0x1,
@@ -84,6 +58,9 @@ struct mlx5_eq_table {
struct mutex lock; /* sync async eqs creations */
int num_comp_eqs;
struct mlx5_irq_table *irq_table;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -286,7 +263,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
struct mlx5_priv *priv = &dev->priv;
- u8 vecidx = param->irq_index;
+ u16 vecidx = param->irq_index;
__be64 *pas;
void *eqc;
int inlen;
@@ -309,13 +286,20 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
init_eq_buf(eq);
+ eq->irq = mlx5_irq_request(dev, vecidx, param->affinity);
+ if (IS_ERR(eq->irq)) {
+ err = PTR_ERR(eq->irq);
+ goto err_buf;
+ }
+
+ vecidx = mlx5_irq_get_index(eq->irq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
- goto err_buf;
+ goto err_irq;
}
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
@@ -359,6 +343,8 @@ err_eq:
err_in:
kvfree(in);
+err_irq:
+ mlx5_irq_release(eq->irq);
err_buf:
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -377,10 +363,9 @@ err_buf:
int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct notifier_block *nb)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
- err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+ err = mlx5_irq_attach_nb(eq->irq, nb);
if (!err)
eq_update_ci(eq, 1);
@@ -399,9 +384,7 @@ EXPORT_SYMBOL(mlx5_eq_enable);
void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct notifier_block *nb)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
- mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+ mlx5_irq_detach_nb(eq->irq, nb);
}
EXPORT_SYMBOL(mlx5_eq_disable);
@@ -415,10 +398,9 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
- synchronize_irq(eq->irqn);
+ mlx5_irq_release(eq->irq);
mlx5_frag_buf_free(dev, &eq->frag_buf);
-
return err;
}
@@ -490,14 +472,7 @@ static int create_async_eq(struct mlx5_core_dev *dev,
int err;
mutex_lock(&eq_table->lock);
- /* Async EQs must share irq index 0 */
- if (param->irq_index != 0) {
- err = -EINVAL;
- goto unlock;
- }
-
err = create_map_eq(dev, eq, param);
-unlock:
mutex_unlock(&eq_table->lock);
return err;
}
@@ -616,8 +591,11 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
eq->irq_nb.notifier_call = mlx5_eq_async_int;
spin_lock_init(&eq->lock);
+ if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL))
+ return -ENOMEM;
err = create_async_eq(dev, &eq->core, param);
+ free_cpumask_var(param->affinity);
if (err) {
mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
return err;
@@ -652,7 +630,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) {
- .irq_index = 0,
.nent = MLX5_NUM_CMD_EQE,
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
@@ -665,7 +642,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) {
- .irq_index = 0,
.nent = MLX5_NUM_ASYNC_EQE,
};
@@ -675,7 +651,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
goto err2;
param = (struct mlx5_eq_param) {
- .irq_index = 0,
.nent = /* TODO: sriov max_vf + */ 1,
.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
};
@@ -735,6 +710,9 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
int err;
+ if (!param->affinity)
+ return ERR_PTR(-EINVAL);
+
if (!eq)
return ERR_PTR(-ENOMEM);
@@ -845,16 +823,21 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
.irq_index = vecidx,
.nent = nent,
};
- err = create_map_eq(dev, &eq->core, &param);
- if (err) {
- kfree(eq);
- goto clean;
+
+ if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
+ err = -ENOMEM;
+ goto clean_eq;
}
+ cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node),
+ param.affinity);
+ err = create_map_eq(dev, &eq->core, &param);
+ free_cpumask_var(param.affinity);
+ if (err)
+ goto clean_eq;
err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
if (err) {
destroy_unmap_eq(dev, &eq->core);
- kfree(eq);
- goto clean;
+ goto clean_eq;
}
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
@@ -863,7 +846,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
}
return 0;
-
+clean_eq:
+ kfree(eq);
clean:
destroy_comp_eqs(dev);
return err;
@@ -899,17 +883,23 @@ EXPORT_SYMBOL(mlx5_comp_vectors_count);
struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
- int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int i = 0;
- return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
- vecidx);
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector)
+ break;
+ }
+
+ return mlx5_irq_get_affinity_mask(eq->core.irq);
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
- return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
+ return dev->priv.eq_table->rmap;
}
#endif
@@ -926,12 +916,57 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
return ERR_PTR(-ENOENT);
}
+static void clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+ free_irq_cpu_rmap(eq_table->rmap);
+#endif
+}
+
+static int set_rmap(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+ int vecidx;
+
+ eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+ if (!eq_table->rmap) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+ goto err_out;
+ }
+
+ vecidx = MLX5_IRQ_VEC_COMP_BASE;
+ for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE;
+ vecidx++) {
+ err = irq_cpu_rmap_add(eq_table->rmap,
+ pci_irq_vector(mdev->pdev, vecidx));
+ if (err) {
+ mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+ err);
+ goto err_irq_cpu_rmap_add;
+ }
+ }
+ return 0;
+
+err_irq_cpu_rmap_add:
+ clear_rmap(mdev);
+err_out:
+#endif
+ return err;
+}
+
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
}
@@ -948,12 +983,19 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int max_eqs_sf;
int err;
eq_table->num_comp_eqs =
min_t(int,
- mlx5_irq_get_num_comp(eq_table->irq_table),
+ mlx5_irq_table_get_num_comp(eq_table->irq_table),
num_eqs - MLX5_MAX_ASYNC_EQS);
+ if (mlx5_core_is_sf(dev)) {
+ max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
+ mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
+ eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
+ max_eqs_sf);
+ }
err = create_async_eqs(dev);
if (err) {
@@ -961,6 +1003,18 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
goto err_async_eqs;
}
+ if (!mlx5_core_is_sf(dev)) {
+ /* rmap is a mapping between irq number and queue number.
+ * each irq can be assign only to a single rmap.
+ * since SFs share IRQs, rmap mapping cannot function correctly
+ * for irqs that are shared for different core/netdev RX rings.
+ * Hence we don't allow netdev rmap for SFs
+ */
+ err = set_rmap(dev);
+ if (err)
+ goto err_rmap;
+ }
+
err = create_comp_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create completion EQs\n");
@@ -969,6 +1023,9 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
return 0;
err_comp_eqs:
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
+err_rmap:
destroy_async_eqs(dev);
err_async_eqs:
return err;
@@ -976,6 +1033,8 @@ err_async_eqs:
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 1fb70524d067..5c043c5cc403 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -93,6 +93,64 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+static void mlx5_do_bond_work(struct work_struct *work);
+
+static void mlx5_ldev_free(struct kref *ref)
+{
+ struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
+
+ if (ldev->nb.notifier_call)
+ unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+ mlx5_lag_mp_cleanup(ldev);
+ cancel_delayed_work_sync(&ldev->bond_work);
+ destroy_workqueue(ldev->wq);
+ kfree(ldev);
+}
+
+static void mlx5_ldev_put(struct mlx5_lag *ldev)
+{
+ kref_put(&ldev->ref, mlx5_ldev_free);
+}
+
+static void mlx5_ldev_get(struct mlx5_lag *ldev)
+{
+ kref_get(&ldev->ref);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ int err;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return NULL;
+
+ ldev->wq = create_singlethread_workqueue("mlx5_lag");
+ if (!ldev->wq) {
+ kfree(ldev);
+ return NULL;
+ }
+
+ kref_init(&ldev->ref);
+ INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+ ldev->nb.notifier_call = mlx5_lag_netdev_event;
+ if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
+ ldev->nb.notifier_call = NULL;
+ mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+ }
+
+ err = mlx5_lag_mp_init(ldev);
+ if (err)
+ mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+ err);
+
+ return ldev;
+}
+
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev)
{
@@ -258,6 +316,10 @@ static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
if (!ldev->pf[i].dev)
continue;
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(ldev->pf[i].dev);
}
@@ -276,6 +338,31 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
}
}
+static void mlx5_disable_lag(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ bool roce_lag;
+ int err;
+
+ roce_lag = __mlx5_lag_is_roce(ldev);
+
+ if (roce_lag) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ }
+ mlx5_nic_vport_disable_roce(dev1);
+ }
+
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
+
+ if (roce_lag)
+ mlx5_lag_add_devices(ldev);
+}
+
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
@@ -322,20 +409,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
} else if (do_bond && __mlx5_lag_is_active(ldev)) {
mlx5_modify_lag(ldev, &tracker);
} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
- roce_lag = __mlx5_lag_is_roce(ldev);
-
- if (roce_lag) {
- dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
- mlx5_rescan_drivers_locked(dev0);
- mlx5_nic_vport_disable_roce(dev1);
- }
-
- err = mlx5_deactivate_lag(ldev);
- if (err)
- return;
-
- if (roce_lag)
- mlx5_lag_add_devices(ldev);
+ mlx5_disable_lag(ldev);
}
}
@@ -495,55 +569,52 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static struct mlx5_lag *mlx5_lag_dev_alloc(void)
+static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
{
- struct mlx5_lag *ldev;
-
- ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
- if (!ldev)
- return NULL;
-
- ldev->wq = create_singlethread_workqueue("mlx5_lag");
- if (!ldev->wq) {
- kfree(ldev);
- return NULL;
- }
+ unsigned int fn = PCI_FUNC(dev->pdev->devfn);
- INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+ if (fn >= MLX5_MAX_PORTS)
+ return;
- return ldev;
+ spin_lock(&lag_lock);
+ ldev->pf[fn].netdev = netdev;
+ ldev->tracker.netdev_state[fn].link_up = 0;
+ ldev->tracker.netdev_state[fn].tx_enabled = 0;
+ spin_unlock(&lag_lock);
}
-static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
+static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
+ struct net_device *netdev)
{
- destroy_workqueue(ldev->wq);
- kfree(ldev);
+ int i;
+
+ spin_lock(&lag_lock);
+ for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ if (ldev->pf[i].netdev == netdev) {
+ ldev->pf[i].netdev = NULL;
+ break;
+ }
+ }
+ spin_unlock(&lag_lock);
}
-static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
- struct mlx5_core_dev *dev,
- struct net_device *netdev)
+static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
{
unsigned int fn = PCI_FUNC(dev->pdev->devfn);
if (fn >= MLX5_MAX_PORTS)
- return -EPERM;
-
- spin_lock(&lag_lock);
- ldev->pf[fn].dev = dev;
- ldev->pf[fn].netdev = netdev;
- ldev->tracker.netdev_state[fn].link_up = 0;
- ldev->tracker.netdev_state[fn].tx_enabled = 0;
+ return;
+ ldev->pf[fn].dev = dev;
dev->priv.lag = ldev;
-
- spin_unlock(&lag_lock);
-
- return fn;
}
-static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
- struct mlx5_core_dev *dev)
+/* Must be called with intf_mutex held */
+static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
{
int i;
@@ -554,19 +625,15 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
if (i == MLX5_MAX_PORTS)
return;
- spin_lock(&lag_lock);
- memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
-
+ ldev->pf[i].dev = NULL;
dev->priv.lag = NULL;
- spin_unlock(&lag_lock);
}
/* Must be called with intf_mutex held */
-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
- int i, err;
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
@@ -578,67 +645,77 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
ldev = tmp_dev->priv.lag;
if (!ldev) {
- ldev = mlx5_lag_dev_alloc();
+ ldev = mlx5_lag_dev_alloc(dev);
if (!ldev) {
mlx5_core_err(dev, "Failed to alloc lag dev\n");
return;
}
+ } else {
+ mlx5_ldev_get(ldev);
}
- if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
- return;
+ mlx5_ldev_add_mdev(ldev, dev);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (!ldev->pf[i].dev)
- break;
+ return;
+}
- if (i >= MLX5_MAX_PORTS)
- ldev->flags |= MLX5_LAG_FLAG_READY;
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
- if (!ldev->nb.notifier_call) {
- ldev->nb.notifier_call = mlx5_lag_netdev_event;
- if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
- ldev->nb.notifier_call = NULL;
- mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
- }
- }
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
- err = mlx5_lag_mp_init(ldev);
- if (err)
- mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
- err);
+ mlx5_dev_list_lock();
+ mlx5_ldev_remove_mdev(ldev, dev);
+ mlx5_dev_list_unlock();
+ mlx5_ldev_put(ldev);
+}
+
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
+{
+ mlx5_dev_list_lock();
+ __mlx5_lag_dev_add_mdev(dev);
+ mlx5_dev_list_unlock();
}
/* Must be called with intf_mutex held */
-void mlx5_lag_remove(struct mlx5_core_dev *dev)
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
+ struct net_device *netdev)
{
struct mlx5_lag *ldev;
- int i;
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
if (!ldev)
return;
if (__mlx5_lag_is_active(ldev))
- mlx5_deactivate_lag(ldev);
-
- mlx5_lag_dev_remove_pf(ldev, dev);
+ mlx5_disable_lag(ldev);
+ mlx5_ldev_remove_netdev(ldev, netdev);
ldev->flags &= ~MLX5_LAG_FLAG_READY;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mlx5_ldev_add_netdev(ldev, dev, netdev);
for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
+ if (!ldev->pf[i].dev)
break;
- if (i == MLX5_MAX_PORTS) {
- if (ldev->nb.notifier_call) {
- unregister_netdevice_notifier_net(&init_net, &ldev->nb);
- ldev->nb.notifier_call = NULL;
- }
- mlx5_lag_mp_cleanup(ldev);
- cancel_delayed_work_sync(&ldev->bond_work);
- mlx5_lag_dev_free(ldev);
- }
+ if (i >= MLX5_MAX_PORTS)
+ ldev->flags |= MLX5_LAG_FLAG_READY;
}
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -647,7 +724,7 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
bool res;
spin_lock(&lag_lock);
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_roce(ldev);
spin_unlock(&lag_lock);
@@ -661,7 +738,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
bool res;
spin_lock(&lag_lock);
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev);
spin_unlock(&lag_lock);
@@ -675,7 +752,7 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
bool res;
spin_lock(&lag_lock);
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev);
spin_unlock(&lag_lock);
@@ -688,7 +765,7 @@ void mlx5_lag_update(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev;
mlx5_dev_list_lock();
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
if (!ldev)
goto unlock;
@@ -704,7 +781,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev;
spin_lock(&lag_lock);
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
@@ -733,7 +810,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
u8 port = 0;
spin_lock(&lag_lock);
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
@@ -769,7 +846,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
memset(values, 0, sizeof(*values) * num_counters);
spin_lock(&lag_lock);
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
num_ports = MLX5_MAX_PORTS;
mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 8d8cf2d0bc6d..191392c37558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -40,6 +40,7 @@ struct lag_tracker {
struct mlx5_lag {
u8 flags;
u8 v2p_map[MLX5_MAX_PORTS];
+ struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker tracker;
struct workqueue_struct *wq;
@@ -49,7 +50,7 @@ struct mlx5_lag {
};
static inline struct mlx5_lag *
-mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+mlx5_lag_dev(struct mlx5_core_dev *dev)
{
return dev->priv.lag;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index fd6196b5e163..c4bf8b679541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -28,7 +28,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev;
bool res;
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_multipath(ldev);
return res;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index f607a3858ef5..624cedebb510 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies */
+/* Copyright (c) 2018-2021, Mellanox Technologies inc. All rights reserved. */
#ifndef __LIB_MLX5_EQ_H__
#define __LIB_MLX5_EQ_H__
@@ -32,6 +32,7 @@ struct mlx5_eq {
unsigned int irqn;
u8 eqn;
struct mlx5_rsc_debug *dbg;
+ struct mlx5_irq *irq;
};
struct mlx5_eq_async {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
new file mode 100644
index 000000000000..84e5683861be
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __LIB_MLX5_SF_H__