summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-10-25 12:59:42 +0100
committerDavid S. Miller <davem@davemloft.net>2021-10-25 12:59:42 +0100
commit57bb11328f9ab88571889f4a842859fcdd64d6cb (patch)
treeca7071763f0791120ec4ce1e5f0e03ea1c281e20
parent2d7e73f09fc2f5d968ca375f047718cf25ae2b92 (diff)
parenteccd0a80dc7f4be65430236db475546b0ab9ec37 (diff)
downloadlinux-57bb11328f9ab88571889f4a842859fcdd64d6cb.tar.gz
linux-57bb11328f9ab88571889f4a842859fcdd64d6cb.tar.bz2
linux-57bb11328f9ab88571889f4a842859fcdd64d6cb.zip
Merge branch 'dsa-rtnl'
Vladimir Oltean says: ==================== Drop rtnl_lock from DSA .port_fdb_{add,del} As mentioned in the RFC posted 2 months ago: https://patchwork.kernel.org/project/netdevbpf/cover/20210824114049.3814660-1-vladimir.oltean@nxp.com/ DSA is transitioning to a driver API where the rtnl_lock is not held when calling ds->ops->port_fdb_add() and ds->ops->port_fdb_del(). Drivers cannot take that lock privately from those callbacks either. This change is required so that DSA can wait for switchdev FDB work items to finish before leaving the bridge. That change will be made in a future patch series. A small selftest is provided with the patch set in the hope that concurrency issues uncovered by this series, but not spotted by me by code inspection, will be caught. A status of the existing drivers: - mv88e6xxx_port_fdb_add() and mv88e6xxx_port_fdb_del() take mv88e6xxx_reg_lock() so they should be safe. - qca8k_fdb_add() and qca8k_fdb_del() take mutex_lock(&priv->reg_mutex) so they should be safe. - hellcreek_fdb_add() and hellcreek_fdb_add() take mutex_lock(&hellcreek->reg_lock) so they should be safe. - ksz9477_port_fdb_add() and ksz9477_port_fdb_del() take mutex_lock(&dev->alu_mutex) so they should be safe. - b53_fdb_add() and b53_fdb_del() did not have locking, so I've added a scheme based on my own judgement there (not tested). - felix_fdb_add() and felix_fdb_del() did not have locking, I've added and tested a locking scheme there. - mt7530_port_fdb_add() and mt7530_port_fdb_del() take mutex_lock(&priv->reg_mutex), so they should be safe. - gswip_port_fdb() did not have locking, so I've added a non-expert locking scheme based on my own judgement (not tested). - lan9303_alr_add_port() and lan9303_alr_del_port() take mutex_lock(&chip->alr_mutex) so they should be safe. - sja1105_fdb_add() and sja1105_fdb_del() did not have locking, I've added and tested a locking scheme. Changes in v3: Unlock arl_mutex only once in b53_fdb_dump(). Changes in v4: - Use __must_hold in ocelot and b53 - Add missing mutex_init in lantiq_gswip - Clean up the selftest a bit. Changes in v5: - Replace __must_hold with a comment. - Add a new patch (01/10). ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/net/dsa/b53/b53_common.c36
-rw-r--r--drivers/net/dsa/b53/b53_priv.h1
-rw-r--r--drivers/net/dsa/lantiq_gswip.c28
-rw-r--r--drivers/net/dsa/sja1105/sja1105.h2
-rw-r--r--drivers/net/dsa/sja1105/sja1105_dynamic_config.c91
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c1
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c53
-rw-r--r--include/net/dsa.h1
-rw-r--r--include/soc/mscc/ocelot.h3
-rw-r--r--net/dsa/dsa2.c1
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/dsa/switch.c80
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh47
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh10
15 files changed, 280 insertions, 77 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c5aa142d4b3a..975086c5345d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13056,6 +13056,7 @@ F: include/linux/dsa/
F: include/linux/platform_data/dsa.h
F: include/net/dsa.h
F: net/dsa/
+F: tools/testing/selftests/drivers/net/dsa/
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 51dfaf817a40..af4761968733 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1542,7 +1542,7 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL(b53_vlan_del);
-/* Address Resolution Logic routines */
+/* Address Resolution Logic routines. Caller must hold &dev->arl_mutex. */
static int b53_arl_op_wait(struct b53_device *dev)
{
unsigned int timeout = 10;
@@ -1707,6 +1707,7 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
+ int ret;
/* 5325 and 5365 require some more massaging, but could
* be supported eventually
@@ -1714,7 +1715,11 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
- return b53_arl_op(priv, 0, port, addr, vid, true);
+ mutex_lock(&priv->arl_mutex);
+ ret = b53_arl_op(priv, 0, port, addr, vid, true);
+ mutex_unlock(&priv->arl_mutex);
+
+ return ret;
}
EXPORT_SYMBOL(b53_fdb_add);
@@ -1722,8 +1727,13 @@ int b53_fdb_del(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
+ int ret;
- return b53_arl_op(priv, 0, port, addr, vid, false);
+ mutex_lock(&priv->arl_mutex);
+ ret = b53_arl_op(priv, 0, port, addr, vid, false);
+ mutex_unlock(&priv->arl_mutex);
+
+ return ret;
}
EXPORT_SYMBOL(b53_fdb_del);
@@ -1780,6 +1790,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
int ret;
u8 reg;
+ mutex_lock(&priv->arl_mutex);
+
/* Start search operation */
reg = ARL_SRCH_STDN;
b53_write8(priv, B53_ARLIO_PAGE, B53_ARL_SRCH_CTL, reg);
@@ -1787,18 +1799,18 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
do {
ret = b53_arl_search_wait(priv);
if (ret)
- return ret;
+ break;
b53_arl_search_rd(priv, 0, &results[0]);
ret = b53_fdb_copy(port, &results[0], cb, data);
if (ret)
- return ret;
+ break;
if (priv->num_arl_bins > 2) {
b53_arl_search_rd(priv, 1, &results[1]);
ret = b53_fdb_copy(port, &results[1], cb, data);
if (ret)
- return ret;
+ break;
if (!results[0].is_valid && !results[1].is_valid)
break;
@@ -1806,6 +1818,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
} while (count++ < b53_max_arl_entries(priv) / 2);
+ mutex_unlock(&priv->arl_mutex);
+
return 0;
}
EXPORT_SYMBOL(b53_fdb_dump);
@@ -1814,6 +1828,7 @@ int b53_mdb_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
struct b53_device *priv = ds->priv;
+ int ret;
/* 5325 and 5365 require some more massaging, but could
* be supported eventually
@@ -1821,7 +1836,11 @@ int b53_mdb_add(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
- return b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
+ mutex_lock(&priv->arl_mutex);
+ ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
+ mutex_unlock(&priv->arl_mutex);
+
+ return ret;
}
EXPORT_SYMBOL(b53_mdb_add);
@@ -1831,7 +1850,9 @@ int b53_mdb_del(struct dsa_switch *ds, int port,
struct b53_device *priv = ds->priv;
int ret;
+ mutex_lock(&priv->arl_mutex);
ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, false);
+ mutex_unlock(&priv->arl_mutex);
if (ret)
dev_err(ds->dev, "failed to delete MDB entry\n");
@@ -2668,6 +2689,7 @@ struct b53_device *b53_switch_alloc(struct device *base,
mutex_init(&dev->reg_mutex);
mutex_init(&dev->stats_mutex);
+ mutex_init(&dev->arl_mutex);
return dev;
}
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 544101e74bca..579da74ada64 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -107,6 +107,7 @@ struct b53_device {
struct mutex reg_mutex;
struct mutex stats_mutex;
+ struct mutex arl_mutex;
const struct b53_io_ops *ops;
/* chip specific data */
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index a3e937a371ef..7056d98d8177 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -276,6 +276,7 @@ struct gswip_priv {
int num_gphy_fw;
struct gswip_gphy_fw *gphy_fw;
u32 port_vlan_filter;
+ struct mutex pce_table_lock;
};
struct gswip_pce_table_entry {
@@ -523,10 +524,14 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSRD :
GSWIP_PCE_TBL_CTRL_OPMOD_ADRD;
+ mutex_lock(&priv->pce_table_lock);
+
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
- if (err)
+ if (err) {
+ mutex_unlock(&priv->pce_table_lock);
return err;
+ }
gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR);
gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK |
@@ -536,8 +541,10 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
- if (err)
+ if (err) {
+ mutex_unlock(&priv->pce_table_lock);
return err;
+ }
for (i = 0; i < ARRAY_SIZE(tbl->key); i++)
tbl->key[i] = gswip_switch_r(priv, GSWIP_PCE_TBL_KEY(i));
@@ -553,6 +560,8 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
tbl->valid = !!(crtl & GSWIP_PCE_TBL_CTRL_VLD);
tbl->gmap = (crtl & GSWIP_PCE_TBL_CTRL_GMAP_MASK) >> 7;
+ mutex_unlock(&priv->pce_table_lock);
+
return 0;
}
@@ -565,10 +574,14 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv,
u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSWR :
GSWIP_PCE_TBL_CTRL_OPMOD_ADWR;
+ mutex_lock(&priv->pce_table_lock);
+
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
- if (err)
+ if (err) {
+ mutex_unlock(&priv->pce_table_lock);
return err;
+ }
gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR);
gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK |
@@ -600,8 +613,12 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv,
crtl |= GSWIP_PCE_TBL_CTRL_BAS;
gswip_switch_w(priv, crtl, GSWIP_PCE_TBL_CTRL);
- return gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
- GSWIP_PCE_TBL_CTRL_BAS);
+ err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
+ GSWIP_PCE_TBL_CTRL_BAS);
+
+ mutex_unlock(&priv->pce_table_lock);
+
+ return err;
}
/* Add the LAN port into a bridge with the CPU port by
@@ -2104,6 +2121,7 @@ static int gswip_probe(struct platform_device *pdev)
priv->ds->priv = priv;
priv->ds->ops = priv->hw_info->ops;
priv->dev = dev;
+ mutex_init(&priv->pce_table_lock);
version = gswip_switch_r(priv, GSWIP_VERSION);
np = dev->of_node;
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 808419f3b808..21dba16af097 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -261,6 +261,8 @@ struct sja1105_private {
* the switch doesn't confuse them with one another.
*/
struct mutex mgmt_lock;
+ /* Serializes access to the dynamic config interface */
+ struct mutex dynamic_config_lock;
struct devlink_region **regions;
struct sja1105_cbs_entry *cbs;
struct mii_bus *mdio_base_t1;
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index f2049f52833c..7729d3f8b7f5 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -1170,6 +1170,56 @@ const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN] = {
},
};
+#define SJA1105_DYNAMIC_CONFIG_SLEEP_US 10
+#define SJA1105_DYNAMIC_CONFIG_TIMEOUT_US 100000
+
+static int
+sja1105_dynamic_config_poll_valid(struct sja1105_private *priv,
+ struct sja1105_dyn_cmd *cmd,
+ const struct sja1105_dynamic_table_ops *ops)
+{
+ u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {};
+ int rc;
+
+ /* We don't _need_ to read the full entry, just the command area which
+ * is a fixed SJA1105_SIZE_DYN_CMD. But our cmd_packing() API expects a
+ * buffer that contains the full entry too. Additionally, our API
+ * doesn't really know how many bytes into the buffer does the command
+ * area really begin. So just read back the whole entry.
+ */
+ rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
+ ops->packed_size);
+ if (rc)
+ return rc;
+
+ /* Unpack the command structure, and return it to the caller in case it
+ * needs to perform further checks on it (VALIDENT).
+ */
+ memset(cmd, 0, sizeof(*cmd));
+ ops->cmd_packing(packed_buf, cmd, UNPACK);
+
+ /* Hardware hasn't cleared VALID => still working on it */
+ return cmd->valid ? -EAGAIN : 0;
+}
+
+/* Poll the dynamic config entry's control area until the hardware has
+ * cleared the VALID bit, which means we have confirmation that it has
+ * finished processing the command.
+ */
+static int
+sja1105_dynamic_config_wait_complete(struct sja1105_private *priv,
+ struct sja1105_dyn_cmd *cmd,
+ const struct sja1105_dynamic_table_ops *ops)
+{
+ int rc;
+
+ return read_poll_timeout(sja1105_dynamic_config_poll_valid,
+ rc, rc != -EAGAIN,
+ SJA1105_DYNAMIC_CONFIG_SLEEP_US,
+ SJA1105_DYNAMIC_CONFIG_TIMEOUT_US,
+ false, priv, cmd, ops);
+}
+
/* Provides read access to the settings through the dynamic interface
* of the switch.
* @blk_idx is used as key to select from the sja1105_dynamic_table_ops.
@@ -1196,7 +1246,6 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
struct sja1105_dyn_cmd cmd = {0};
/* SPI payload buffer */
u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {0};
- int retries = 3;
int rc;
if (blk_idx >= BLK_IDX_MAX_DYN)
@@ -1234,33 +1283,21 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
ops->entry_packing(packed_buf, entry, PACK);
/* Send SPI write operation: read config table entry */
+ mutex_lock(&priv->dynamic_config_lock);
rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
ops->packed_size);
- if (rc < 0)
+ if (rc < 0) {
+ mutex_unlock(&priv->dynamic_config_lock);
return rc;
+ }
- /* Loop until we have confirmation that hardware has finished
- * processing the command and has cleared the VALID field
- */
- do {
- memset(packed_buf, 0, ops->packed_size);
-
- /* Retrieve the read operation's result */
- rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
- ops->packed_size);
- if (rc < 0)
- return rc;
-
- cmd = (struct sja1105_dyn_cmd) {0};
- ops->cmd_packing(packed_buf, &cmd, UNPACK);
-
- if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
- return -ENOENT;
- cpu_relax();
- } while (cmd.valid && --retries);
+ rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
+ mutex_unlock(&priv->dynamic_config_lock);
+ if (rc < 0)
+ return rc;
- if (cmd.valid)
- return -ETIMEDOUT;
+ if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
+ return -ENOENT;
/* Don't dereference possibly NULL pointer - maybe caller
* only wanted to see whether the entry existed or not.
@@ -1316,8 +1353,16 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv,
ops->entry_packing(packed_buf, entry, PACK);
/* Send SPI write operation: read config table entry */
+ mutex_lock(&priv->dynamic_config_lock);
rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
ops->packed_size);
+ if (rc < 0) {
+ mutex_unlock(&priv->dynamic_config_lock);
+ return rc;
+ }
+
+ rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
+ mutex_unlock(&priv->dynamic_config_lock);
if (rc < 0)
return rc;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index ef46ae53ab56..c343effe2e96 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3365,6 +3365,7 @@ static int sja1105_probe(struct spi_device *spi)
priv->ds = ds;
mutex_init(&priv->ptp_data.lock);
+ mutex_init(&priv->dynamic_config_lock);
mutex_init(&priv->mgmt_lock);
rc = sja1105_parse_dt(priv);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 4e5ae687d2e2..e6c18b598d5c 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -20,11 +20,13 @@ struct ocelot_mact_entry {
enum macaccess_entry_type type;
};
+/* Caller must hold &ocelot->mact_lock */
static inline u32 ocelot_mact_read_macaccess(struct ocelot *ocelot)
{
return ocelot_read(ocelot, ANA_TABLES_MACACCESS);
}
+/* Caller must hold &ocelot->mact_lock */
static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
{
u32 val;
@@ -36,6 +38,7 @@ static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
}
+/* Caller must hold &ocelot->mact_lock */
static void ocelot_mact_select(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN],
unsigned int vid)
@@ -67,6 +70,7 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN);
unsigned int mc_ports;
+ int err;
/* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */
if (type == ENTRYTYPE_MACv4)
@@ -79,18 +83,28 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
if (mc_ports & BIT(ocelot->num_phys_ports))
cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY;
+ mutex_lock(&ocelot->mact_lock);
+
ocelot_mact_select(ocelot, mac, vid);
/* Issue a write command */
ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS);
- return ocelot_mact_wait_for_completion(ocelot);
+ err = ocelot_mact_wait_for_completion(ocelot);
+
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
}
EXPORT_SYMBOL(ocelot_mact_learn);
int ocelot_mact_forget(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN], unsigned int vid)
{
+ int err;
+
+ mutex_lock(&ocelot->mact_lock);
+
ocelot_mact_select(ocelot, mac, vid);
/* Issue a forget command */
@@ -98,7 +112,11 @@ int ocelot_mact_forget(struct ocelot *ocelot,
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_FORGET),
ANA_TABLES_MACACCESS);
- return ocelot_mact_wait_for_completion(ocelot);
+ err = ocelot_mact_wait_for_completion(ocelot);
+
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
}
EXPORT_SYMBOL(ocelot_mact_forget);
@@ -114,7 +132,9 @@ static void ocelot_mact_init(struct ocelot *ocelot)
| ANA_AGENCTRL_LEARN_IGNORE_VLAN,
ANA_AGENCTRL);
- /* Clear the MAC table */
+ /* Clear the MAC table. We are not concurrent with anyone, so
+ * holding &ocelot->mact_lock is pointless.
+ */
ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
}
@@ -1170,6 +1190,7 @@ nla_put_failure:
}
EXPORT_SYMBOL(ocelot_port_fdb_do_dump);
+/* Caller must hold &ocelot->mact_lock */
static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
struct ocelot_mact_entry *entry)
{
@@ -1220,33 +1241,40 @@ static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
+ int err = 0;
int i, j;
+ /* We could take the lock just around ocelot_mact_read, but doing so
+ * thousands of times in a row seems rather pointless and inefficient.
+ */
+ mutex_lock(&ocelot->mact_lock);
+
/* Loop through all the mac tables entries. */
for (i = 0; i < ocelot->num_mact_rows; i++) {
for (j = 0; j < 4; j++) {
struct ocelot_mact_entry entry;
bool is_static;
- int ret;
- ret = ocelot_mact_read(ocelot, port, i, j, &entry);
+ err = ocelot_mact_read(ocelot, port, i, j, &entry);
/* If the entry is invalid (wrong port, invalid...),
* skip it.
*/
- if (ret == -EINVAL)
+ if (err == -EINVAL)
continue;
- else if (ret)
- return ret;
+ else if (err)
+ break;
is_static = (entry.type == ENTRYTYPE_LOCKED);
- ret = cb(entry.mac, entry.vid, is_static, data);
- if (ret)
- return ret;
+ err = cb(entry.mac, entry.vid, is_static, data);
+ if (err)
+ break;
}
}
- return 0;
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
}
EXPORT_SYMBOL(ocelot_fdb_dump);
@@ -2231,6 +2259,7 @@ int ocelot_init(struct ocelot *ocelot)
mutex_init(&ocelot->stats_lock);
mutex_init(&ocelot->ptp_lock);
+ mutex_init(&ocelot->mact_lock);
spin_lock_init(&ocelot->ptp_clock_lock);
spin_lock_init(&ocelot->ts_id_lock);
snprintf(queue_name, sizeof(queue_name), "%s-stats",
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 1cd9c2461f0d..badd214f7470 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -287,6 +287,7 @@ struct dsa_port {
/* List of MAC addresses that must be forwarded on this port.
* These are only valid on CPU ports and DSA links.
*/
+ struct mutex addr_lists_lock;
struct list_head fdbs;
struct list_head mdbs;
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 9b872da0c246..fef3a36b0210 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -675,6 +675,9 @@ struct ocelot {
struct delayed_work stats_work;
struct workqueue_struct *stats_queue;
+ /* Lock for serializing access to the MAC table */
+ struct mutex mact_lock;
+
struct workqueue_struct *owq;
u8 ptp:1;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f5270114dcb8..826957b6442b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -433,6 +433,7 @@ static int dsa_port_setup(struct dsa_port *dp)
if (dp->setup)
return 0;
+ mutex_init(&dp->addr_lists_lock);
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9d9fef668eba..adcfb2cb4e61 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2413,7 +2413,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
dp = dsa_to_port(ds, switchdev_work->port);
- rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
if (switchdev_work->host_addr)
@@ -2448,7 +2447,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
break;
}
- rtnl_unlock();
dev_put(switchdev_work->dev);
kfree(switchdev_work);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 2b1b21bde830..bb155a16d454 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -215,26 +215,30 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_add(ds, port, mdb);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
if (a) {
refcount_inc(&a->refcount);
- return 0;
+ goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
- if (!a)
- return -ENOMEM;
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
err = ds->ops->port_mdb_add(ds, port, mdb);
if (err) {
kfree(a);
- return err;
+ goto out;
}
ether_addr_copy(a->addr, mdb->addr);
@@ -242,7 +246,10 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->mdbs);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_port_do_mdb_del(struct dsa_port *dp,
@@ -251,29 +258,36 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_del(ds, port, mdb);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
- if (!a)
- return -ENOENT;
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
if (!refcount_dec_and_test(&a->refcount))
- return 0;
+ goto out;
err = ds->ops->port_mdb_del(ds, port, mdb);
if (err) {
- refcount_inc(&a->refcount);
- return err;
+ refcount_set(&a->refcount, 1);
+ goto out;
}
list_del(&a->list);
kfree(a);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
@@ -282,26 +296,30 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_add(ds, port, addr, vid);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
if (a) {
refcount_inc(&a->refcount);
- return 0;
+ goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
- if (!a)
- return -ENOMEM;
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
err = ds->ops->port_fdb_add(ds, port, addr, vid);
if (err) {
kfree(a);
- return err;
+ goto out;
}
ether_addr_copy(a->addr, addr);
@@ -309,7 +327,10 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->fdbs);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
@@ -318,29 +339,36 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_del(ds, port, addr, vid);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
- if (!a)
- return -ENOENT;
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
if (!refcount_dec_and_test(&a->refcount))
- return 0;
+ goto out;
err = ds->ops->port_fdb_del(ds, port, addr, vid);
if (err) {
- refcount_inc(&a->refcount);
- return err;
+ refcount_set(&a->refcount, 1);
+ goto out;
}
list_del(&a->list);
kfree(a);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
new file mode 100755
index 000000000000..dca8be6092b9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Bridge FDB entries can be offloaded to DSA switches without holding the
+# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
+# serialization, which means their code paths are not well tested in the
+# presence of concurrency.
+# This test creates a background task that stresses the FDB by adding and
+# deleting an entry many times in a row without the rtnl_mutex held.
+# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
+# (with rtnl_mutex held) from a foreground task.
+# Since either the FDB dump or the additions/removals can fail, but the
+# additions and removals are performed in deferred as opposed to process
+# context, we cannot simply check for user space error codes.
+
+WAIT_TIME=1
+NUM_NETIFS=1
+REQUIRE_JQ="no"
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+cleanup() {
+ echo "Cleaning up"
+ kill $pid && wait $pid &> /dev/null
+ ip link del br0
+ echo "Please check kernel log for errors"
+}
+trap 'cleanup' EXIT
+
+eth=${NETIFS[p1]}
+
+ip link del br0 2&>1 >/dev/null || :
+ip link add br0 type bridge && ip link set $eth master br0
+
+(while :; do
+ bridge fdb add 00:01:02:03:04:05 dev $eth master static
+ bridge fdb del 00:01:02:03:04:05 dev $eth master static
+done) &
+pid=$!
+
+for i in $(seq 1 50); do
+ bridge fdb show > /dev/null
+ sleep 3
+ echo "$((${i} * 2))% complete..."
+done
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 92087d423bcf..520d8b53464b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -23,6 +23,8 @@ MC_CLI=${MC_CLI:=smcroutectl}
PING_TIMEOUT=${PING_TIMEOUT:=5}
WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
+REQUIRE_JQ=${REQUIRE_JQ:=yes}
+REQUIRE_MZ=${REQUIRE_MZ:=yes}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -141,8 +143,12 @@ require_command()
fi
}
-require_command jq
-require_command $MZ
+if [[ "$REQUIRE_JQ" = "yes" ]]; then
+ require_command jq
+fi
+if [[ "$REQUIRE_MZ" = "yes" ]]; then
+ require_command $MZ
+fi
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""