summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMoshe Shemesh <moshe@nvidia.com>2025-09-29 00:02:09 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-10-15 11:58:06 +0200
commit3311f0d15bb3bd38611533468e8a6284a6cf3c2c (patch)
treeb946e63e55a59ca8463a1f67a80445faaaf17dcb
parent3df251a65b4e6154e7682392760d31863c76f48e (diff)
downloadlinux-3311f0d15bb3bd38611533468e8a6284a6cf3c2c.tar.gz
linux-3311f0d15bb3bd38611533468e8a6284a6cf3c2c.tar.bz2
linux-3311f0d15bb3bd38611533468e8a6284a6cf3c2c.zip
net/mlx5: fw reset, add reset timeout work
[ Upstream commit 5cfbe7ebfa42fd3c517a701dab5bd73524da9088 ] Add sync reset timeout to stop poll_sync_reset in case there was no reset done or abort event within timeout. Otherwise poll sync reset will just continue and in case of fw fatal error no health reporting will be done. Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event") Signed-off-by: Moshe Shemesh <moshe@nvidia.com> Reviewed-by: Shay Drori <shayd@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c24
1 files changed, 24 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 1547704c8976..4d64d179b5dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -27,6 +27,7 @@ struct mlx5_fw_reset {
struct work_struct reset_reload_work;
struct work_struct reset_now_work;
struct work_struct reset_abort_work;
+ struct delayed_work reset_timeout_work;
unsigned long reset_flags;
u8 reset_method;
struct timer_list timer;
@@ -257,6 +258,8 @@ static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool
return -EALREADY;
}
+ if (current_work() != &fw_reset->reset_timeout_work.work)
+ cancel_delayed_work(&fw_reset->reset_timeout_work);
mlx5_stop_sync_reset_poll(dev);
if (poll_health)
mlx5_start_health_poll(dev);
@@ -327,6 +330,11 @@ static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
}
mlx5_stop_health_poll(dev, true);
mlx5_start_sync_reset_poll(dev);
+
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
+ &fw_reset->reset_flags))
+ schedule_delayed_work(&fw_reset->reset_timeout_work,
+ msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)));
return 0;
}
@@ -700,6 +708,19 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
}
}
+static void mlx5_sync_reset_timeout_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct mlx5_fw_reset *fw_reset =
+ container_of(dwork, struct mlx5_fw_reset, reset_timeout_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
+ return;
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
+}
+
static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
@@ -783,6 +804,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
cancel_work_sync(&fw_reset->reset_reload_work);
cancel_work_sync(&fw_reset->reset_now_work);
cancel_work_sync(&fw_reset->reset_abort_work);
+ cancel_delayed_work(&fw_reset->reset_timeout_work);
}
static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
@@ -826,6 +848,8 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
+ INIT_DELAYED_WORK(&fw_reset->reset_timeout_work,
+ mlx5_sync_reset_timeout_work);
init_completion(&fw_reset->done);
return 0;