author		Yu Kuai <yukuai3@huawei.com>	2025-09-10 16:04:39 +0800
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2025-10-15 12:03:29 +0200
commit		b1b9ba3c2ea2e6b09f8b23ae0cf54a3b5c63c288 (patch)
tree		fafacec93bda63d7348b76f74176c37663659cd8 /block
parent		10bc65a048477ee906a6c6a8b85c167098f04f9a (diff)
download	linux-b1b9ba3c2ea2e6b09f8b23ae0cf54a3b5c63c288.tar.gz
		linux-b1b9ba3c2ea2e6b09f8b23ae0cf54a3b5c63c288.tar.bz2
		linux-b1b9ba3c2ea2e6b09f8b23ae0cf54a3b5c63c288.zip
blk-mq: convert to serialize updating nr_requests with update_nr_hwq_lock
[ Upstream commit 626ff4f8ebcb7207f01e7810acb85812ccf06bd8 ]

request_queue->nr_requests can be changed by:

a) switching the elevator by updating nr_hw_queues
b) switching the elevator via the elevator sysfs attribute
c) configuring the queue sysfs attribute nr_requests

The current lock order is:

1) update_nr_hwq_lock, cases a and b
2) freeze_queue
3) elevator_lock, cases a, b and c

Updating nr_requests is already serialized by elevator_lock; however, in case c, new sched_tags must be allocated if nr_requests grows, and doing that with elevator_lock held and the queue frozen risks deadlock. Hence use update_nr_hwq_lock instead, making it possible to allocate memory when the tags grow while still preventing nr_requests from being changed concurrently.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Stable-dep-of: b86433721f46 ("blk-mq: fix potential deadlock while nr_requests grown")
Signed-off-by: Sasha Levin <sashal@kernel.org>
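To make the deadlock angle concrete, here is a minimal userspace sketch of the pattern this lock order enables (and which the follow-up fix b86433721f46 completes): an outer writer lock serializes resizes without blocking I/O, so memory can be allocated before the inner lock is taken. pthread_rwlock_t stands in for update_nr_hwq_lock, pthread_mutex_t for elevator_lock, and resize_tags() is a hypothetical stand-in for blk_mq_update_nr_requests(); none of this is kernel code.

/*
 * Userspace analogue only; build with -lpthread.
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_rwlock_t update_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int *tags;
static unsigned int nr_tags;

static int resize_tags(unsigned int nr)
{
	unsigned int *new_tags;
	int ret = 0;

	/*
	 * Outer lock: serializes concurrent resizes (and, in the kernel,
	 * elevator switches) without blocking I/O, so it is safe to
	 * perform sleeping allocations while it is held.
	 */
	pthread_rwlock_wrlock(&update_lock);
	if (nr == nr_tags)
		goto unlock;

	new_tags = calloc(nr, sizeof(*new_tags));
	if (!new_tags) {
		ret = -1;
		goto unlock;
	}

	/*
	 * Inner lock: held only for the brief swap; no allocation here,
	 * which is what the new ordering makes possible.
	 */
	pthread_mutex_lock(&inner_lock);
	free(tags);
	tags = new_tags;
	nr_tags = nr;
	pthread_mutex_unlock(&inner_lock);

unlock:
	pthread_rwlock_unlock(&update_lock);
	return ret;
}

int main(void)
{
	return resize_tags(128);
}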
Diffstat (limited to 'block')
-rw-r--r--	block/blk-sysfs.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b61e956a868e..163264e4ec62 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -68,6 +68,7 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
 	int ret, err;
 	unsigned int memflags;
 	struct request_queue *q = disk->queue;
+	struct blk_mq_tag_set *set = q->tag_set;

 	if (!queue_is_mq(q))
 		return -EINVAL;
@@ -76,8 +77,11 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
 	if (ret < 0)
 		return ret;

-	memflags = blk_mq_freeze_queue(q);
-	mutex_lock(&q->elevator_lock);
+	/*
+	 * Serialize updating nr_requests with concurrent queue_requests_store()
+	 * and switching elevator.
+	 */
+	down_write(&set->update_nr_hwq_lock);
 	if (nr == q->nr_requests)
 		goto unlock;

@@ -85,20 +89,31 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
 	if (nr < BLKDEV_MIN_RQ)
 		nr = BLKDEV_MIN_RQ;

-	if (nr <= q->tag_set->reserved_tags ||
+	/*
+	 * Switching elevator is protected by update_nr_hwq_lock:
+	 * - read lock is held from elevator sysfs attribute;
+	 * - write lock is held from updating nr_hw_queues;
+	 * Hence it's safe to access q->elevator here with write lock held.
+	 */
+	if (nr <= set->reserved_tags ||
 	    (q->elevator && nr > MAX_SCHED_RQ) ||
-	    (!q->elevator && nr > q->tag_set->queue_depth)) {
+	    (!q->elevator && nr > set->queue_depth)) {
 		ret = -EINVAL;
 		goto unlock;
 	}

+	memflags = blk_mq_freeze_queue(q);
+	mutex_lock(&q->elevator_lock);
+
 	err = blk_mq_update_nr_requests(disk->queue, nr);
 	if (err)
 		ret = err;

-unlock:
 	mutex_unlock(&q->elevator_lock);
 	blk_mq_unfreeze_queue(q, memflags);
+
+unlock:
+	up_write(&set->update_nr_hwq_lock);
 	return ret;
 }
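Read together, the hunks leave queue_requests_store() with exactly the lock order stated in the commit message: update_nr_hwq_lock first, then freeze_queue and elevator_lock only around the update itself. A condensed sketch of the post-patch shape (nr_is_valid abbreviates the reserved_tags/MAX_SCHED_RQ/queue_depth checks shown above; error paths elided):

	down_write(&set->update_nr_hwq_lock);		/* 1: serialize resizes and elevator switches */
	if (nr != q->nr_requests && nr_is_valid) {
		memflags = blk_mq_freeze_queue(q);	/* 2: freeze only for the update */
		mutex_lock(&q->elevator_lock);		/* 3: innermost, as before */
		err = blk_mq_update_nr_requests(disk->queue, nr);
		mutex_unlock(&q->elevator_lock);
		blk_mq_unfreeze_queue(q, memflags);
	}
	up_write(&set->update_nr_hwq_lock);

Note that in this patch the sched_tags allocation still happens inside blk_mq_update_nr_requests() under the frozen queue; what the new outer lock provides is a point where allocation can later be hoisted out, which the follow-up fix b86433721f46 relies on.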