summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2025-01-31 13:03:47 +0100
committerJens Axboe <axboe@kernel.dk>2025-01-31 07:20:08 -0700
commit1e1a9cecfab3f22ebef0a976f849c87be8d03c1c (patch)
tree34fa7958ed94c56127aa0fc55347bb409574af3a /block
parent14ef49657ff3b7156952b2eadcf2e5bafd735795 (diff)
downloadlinux-1e1a9cecfab3f22ebef0a976f849c87be8d03c1c.tar.gz
linux-1e1a9cecfab3f22ebef0a976f849c87be8d03c1c.tar.bz2
linux-1e1a9cecfab3f22ebef0a976f849c87be8d03c1c.zip
block: force noio scope in blk_mq_freeze_queue
When block drivers or the core block code perform allocations with a frozen queue, this could try to recurse into the block device to reclaim memory and deadlock. Thus all allocations done by a process that froze a queue need to be done without __GFP_IO and __GFP_FS. Instead of tying to track all of them down, force a noio scope as part of freezing the queue. Note that nvme is a bit of a mess here due to the non-owner freezes, and they will be addressed separately. Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20250131120352.1315351-2-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c10
-rw-r--r--block/blk-iocost.c14
-rw-r--r--block/blk-iolatency.c6
-rw-r--r--block/blk-mq.c21
-rw-r--r--block/blk-pm.c2
-rw-r--r--block/blk-rq-qos.c12
-rw-r--r--block/blk-settings.c5
-rw-r--r--block/blk-sysfs.c8
-rw-r--r--block/blk-throttle.c5
-rw-r--r--block/blk-zoned.c5
-rw-r--r--block/elevator.c16
11 files changed, 61 insertions, 43 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 45a395862fbc..c795fa3a30e1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1545,6 +1545,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
struct request_queue *q = disk->queue;
struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg, *pinned_blkg = NULL;
+ unsigned int memflags;
int ret;
if (blkcg_policy_enabled(q, pol))
@@ -1559,7 +1560,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
return -EINVAL;
if (queue_is_mq(q))
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
retry:
spin_lock_irq(&q->queue_lock);
@@ -1623,7 +1624,7 @@ retry:
spin_unlock_irq(&q->queue_lock);
out:
if (queue_is_mq(q))
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
if (pinned_blkg)
blkg_put(pinned_blkg);
if (pd_prealloc)
@@ -1667,12 +1668,13 @@ void blkcg_deactivate_policy(struct gendisk *disk,
{
struct request_queue *q = disk->queue;
struct blkcg_gq *blkg;
+ unsigned int memflags;
if (!blkcg_policy_enabled(q, pol))
return;
if (queue_is_mq(q))
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
mutex_lock(&q->blkcg_mutex);
spin_lock_irq(&q->queue_lock);
@@ -1696,7 +1698,7 @@ void blkcg_deactivate_policy(struct gendisk *disk,
mutex_unlock(&q->blkcg_mutex);
if (queue_is_mq(q))
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a5894ec9696e..65a1d4427ccf 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
u32 qos[NR_QOS_PARAMS];
bool enable, user;
char *body, *p;
+ unsigned int memflags;
int ret;
blkg_conf_init(&ctx, input);
@@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(disk->queue);
}
- blk_mq_freeze_queue(disk->queue);
+ memflags = blk_mq_freeze_queue(disk->queue);
blk_mq_quiesce_queue(disk->queue);
spin_lock_irq(&ioc->lock);
@@ -3347,7 +3348,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
wbt_enable_default(disk);
blk_mq_unquiesce_queue(disk->queue);
- blk_mq_unfreeze_queue(disk->queue);
+ blk_mq_unfreeze_queue(disk->queue, memflags);
blkg_conf_exit(&ctx);
return nbytes;
@@ -3355,7 +3356,7 @@ einval:
spin_unlock_irq(&ioc->lock);
blk_mq_unquiesce_queue(disk->queue);
- blk_mq_unfreeze_queue(disk->queue);
+ blk_mq_unfreeze_queue(disk->queue, memflags);
ret = -EINVAL;
err:
@@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
{
struct blkg_conf_ctx ctx;
struct request_queue *q;
+ unsigned int memflags;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
@@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(q);
}
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
spin_lock_irq(&ioc->lock);
@@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
spin_unlock_irq(&ioc->lock);
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
blkg_conf_exit(&ctx);
return nbytes;
@@ -3502,7 +3504,7 @@ einval:
spin_unlock_irq(&ioc->lock);
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
ret = -EINVAL;
err:
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ebb522788d97..42c1e0b9a68f 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
*/
enabled = atomic_read(&blkiolat->enable_cnt);
if (enabled != blkiolat->enabled) {
- blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
+ unsigned int memflags;
+
+ memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
blkiolat->enabled = enabled;
- blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
+ blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags);
}
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da39a1cac702..40490ac88045 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
-void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue_nomemsave(struct request_queue *q)
{
blk_freeze_queue_start(q);
blk_mq_freeze_queue_wait(q);
}
-EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave);
bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
@@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
return unfreeze;
}
-void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q)
{
if (__blk_mq_unfreeze_queue(q, false))
blk_unfreeze_release_lock(q);
}
-EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore);
/*
* non_owner variant of blk_freeze_queue_start
@@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
bool shared)
{
struct request_queue *q;
+ unsigned int memflags;
lockdep_assert_held(&set->tag_list_lock);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
queue_set_hctx_shared(q, shared);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
}
@@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
struct request_queue *q;
LIST_HEAD(head);
int prev_nr_hw_queues = set->nr_hw_queues;
+ unsigned int memflags;
int i;
lockdep_assert_held(&set->tag_list_lock);
@@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
return;
+ memflags = memalloc_noio_save();
list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_freeze_queue(q);
+ blk_mq_freeze_queue_nomemsave(q);
+
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
@@ -5052,7 +5056,8 @@ switch_back:
blk_mq_elv_switch_back(&head, q);
list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue_nomemrestore(q);
+ memalloc_noio_restore(memflags);
/* Free the excess tags when nr_hw_queues shrink. */
for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
diff --git a/block/blk-pm.c b/block/blk-pm.c
index 42e842074715..8d3e052f91da 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q)
if (percpu_ref_is_zero(&q->q_usage_counter))
ret = 0;
/* Switch q_usage_counter back to per-cpu mode. */
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue_nomemrestore(q);
if (ret < 0) {
spin_lock_irq(&q->queue_lock);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index eb9618cd68ad..d4d4f4dc0e23 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -299,6 +299,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
const struct rq_qos_ops *ops)
{
struct request_queue *q = disk->queue;
+ unsigned int memflags;
lockdep_assert_held(&q->rq_qos_mutex);
@@ -310,14 +311,14 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
*/
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
if (rq_qos_id(q, rqos->id))
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
if (rqos->ops->debugfs_attrs) {
mutex_lock(&q->debugfs_mutex);
@@ -327,7 +328,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
return 0;
ebusy:
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
return -EBUSY;
}
@@ -335,17 +336,18 @@ void rq_qos_del(struct rq_qos *rqos)
{
struct request_queue *q = rqos->disk->queue;
struct rq_qos **cur;
+ unsigned int memflags;
lockdep_assert_held(&q->rq_qos_mutex);
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_rqos(rqos);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index db12396ff5c7..c44dadc35e1e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -461,11 +461,12 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update);
int queue_limits_commit_update_frozen(struct request_queue *q,
struct queue_limits *lim)
{
+ unsigned int memflags;
int ret;
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
ret = queue_limits_commit_update(q, lim);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
return ret;
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7b970e6765e7..6f548a4376aa 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -681,7 +681,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
- unsigned int noio_flag;
+ unsigned int memflags;
ssize_t res;
if (!entry->store_limit && !entry->store)
@@ -711,11 +711,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
}
mutex_lock(&q->sysfs_lock);
- blk_mq_freeze_queue(q);
- noio_flag = memalloc_noio_save();
+ memflags = blk_mq_freeze_queue(q);
res = entry->store(disk, page, length);
- memalloc_noio_restore(noio_flag);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
mutex_unlock(&q->sysfs_lock);
return res;
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 82dbaefcfa3b..8d149aff9fd0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1202,6 +1202,7 @@ static int blk_throtl_init(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct throtl_data *td;
+ unsigned int memflags;
int ret;
td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
@@ -1215,7 +1216,7 @@ static int blk_throtl_init(struct gendisk *disk)
* Freeze queue before activating policy, to synchronize with IO path,
* which is protected by 'q_usage_counter'.
*/
- blk_mq_freeze_queue(disk->queue);
+ memflags = blk_mq_freeze_queue(disk->queue);
blk_mq_quiesce_queue(disk->queue);
q->td = td;
@@ -1239,7 +1240,7 @@ static int blk_throtl_init(struct gendisk *disk)
out:
blk_mq_unquiesce_queue(disk->queue);
- blk_mq_unfreeze_queue(disk->queue);
+ blk_mq_unfreeze_queue(disk->queue, memflags);
return ret;
}
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 9d08a54c201e..761ea662ddc3 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -1717,9 +1717,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
else
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
if (ret) {
- blk_mq_freeze_queue(q);
+ unsigned int memflags = blk_mq_freeze_queue(q);
+
disk_free_zone_resources(disk);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
return ret;
diff --git a/block/elevator.c b/block/elevator.c
index b81216c48b6b..cd2ce4921601 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -570,6 +570,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q)
void elevator_init_mq(struct request_queue *q)
{
struct elevator_type *e;
+ unsigned int memflags;
int err;
WARN_ON_ONCE(blk_queue_registered(q));
@@ -590,13 +591,13 @@ void elevator_init_mq(struct request_queue *q)
*
* Disk isn't added yet, so verifying queue lock only manually.
*/
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_cancel_work_sync(q);
err = blk_mq_init_sched(q, e);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
if (err) {
pr_warn("\"%s\" elevator initialization failed, "
@@ -614,11 +615,12 @@ void elevator_init_mq(struct request_queue *q)
*/
int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
+ unsigned int memflags;
int ret;
lockdep_assert_held(&q->sysfs_lock);
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
if (q->elevator) {
@@ -639,7 +641,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
out_unfreeze:
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
if (ret) {
pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
@@ -651,9 +653,11 @@ out_unfreeze:
void elevator_disable(struct request_queue *q)
{
+ unsigned int memflags;
+
lockdep_assert_held(&q->sysfs_lock);
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
elv_unregister_queue(q);
@@ -664,7 +668,7 @@ void elevator_disable(struct request_queue *q)
blk_add_trace_msg(q, "elv switch: none");
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
/*