diff options
| author | Yu Kuai <yukuai3@huawei.com> | 2025-08-07 11:24:12 +0800 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2025-08-07 06:30:17 -0600 |
| commit | 42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338 (patch) | |
| tree | b58739caedd28044ba06837ddb6461c2fc3e73dc /block | |
| parent | 80f21806b8e34ae1e24c0fc6a0f0dfd9b055e130 (diff) | |
| download | linux-42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338.tar.gz linux-42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338.tar.bz2 linux-42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338.zip | |
lib/sbitmap: convert shallow_depth from one word to the whole sbitmap
Currently elevators will record internal 'async_depth' to throttle
asynchronous requests, and they both calculate shallow_dpeth based on
sb->shift, with the respect that sb->shift is the available tags in one
word.
However, sb->shift is not the availbale tags in the last word, see
__map_depth:
if (index == sb->map_nr - 1)
return sb->depth - (index << sb->shift);
For consequence, if the last word is used, more tags can be get than
expected, for example, assume nr_requests=256 and there are four words,
in the worst case if user set nr_requests=32, then the first word is
the last word, and still use bits per word, which is 64, to calculate
async_depth is wrong.
One the ohter hand, due to cgroup qos, bfq can allow only one request
to be allocated, and set shallow_dpeth=1 will still allow the number
of words request to be allocated.
Fix this problems by using shallow_depth to the whole sbitmap instead
of per word, also change kyber, mq-deadline and bfq to follow this,
a new helper __map_depth_with_shallow() is introduced to calculate
available bits in each word.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
| -rw-r--r-- | block/bfq-iosched.c | 35 | ||||
| -rw-r--r-- | block/bfq-iosched.h | 3 | ||||
| -rw-r--r-- | block/kyber-iosched.c | 9 | ||||
| -rw-r--r-- | block/mq-deadline.c | 16 |
4 files changed, 20 insertions, 43 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index aca9886c9ee3..3bf76902f07f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -694,17 +694,13 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct bfq_data *bfqd = data->q->elevator->elevator_data; struct bfq_io_cq *bic = bfq_bic_lookup(data->q); - int depth; - unsigned limit = data->q->nr_requests; - unsigned int act_idx; + unsigned int limit, act_idx; /* Sync reads have full depth available */ - if (op_is_sync(opf) && !op_is_write(opf)) { - depth = 0; - } else { - depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; - limit = (limit * depth) >> bfqd->full_depth_shift; - } + if (op_is_sync(opf) && !op_is_write(opf)) + limit = data->q->nr_requests; + else + limit = bfqd->async_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { /* Fast path to check if bfqq is already allocated. */ @@ -718,14 +714,16 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * available requests and thus starve other entities. */ if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) { - depth = 1; + limit = 1; break; } } + bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); - if (depth) - data->shallow_depth = depth; + __func__, bfqd->wr_busy_queues, op_is_sync(opf), limit); + + if (limit < data->q->nr_requests) + data->shallow_depth = limit; } static struct bfq_queue * @@ -7114,9 +7112,8 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) */ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) { - unsigned int depth = 1U << bt->sb.shift; + unsigned int nr_requests = bfqd->queue->nr_requests; - bfqd->full_depth_shift = bt->sb.shift; /* * In-word depths if no bfq_queue is being weight-raised: * leaving 25% of tags only for sync reads. @@ -7128,13 +7125,13 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max(depth >> 1, 1U); + bfqd->async_depths[0][0] = max(nr_requests >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U); + bfqd->async_depths[0][1] = max((nr_requests * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -7144,9 +7141,9 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U); + bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U); + bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U); } static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 0b4704932d72..34a498e6b2a5 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -813,8 +813,7 @@ struct bfq_data { * Depth limits used in bfq_limit_depth (see comments on the * function) */ - unsigned int word_depths[2][2]; - unsigned int full_depth_shift; + unsigned int async_depths[2][2]; /* * Number of independent actuators. This is equal to 1 in diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 7b6832cb3a8d..70cbc7b2deb4 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -157,10 +157,7 @@ struct kyber_queue_data { */ struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS]; - /* - * Async request percentage, converted to per-word depth for - * sbitmap_get_shallow(). - */ + /* Number of allowed async requests. */ unsigned int async_depth; struct kyber_cpu_latency __percpu *cpu_latency; @@ -447,10 +444,8 @@ static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx) { struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; - unsigned int shift = tags->bitmap_tags.sb.shift; - - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U; sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth); } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 7b6caf30e00a..b9b7cdf1d3c9 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -488,20 +488,6 @@ unlock: } /* - * 'depth' is a number in the range 1..INT_MAX representing a number of - * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since - * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow(). - * Values larger than q->nr_requests have the same effect as q->nr_requests. - */ -static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) -{ - struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; - const unsigned int nrr = hctx->queue->nr_requests; - - return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; -} - -/* * Called by __blk_mq_alloc_request(). The shallow_depth value set by this * function is used by __blk_mq_get_tag(). */ @@ -517,7 +503,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * Throttle asynchronous requests and writes such that these requests * do not block the allocation of synchronous requests. */ - data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth); + data->shallow_depth = dd->async_depth; } /* Called by blk_mq_update_nr_requests(). */ |
