diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 14:27:21 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 14:27:21 -0800 |
| commit | 5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f (patch) | |
| tree | 02df7848b8c28552039bf463e0034f5d5518b2a9 /block | |
| parent | 553637f73c314c742243b8dc5ef072e9dadbe581 (diff) | |
| parent | 0aa2988e4fd23c0c8b33999d7b47dfbc5e6bf24b (diff) | |
| download | linux-5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f.tar.gz linux-5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f.tar.bz2 linux-5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f.zip | |
Merge tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe updates via Christoph:
- Small improvements to the logging functionality (Amit Engel)
- Authentication cleanups (Hannes Reinecke)
- Cleanup and optimize the DMA mapping cod in the PCIe driver
(Keith Busch)
- Work around the command effects for Format NVM (Keith Busch)
- Misc cleanups (Keith Busch, Christoph Hellwig)
- Fix and cleanup freeing single sgl (Keith Busch)
- MD updates via Song:
- Fix a rare crash during the takeover process
- Don't update recovery_cp when curr_resync is ACTIVE
- Free writes_pending in md_stop
- Change active_io to percpu
- Updates to drbd, inching us closer to unifying the out-of-tree driver
with the in-tree one (Andreas, Christoph, Lars, Robert)
- BFQ update adding support for multi-actuator drives (Paolo, Federico,
Davide)
- Make brd compliant with REQ_NOWAIT (me)
- Fix for IOPOLL and queue entering, fixing stalled IO waiting on
timeouts (me)
- Fix for REQ_NOWAIT with multiple bios (me)
- Fix memory leak in blktrace cleanup (Greg)
- Clean up sbitmap and fix a potential hang (Kemeng)
- Clean up some bits in BFQ, and fix a bug in the request injection
(Kemeng)
- Clean up the request allocation and issue code, and fix some bugs
related to that (Kemeng)
- ublk updates and fixes:
- Add support for unprivileged ublk (Ming)
- Improve device deletion handling (Ming)
- Misc (Liu, Ziyang)
- s390 dasd fixes (Alexander, Qiheng)
- Improve utility of request caching and fixes (Anuj, Xiao)
- zoned cleanups (Pankaj)
- More constification for kobjs (Thomas)
- blk-iocost cleanups (Yu)
- Remove bio splitting from drivers that don't need it (Christoph)
- Switch blk-cgroups to use struct gendisk. Some of this is now
incomplete as select late reverts were done. (Christoph)
- Add bvec initialization helpers, and convert callers to use that
rather than open-coding it (Christoph)
- Misc fixes and cleanups (Jinke, Keith, Arnd, Bart, Li, Martin,
Matthew, Ulf, Zhong)
* tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux: (169 commits)
brd: use radix_tree_maybe_preload instead of radix_tree_preload
block: use proper return value from bio_failfast()
block: bio-integrity: Copy flags when bio_integrity_payload is cloned
block: Fix io statistics for cgroup in throttle path
brd: mark as nowait compatible
brd: check for REQ_NOWAIT and set correct page allocation mask
brd: return 0/-error from brd_insert_page()
block: sync mixed merged request's failfast with 1st bio's
Revert "blk-cgroup: pin the gendisk in struct blkcg_gq"
Revert "blk-cgroup: pass a gendisk to blkg_lookup"
Revert "blk-cgroup: delay blk-cgroup initialization until add_disk"
Revert "blk-cgroup: delay calling blkcg_exit_disk until disk_release"
Revert "blk-cgroup: move the cgroup information to struct gendisk"
nvme-pci: remove iod use_sgls
nvme-pci: fix freeing single sgl
block: ublk: check IO buffer based on flag need_get_data
s390/dasd: Fix potential memleak in dasd_eckd_init()
s390/dasd: sort out physical vs virtual pointers usage
block: Remove the ALLOC_CACHE_SLACK constant
block: make kobj_type structures constant
...
Diffstat (limited to 'block')
34 files changed, 1128 insertions, 780 deletions
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 615516146086..27f11320b8d1 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -30,6 +30,7 @@ config IOSCHED_BFQ config BFQ_GROUP_IOSCHED bool "BFQ hierarchical scheduling support" depends on IOSCHED_BFQ && BLK_CGROUP + default y select BLK_CGROUP_RWSTAT help diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 0fbde0fc0628..89ffb3aa992c 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -513,12 +513,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd) kfree(cpd_to_bfqgd(cpd)); } -static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, - struct blkcg *blkcg) +static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; - bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node); + bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; @@ -551,7 +551,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd) bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; - bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -614,7 +613,7 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) continue; } bfqg = blkg_to_bfqg(blkg); - if (bfqg->online) { + if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } @@ -706,12 +705,52 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_activate_bfqq(bfqd, bfqq); } - if (!bfqd->in_service_queue && !bfqd->rq_in_driver) + if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } +static void bfq_sync_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *sync_bfqq, + struct bfq_io_cq *bic, + struct bfq_group *bfqg, + unsigned int act_idx) +{ + struct bfq_queue *bfqq; + + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + return; + } + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is not valid + * anymore. We cannot easily just cancel the merge (by + * clearing new_bfqq) as there may be other processes + * using this queue and holding refs to all queues + * below sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from bfqq now + * so that we cannot merge bio to a request from the + * old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bic_set_bfqq(bic, NULL, true, act_idx); + bfq_release_process_ref(bfqd, sync_bfqq); + } +} + /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. @@ -726,53 +765,20 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { - struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false); - struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true); - struct bfq_entity *entity; + unsigned int act_idx; - if (async_bfqq) { - entity = &async_bfqq->entity; + for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { + struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); + struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); - if (entity->sched_data != &bfqg->sched_data) { - bic_set_bfqq(bic, NULL, false); + if (async_bfqq && + async_bfqq->entity.sched_data != &bfqg->sched_data) { + bic_set_bfqq(bic, NULL, false, act_idx); bfq_release_process_ref(bfqd, async_bfqq); } - } - if (sync_bfqq) { - if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { - /* We are the only user of this bfqq, just move it */ - if (sync_bfqq->entity.sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, bfqg); - } else { - struct bfq_queue *bfqq; - - /* - * The queue was merged to a different queue. Check - * that the merge chain still belongs to the same - * cgroup. - */ - for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) - if (bfqq->entity.sched_data != - &bfqg->sched_data) - break; - if (bfqq) { - /* - * Some queue changed cgroup so the merge is - * not valid anymore. We cannot easily just - * cancel the merge (by clearing new_bfqq) as - * there may be other processes using this - * queue and holding refs to all queues below - * sync_bfqq->new_bfqq. Similarly if the merge - * already happened, we need to detach from - * bfqq now so that we cannot merge bio to a - * request from the old cgroup. - */ - bfq_put_cooperator(sync_bfqq); - bic_set_bfqq(bic, NULL, true); - bfq_release_process_ref(bfqd, sync_bfqq); - } - } + if (sync_bfqq) + bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); } } @@ -978,7 +984,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); - bfqg->online = false; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -1284,7 +1289,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { int ret; - ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq); + ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq); if (ret) return NULL; diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 380e9bda2e57..8a8d4441519c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -377,20 +377,23 @@ static const unsigned long bfq_late_stable_merging = 600; #define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0])) #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) -struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) +struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync, + unsigned int actuator_idx) { - return bic->bfqq[is_sync]; + if (is_sync) + return bic->bfqq[1][actuator_idx]; + + return bic->bfqq[0][actuator_idx]; } static void bfq_put_stable_ref(struct bfq_queue *bfqq); -void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) +void bic_set_bfqq(struct bfq_io_cq *bic, + struct bfq_queue *bfqq, + bool is_sync, + unsigned int actuator_idx) { - struct bfq_queue *old_bfqq = bic->bfqq[is_sync]; - - /* Clear bic pointer if bfqq is detached from this bic */ - if (old_bfqq && old_bfqq->bic == bic) - old_bfqq->bic = NULL; + struct bfq_queue *old_bfqq = bic->bfqq[is_sync][actuator_idx]; /* * If bfqq != NULL, then a non-stable queue merge between @@ -405,9 +408,18 @@ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) * we cancel the stable merge if * bic->stable_merge_bfqq == bfqq. */ - bic->bfqq[is_sync] = bfqq; + struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[actuator_idx]; + + /* Clear bic pointer if bfqq is detached from this bic */ + if (old_bfqq && old_bfqq->bic == bic) + old_bfqq->bic = NULL; - if (bfqq && bic->stable_merge_bfqq == bfqq) { + if (is_sync) + bic->bfqq[1][actuator_idx] = bfqq; + else + bic->bfqq[0][actuator_idx] = bfqq; + + if (bfqq && bfqq_data->stable_merge_bfqq == bfqq) { /* * Actually, these same instructions are executed also * in bfq_setup_cooperator, in case of abort or actual @@ -416,9 +428,9 @@ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) * did so, we would nest even more complexity in this * function. */ - bfq_put_stable_ref(bic->stable_merge_bfqq); + bfq_put_stable_ref(bfqq_data->stable_merge_bfqq); - bic->stable_merge_bfqq = NULL; + bfqq_data->stable_merge_bfqq = NULL; } } @@ -678,9 +690,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct bfq_data *bfqd = data->q->elevator->elevator_data; struct bfq_io_cq *bic = bfq_bic_lookup(data->q); - struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL; int depth; unsigned limit = data->q->nr_requests; + unsigned int act_idx; /* Sync reads have full depth available */ if (op_is_sync(opf) && !op_is_write(opf)) { @@ -690,14 +702,21 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) limit = (limit * depth) >> bfqd->full_depth_shift; } - /* - * Does queue (or any parent entity) exceed number of requests that - * should be available to it? Heavily limit depth so that it cannot - * consume more available requests and thus starve other entities. - */ - if (bfqq && bfqq_request_over_limit(bfqq, limit)) - depth = 1; + for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { + struct bfq_queue *bfqq = + bic_to_bfqq(bic, op_is_sync(opf), act_idx); + /* + * Does queue (or any parent entity) exceed number of + * requests that should be available to it? Heavily + * limit depth so that it cannot consume more + * available requests and thus starve other entities. + */ + if (bfqq && bfqq_request_over_limit(bfqq, limit)) { + depth = 1; + break; + } + } bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); if (depth) @@ -1074,9 +1093,6 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) { u64 dur; - if (bfqd->bfq_wr_max_time > 0) - return bfqd->bfq_wr_max_time; - dur = bfqd->rate_dur_prod; do_div(dur, bfqd->peak_rate); @@ -1118,36 +1134,39 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, { unsigned int old_wr_coeff = 1; bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq); + unsigned int a_idx = bfqq->actuator_idx; + struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[a_idx]; - if (bic->saved_has_short_ttime) + if (bfqq_data->saved_has_short_ttime) bfq_mark_bfqq_has_short_ttime(bfqq); else bfq_clear_bfqq_has_short_ttime(bfqq); - if (bic->saved_IO_bound) + if (bfqq_data->saved_IO_bound) bfq_mark_bfqq_IO_bound(bfqq); else bfq_clear_bfqq_IO_bound(bfqq); - bfqq->last_serv_time_ns = bic->saved_last_serv_time_ns; - bfqq->inject_limit = bic->saved_inject_limit; - bfqq->decrease_time_jif = bic->saved_decrease_time_jif; + bfqq->last_serv_time_ns = bfqq_data->saved_last_serv_time_ns; + bfqq->inject_limit = bfqq_data->saved_inject_limit; + bfqq->decrease_time_jif = bfqq_data->saved_decrease_time_jif; - bfqq->entity.new_weight = bic->saved_weight; - bfqq->ttime = bic->saved_ttime; - bfqq->io_start_time = bic->saved_io_start_time; - bfqq->tot_idle_time = bic->saved_tot_idle_time; + bfqq->entity.new_weight = bfqq_data->saved_weight; + bfqq->ttime = bfqq_data->saved_ttime; + bfqq->io_start_time = bfqq_data->saved_io_start_time; + bfqq->tot_idle_time = bfqq_data->saved_tot_idle_time; /* * Restore weight coefficient only if low_latency is on */ if (bfqd->low_latency) { old_wr_coeff = bfqq->wr_coeff; - bfqq->wr_coeff = bic->saved_wr_coeff; + bfqq->wr_coeff = bfqq_data->saved_wr_coeff; } - bfqq->service_from_wr = bic->saved_service_from_wr; - bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; - bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; - bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time; + bfqq->service_from_wr = bfqq_data->saved_service_from_wr; + bfqq->wr_start_at_switch_to_srt = + bfqq_data->saved_wr_start_at_switch_to_srt; + bfqq->last_wr_start_finish = bfqq_data->saved_last_wr_start_finish; + bfqq->wr_cur_max_time = bfqq_data->saved_wr_cur_max_time; if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || time_is_before_jiffies(bfqq->last_wr_start_finish + @@ -1766,6 +1785,33 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq, return bfqq_weight > in_serv_weight; } +/* + * Get the index of the actuator that will serve bio. + */ +static unsigned int bfq_actuator_index(struct bfq_data *bfqd, struct bio *bio) +{ + unsigned int i; + sector_t end; + + /* no search needed if one or zero ranges present */ + if (bfqd->num_actuators == 1) + return 0; + + /* bio_end_sector(bio) gives the sector after the last one */ + end = bio_end_sector(bio) - 1; + + for (i = 0; i < bfqd->num_actuators; i++) { + if (end >= bfqd->sector[i] && + end < bfqd->sector[i] + bfqd->nr_sectors[i]) + return i; + } + + WARN_ONCE(true, + "bfq_actuator_index: bio sector out of ranges: end=%llu\n", + end); + return 0; +} + static bool bfq_better_to_idle(struct bfq_queue *bfqq); static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, @@ -1785,7 +1831,9 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, arrived_in_time = ktime_get_ns() <= bfqq->ttime.last_end_request + bfqd->bfq_slice_idle * 3; - + unsigned int act_idx = bfq_actuator_index(bfqd, rq->bio); + bool bfqq_non_merged_or_stably_merged = + bfqq->bic || RQ_BIC(rq)->bfqq_data[act_idx].stably_merged; /* * bfqq deserves to be weight-raised if: @@ -1819,9 +1867,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, */ wr_or_deserves_wr = bfqd->low_latency && (bfqq->wr_coeff > 1 || - (bfq_bfqq_sync(bfqq) && - (bfqq->bic || RQ_BIC(rq)->stably_merged) && - (*interactive || soft_rt))); + (bfq_bfqq_sync(bfqq) && bfqq_non_merged_or_stably_merged && + (*interactive || soft_rt))); /* * Using the last flag, update budget and check whether bfqq @@ -2098,7 +2145,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, * We reset waker detection logic also if too much time has passed * since the first detection. If wakeups are rare, pointless idling * doesn't hurt throughput that much. The condition below makes sure - * we do not uselessly idle blocking waker in more than 1/64 cases. + * we do not uselessly idle blocking waker in more than 1/64 cases. */ if (bfqd->last_completed_rq_bfqq != bfqq->tentative_waker_bfqq || @@ -2209,9 +2256,9 @@ static void bfq_add_request(struct request *rq) * elapsed. */ if (bfqq == bfqd->in_service_queue && - (bfqd->rq_in_driver == 0 || + (bfqd->tot_rq_in_driver == 0 || (bfqq->last_serv_time_ns > 0 && - bfqd->rqs_injected && bfqd->rq_in_driver > 0)) && + bfqd->rqs_injected && bfqd->tot_rq_in_driver > 0)) && time_is_before_eq_jiffies(bfqq->decrease_time_jif + msecs_to_jiffies(10))) { bfqd->last_empty_occupied_ns = ktime_get_ns(); @@ -2235,7 +2282,7 @@ static void bfq_add_request(struct request *rq) * will be set in case injection is performed * on bfqq before rq is completed). */ - if (bfqd->rq_in_driver == 0) + if (bfqd->tot_rq_in_driver == 0) bfqd->rqs_injected = false; } } @@ -2418,7 +2465,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, */ bfq_bic_update_cgroup(bic, bio); - bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); + bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf), + bfq_actuator_index(bfqd, bio)); } else { bfqd->bio_bfqq = NULL; } @@ -2584,24 +2632,29 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) { - int i, j; + int i, j, k; - for (i = 0; i < 2; i++) - for (j = 0; j < IOPRIO_NR_LEVELS; j++) - if (bfqg->async_bfqq[i][j]) - bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); - if (bfqg->async_idle_bfqq) - bfq_bfqq_end_wr(bfqg->async_idle_bfqq); + for (k = 0; k < bfqd->num_actuators; k++) { + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_NR_LEVELS; j++) + if (bfqg->async_bfqq[i][j][k]) + bfq_bfqq_end_wr(bfqg->async_bfqq[i][j][k]); + if (bfqg->async_idle_bfqq[k]) + bfq_bfqq_end_wr(bfqg->async_idle_bfqq[k]); + } } static void bfq_end_wr(struct bfq_data *bfqd) { struct bfq_queue *bfqq; + int i; spin_lock_irq(&bfqd->lock); - list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) - bfq_bfqq_end_wr(bfqq); + for (i = 0; i < bfqd->num_actuators; i++) { + list_for_each_entry(bfqq, &bfqd->active_list[i], bfqq_list) + bfq_bfqq_end_wr(bfqq); + } list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) bfq_bfqq_end_wr(bfqq); bfq_end_wr_async(bfqd); @@ -2794,6 +2847,35 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, struct bfq_queue *bfqq); +static struct bfq_queue * +bfq_setup_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct bfq_queue *stable_merge_bfqq, + struct bfq_iocq_bfqq_data *bfqq_data) +{ + int proc_ref = min(bfqq_process_refs(bfqq), + bfqq_process_refs(stable_merge_bfqq)); + struct bfq_queue *new_bfqq; + + if (idling_boosts_thr_without_issues(bfqd, bfqq) || + proc_ref == 0) + return NULL; + + /* next function will take at least one ref */ + new_bfqq = bfq_setup_merge(bfqq, stable_merge_bfqq); + + if (new_bfqq) { + bfqq_data->stably_merged = true; + if (new_bfqq->bic) { + unsigned int new_a_idx = new_bfqq->actuator_idx; + struct bfq_iocq_bfqq_data *new_bfqq_data = + &new_bfqq->bic->bfqq_data[new_a_idx]; + + new_bfqq_data->stably_merged = true; + } + } + return new_bfqq; +} + /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return @@ -2819,6 +2901,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, void *io_struct, bool request, struct bfq_io_cq *bic) { struct bfq_queue *in_service_bfqq, *new_bfqq; + unsigned int a_idx = bfqq->actuator_idx; + struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[a_idx]; /* if a merge has already been setup, then proceed with that first */ if (bfqq->new_bfqq) @@ -2840,37 +2924,23 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, * stable merging) also if bic is associated with a * sync queue, but this bfqq is async */ - if (bfq_bfqq_sync(bfqq) && bic->stable_merge_bfqq && + if (bfq_bfqq_sync(bfqq) && bfqq_data->stable_merge_bfqq && !bfq_bfqq_just_created(bfqq) && time_is_before_jiffies(bfqq->split_time + msecs_to_jiffies(bfq_late_stable_merging)) && time_is_before_jiffies(bfqq->creation_time + msecs_to_jiffies(bfq_late_stable_merging))) { struct bfq_queue *stable_merge_bfqq = - bic->stable_merge_bfqq; - int proc_ref = min(bfqq_process_refs(bfqq), - bfqq_process_refs(stable_merge_bfqq)); + bfqq_data->stable_merge_bfqq; /* deschedule stable merge, because done or aborted here */ bfq_put_stable_ref(stable_merge_bfqq); - bic->stable_merge_bfqq = NULL; - - if (!idling_boosts_thr_without_issues(bfqd, bfqq) && - proc_ref > 0) { - /* next function will take at least one ref */ - struct bfq_queue *new_bfqq = - bfq_setup_merge(bfqq, stable_merge_bfqq); - - if (new_bfqq) { - bic->stably_merged = true; - if (new_bfqq->bic) - new_bfqq->bic->stably_merged = - true; - } - return new_bfqq; - } else - return NULL; + bfqq_data->stable_merge_bfqq = NULL; + + return bfq_setup_stable_merge(bfqd, bfqq, + stable_merge_bfqq, + bfqq_data); } } @@ -2965,6 +3035,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, static void bfq_bfqq_save_state(struct bfq_queue *bfqq) { struct bfq_io_cq *bic = bfqq->bic; + unsigned int a_idx = bfqq->actuator_idx; + struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[a_idx]; /* * If !bfqq->bic, the queue is already shared or its requests @@ -2974,18 +3046,21 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) if (!bic) return; - bic->saved_last_serv_time_ns = bfqq->last_serv_time_ns; - bic->saved_inject_limit = bfqq->inject_limit; - bic->saved_decrease_time_jif = bfqq->decrease_time_jif; - - bic->saved_weight = bfqq->entity.orig_weight; - bic->saved_ttime = bfqq->ttime; - bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq); - bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); - bic->saved_io_start_time = bfqq->io_start_time; - bic->saved_tot_idle_time = bfqq->tot_idle_time; - bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); - bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); + bfqq_data->saved_last_serv_time_ns = bfqq->last_serv_time_ns; + bfqq_data->saved_inject_limit = bfqq->inject_limit; + bfqq_data->saved_decrease_time_jif = bfqq->decrease_time_jif; + + bfqq_data->saved_weight = bfqq->entity.orig_weight; + bfqq_data->saved_ttime = bfqq->ttime; + bfqq_data->saved_has_short_ttime = + bfq_bfqq_has_short_ttime(bfqq); + bfqq_data->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); + bfqq_data->saved_io_start_time = bfqq->io_start_time; + bfqq_data->saved_tot_idle_time = bfqq->tot_idle_time; + bfqq_data->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); + bfqq_data->was_in_burst_list = + !hlist_unhashed(&bfqq->burst_list_node); + if (unlikely(bfq_bfqq_just_created(bfqq) && !bfq_bfqq_in_large_burst(bfqq) && bfqq->bfqd->low_latency)) { @@ -2998,17 +3073,21 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) * to bfqq, so that to avoid that bfqq unjustly fails * to enjoy weight raising if split soon. */ - bic->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff; - bic->saved_wr_start_at_switch_to_srt = bfq_smallest_from_now(); - bic->saved_wr_cur_max_time = bfq_wr_duration(bfqq->bfqd); - bic->saved_last_wr_start_finish = jiffies; + bfqq_data->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff; + bfqq_data->saved_wr_start_at_switch_to_srt = + bfq_smallest_from_now(); + bfqq_data->saved_wr_cur_max_time = + bfq_wr_duration(bfqq->bfqd); + bfqq_data->saved_last_wr_start_finish = jiffies; } else { - bic->saved_wr_coeff = bfqq->wr_coeff; - bic->saved_wr_start_at_switch_to_srt = + bfqq_data->saved_wr_coeff = bfqq->wr_coeff; + bfqq_data->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; - bic->saved_service_from_wr = bfqq->service_from_wr; - bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; - bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; + bfqq_data->saved_service_from_wr = + bfqq->service_from_wr; + bfqq_data->saved_last_wr_start_finish = + bfqq->last_wr_start_finish; + bfqq_data->saved_wr_cur_max_time = bfqq->wr_cur_max_time; } } @@ -3114,7 +3193,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, /* * Merge queues (that is, let bic redirect its requests to new_bfqq) */ - bic_set_bfqq(bic, new_bfqq, true); + bic_set_bfqq(bic, new_bfqq, true, bfqq->actuator_idx); bfq_mark_bfqq_coop(new_bfqq); /* * new_bfqq now belongs to at least two bics (it is a shared queue): @@ -3532,13 +3611,13 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) * - start a new observation interval with this dispatch */ if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && - bfqd->rq_in_driver == 0) + bfqd->tot_rq_in_driver == 0) goto update_rate_and_reset; /* Update sampling information */ bfqd->peak_rate_samples++; - if ((bfqd->rq_in_driver > 0 || + if ((bfqd->tot_rq_in_driver > 0 || now_ns - bfqd->last_completion < BFQ_MIN_TT) && !BFQ_RQ_SEEKY(bfqd, bfqd->last_position, rq)) bfqd->sequential_samples++; @@ -3803,10 +3882,8 @@ static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, return false; return (bfqq->wr_coeff > 1 && - (bfqd->wr_busy_queues < - tot_busy_queues || - bfqd->rq_in_driver >= |
