diff options
64 files changed, 1711 insertions, 1301 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index f0310d42374d..7081f93bdb9a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3813,10 +3813,9 @@ F: Documentation/filesystems/befs.rst F: fs/befs/ BFQ I/O SCHEDULER -M: Paolo Valente <paolo.valente@unimore.it> -M: Jens Axboe <axboe@kernel.dk> +M: Yu Kuai <yukuai3@huawei.com> L: linux-block@vger.kernel.org -S: Maintained +S: Odd Fixes F: Documentation/block/bfq-iosched.rst F: block/bfq-* diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index b758693697c0..e831aedb4643 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -679,12 +679,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(old_parent); - if (entity->parent && - entity->parent->last_bfqq_created == bfqq) - entity->parent->last_bfqq_created = NULL; - else if (bfqd->last_bfqq_created == bfqq) - bfqd->last_bfqq_created = NULL; - + bfq_reassign_last_bfqq(bfqq, NULL); entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; /* pin down bfqg and its associated blkg */ @@ -741,7 +736,6 @@ static void bfq_sync_bfqq_move(struct bfq_data *bfqd, */ bfq_put_cooperator(sync_bfqq); bic_set_bfqq(bic, NULL, true, act_idx); - bfq_release_process_ref(bfqd, sync_bfqq); } } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 36a4998c4b37..0747d9d0e48c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2911,8 +2911,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[a_idx]; /* if a merge has already been setup, then proceed with that first */ - if (bfqq->new_bfqq) - return bfqq->new_bfqq; + new_bfqq = bfqq->new_bfqq; + if (new_bfqq) { + while (new_bfqq->new_bfqq) + new_bfqq = new_bfqq->new_bfqq; + return new_bfqq; + } /* * Check delayed stable merge for rotational or non-queueing @@ -3093,8 +3097,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } -static 
void -bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq, struct bfq_queue *new_bfqq) +void bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq, + struct bfq_queue *new_bfqq) { if (cur_bfqq->entity.parent && cur_bfqq->entity.parent->last_bfqq_created == cur_bfqq) @@ -3125,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void -bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_queue *bfqq) { + struct bfq_queue *new_bfqq = bfqq->new_bfqq; + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", (unsigned long)new_bfqq->pid); /* Save weight raising and idle window of the merged queues */ @@ -3222,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, bfq_reassign_last_bfqq(bfqq, new_bfqq); bfq_release_process_ref(bfqd, bfqq); + + return new_bfqq; } static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, @@ -3257,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, * fulfilled, i.e., bic can be redirected to new_bfqq * and bfqq can be put. */ - bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq, - new_bfqq); - /* - * If we get here, bio will be queued into new_queue, - * so use new_bfqq to decide whether bio and rq can be - * merged. 
- */ - bfqq = new_bfqq; + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq); /* * Change also bqfd->bio_bfqq, as @@ -5432,6 +5434,8 @@ void bfq_put_cooperator(struct bfq_queue *bfqq) bfq_put_queue(__bfqq); __bfqq = next; } + + bfq_release_process_ref(bfqq->bfqd, bfqq); } static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -5444,8 +5448,6 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); bfq_put_cooperator(bfqq); - - bfq_release_process_ref(bfqd, bfqq); } static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync, @@ -5701,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq, * state before killing it. */ bfqq->bic = bic; - bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); - - return new_bfqq; + return bfq_merge_bfqqs(bfqd, bic, bfqq); } /* @@ -6158,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bool waiting, idle_timer_disabled = false; if (new_bfqq) { + struct bfq_queue *old_bfqq = bfqq; /* * Release the request's reference to the old bfqq * and make sure one is taken to the shared queue. @@ -6174,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) * new_bfqq. 
*/ if (bic_to_bfqq(RQ_BIC(rq), true, - bfq_actuator_index(bfqd, rq->bio)) == bfqq) - bfq_merge_bfqqs(bfqd, RQ_BIC(rq), - bfqq, new_bfqq); + bfq_actuator_index(bfqd, rq->bio)) == bfqq) { + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq); + } - bfq_clear_bfqq_just_created(bfqq); + bfq_clear_bfqq_just_created(old_bfqq); /* * rq is about to be enqueued into new_bfqq, * release rq reference on bfqq */ - bfq_put_queue(bfqq); + bfq_put_queue(old_bfqq); rq->elv.priv[1] = new_bfqq; - bfqq = new_bfqq; } bfq_update_io_thinktime(bfqd, bfqq); @@ -6723,7 +6724,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); - if (bfqq_process_refs(bfqq) == 1) { + if (bfqq_process_refs(bfqq) == 1 && !bfqq->new_bfqq) { bfqq->pid = current->pid; bfq_clear_bfqq_coop(bfqq); bfq_clear_bfqq_split_coop(bfqq); @@ -6733,16 +6734,13 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx); bfq_put_cooperator(bfqq); - - bfq_release_process_ref(bfqq->bfqd, bfqq); return NULL; } -static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, - struct bfq_io_cq *bic, - struct bio *bio, - bool split, bool is_sync, - bool *new_queue) +static struct bfq_queue * +__bfq_get_bfqq_handle_split(struct bfq_data *bfqd, struct bfq_io_cq *bic, + struct bio *bio, bool split, bool is_sync, + bool *new_queue) { unsigned int act_idx = bfq_actuator_index(bfqd, bio); struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, act_idx); @@ -6821,6 +6819,84 @@ static void bfq_prepare_request(struct request *rq) rq->elv.priv[0] = rq->elv.priv[1] = NULL; } +static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq) +{ + struct bfq_queue *new_bfqq = bfqq->new_bfqq; + struct bfq_queue *waker_bfqq = bfqq->waker_bfqq; + + if (!waker_bfqq) + return NULL; + + while (new_bfqq) { + if (new_bfqq == waker_bfqq) { + /* + * If waker_bfqq is in the merge chain, and current + * is the 
only process. + */ + if (bfqq_process_refs(waker_bfqq) == 1) + return NULL; + break; + } + + new_bfqq = new_bfqq->new_bfqq; + } + + return waker_bfqq; +} + +static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bio *bio, + unsigned int idx, + bool is_sync) +{ + struct bfq_queue *waker_bfqq; + struct bfq_queue *bfqq; + bool new_queue = false; + + bfqq = __bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync, + &new_queue); + if (unlikely(new_queue)) + return bfqq; + + /* If the queue was seeky for too long, break it apart. */ + if (!bfq_bfqq_coop(bfqq) || !bfq_bfqq_split_coop(bfqq) || + bic->bfqq_data[idx].stably_merged) + return bfqq; + + waker_bfqq = bfq_waker_bfqq(bfqq); + + /* Update bic before losing reference to bfqq */ + if (bfq_bfqq_in_large_burst(bfqq)) + bic->bfqq_data[idx].saved_in_large_burst = true; + + bfqq = bfq_split_bfqq(bic, bfqq); + if (bfqq) { + bfq_bfqq_resume_state(bfqq, bfqd, bic, true); + return bfqq; + } + + bfqq = __bfq_get_bfqq_handle_split(bfqd, bic, bio, true, is_sync, NULL); + if (unlikely(bfqq == &bfqd->oom_bfqq)) + return bfqq; + + bfq_bfqq_resume_state(bfqq, bfqd, bic, false); + bfqq->waker_bfqq = waker_bfqq; + bfqq->tentative_waker_bfqq = NULL; + + /* + * If the waker queue disappears, then new_bfqq->waker_bfqq must be + * reset. So insert new_bfqq into the + * woken_list of the waker. See + * bfq_check_waker for details. 
+ */ + if (waker_bfqq) + hlist_add_head(&bfqq->woken_list_node, + &bfqq->waker_bfqq->woken_list); + + return bfqq; +} + /* * If needed, init rq, allocate bfq data structures associated with * rq, and increment reference counters in the destination bfq_queue @@ -6852,8 +6928,6 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) struct bfq_io_cq *bic; const int is_sync = rq_is_sync(rq); struct bfq_queue *bfqq; - bool new_queue = false; - bool bfqq_already_existing = false, split = false; unsigned int a_idx = bfq_actuator_index(bfqd, bio); if (unlikely(!rq->elv.icq)) @@ -6870,54 +6944,9 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) return RQ_BFQQ(rq); bic = icq_to_bic(rq->elv.icq); - bfq_check_ioprio_change(bic, bio); - bfq_bic_update_cgroup(bic, bio); - - bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync, - &new_queue); - - if (likely(!new_queue)) { - /* If the queue was seeky for too long, break it apart. */ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) && - !bic->bfqq_data[a_idx].stably_merged) { - struct bfq_queue *old_bfqq = bfqq; - - /* Update bic before losing reference to bfqq */ - if (bfq_bfqq_in_large_burst(bfqq)) - bic->bfqq_data[a_idx].saved_in_large_burst = - true; - - bfqq = bfq_split_bfqq(bic, bfqq); - split = true; - - if (!bfqq) { - bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, - true, is_sync, - NULL); - if (unlikely(bfqq == &bfqd->oom_bfqq)) - bfqq_already_existing = true; - } else - bfqq_already_existing = true; - - if (!bfqq_already_existing) { - bfqq->waker_bfqq = old_bfqq->waker_bfqq; - bfqq->tentative_waker_bfqq = NULL; - - /* - * If the waker queue disappears, then - * new_bfqq->waker_bfqq must be - * reset. So insert new_bfqq into the - * woken_list of the waker. See - * bfq_check_waker for details. 
- */ - if (bfqq->waker_bfqq) - hlist_add_head(&bfqq->woken_list_node, - &bfqq->waker_bfqq->woken_list); - } - } - } + bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, a_idx, is_sync); bfqq_request_allocated(bfqq); bfqq->ref++; @@ -6934,18 +6963,9 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) * addition, if the queue has also just been split, we have to * resume its state. */ - if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) { + if (likely(bfqq != &bfqd->oom_bfqq) && !bfqq->new_bfqq && + bfqq_process_refs(bfqq) == 1) bfqq->bic = bic; - if (split) { - /* - * The queue has just been split from a shared - * queue: restore the idle window and the - * possible weight raising period. - */ - bfq_bfqq_resume_state(bfqq, bfqd, bic, - bfqq_already_existing); - } - } /* * Consider bfqq as possibly belonging to a burst of newly diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 08ddf2cfae5b..687a3a7ba784 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -1156,6 +1156,8 @@ void bfq_del_bfqq_busy(struct bfq_queue *bfqq, bool expiration); void bfq_add_bfqq_busy(struct bfq_queue *bfqq); void bfq_add_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq); void bfq_del_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq); +void bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq, + struct bfq_queue *new_bfqq); /* --------------- end of interface of B-WF2Q+ ---------------- */ @@ -1183,11 +1185,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq); "%s " fmt, pid_str, ##args); \ } while (0) -#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ - blk_add_cgroup_trace_msg((bfqd)->queue, \ - &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \ -} while (0) - #else /* CONFIG_BFQ_GROUP_IOSCHED */ #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) 
do { \ @@ -1197,7 +1194,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq); bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \ blk_add_trace_msg((bfqd)->queue, "%s " fmt, pid_str, ##args); \ } while (0) -#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* CONFIG_BFQ_GROUP_IOSCHED */ diff --git a/block/bio.c b/block/bio.c index c4053d49679a..ac4d77c88932 100644 --- a/block/bio.c +++ b/block/bio.c @@ -931,7 +931,8 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) return false; - *same_page = ((vec_end_addr & PAGE_MASK) == page_addr); + *same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) & + PAGE_MASK)); if (!*same_page) { if (IS_ENABLED(CONFIG_KMSAN)) return false; @@ -1017,6 +1018,29 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio, } /** + * bio_add_hw_folio - attempt to add a folio to a bio with hw constraints + * @q: the target queue + * @bio: destination bio + * @folio: folio to add + * @len: vec entry length + * @offset: vec entry offset in the folio + * @max_sectors: maximum number of sectors that can be added + * @same_page: return if the segment has been merged inside the same folio + * + * Add a folio to a bio while respecting the hardware max_sectors, max_segment + * and gap limitations. 
+ */ +int bio_add_hw_folio(struct request_queue *q, struct bio *bio, + struct folio *folio, size_t len, size_t offset, + unsigned int max_sectors, bool *same_page) +{ + if (len > UINT_MAX || offset > UINT_MAX) + return 0; + return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset, + max_sectors, same_page); +} + +/** * bio_add_pc_page - attempt to add page to passthrough bio * @q: the target queue * @bio: destination bio @@ -1166,7 +1190,6 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) struct folio_iter fi; bio_for_each_folio_all(fi, bio) { - struct page *page; size_t nr_pages; if (mark_dirty) { @@ -1174,12 +1197,9 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) folio_mark_dirty(fi.folio); folio_unlock(fi.folio); } - page = folio_page(fi.folio, fi.offset / PAGE_SIZE); nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE - fi.offset / PAGE_SIZE + 1; - do { - bio_release_page(bio, page++); - } while (--nr_pages != 0); + unpin_user_folio(fi.folio, nr_pages); } } EXPORT_SYMBOL_GPL(__bio_release_pages); @@ -1204,8 +1224,8 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) bio_set_flag(bio, BIO_CLONED); } -static int bio_iov_add_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int offset) +static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len, + size_t offset) { bool same_page = false; @@ -1214,30 +1234,61 @@ static int bio_iov_add_page(struct bio *bio, struct page *page, if (bio->bi_vcnt > 0 && bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], - page, len, offset, &same_page)) { + folio_page(folio, 0), len, offset, + &same_page)) { bio->bi_iter.bi_size += len; - if (same_page) - bio_release_page(bio, page); + if (same_page && bio_flagged(bio, BIO_PAGE_PINNED)) + unpin_user_folio(folio, 1); return 0; } - __bio_add_page(bio, page, len, offset); + bio_add_folio_nofail(bio, folio, len, offset); return 0; } -static int bio_iov_add_zone_append_page(struct bio *bio, struct page 
*page, - unsigned int len, unsigned int offset) +static int bio_iov_add_zone_append_folio(struct bio *bio, struct folio *folio, + size_t len, size_t offset) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); bool same_page = false; - if (bio_add_hw_page(q, bio, page, len, offset, + if (bio_add_hw_folio(q, bio, folio, len, offset, queue_max_zone_append_sectors(q), &same_page) != len) return -EINVAL; - if (same_page) - bio_release_page(bio, page); + if (same_page && bio_flagged(bio, BIO_PAGE_PINNED)) + unpin_user_folio(folio, 1); return 0; } +static unsigned int get_contig_folio_len(unsigned int *num_pages, + struct page **pages, unsigned int i, + struct folio *folio, size_t left, + size_t offset) +{ + size_t bytes = left; + size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, bytes); + unsigned int j; + + /* + * We might COW a single page in the middle of + * a large folio, so we have to check that all + * pages belong to the same folio. + */ + bytes -= contig_sz; + for (j = i + 1; j < i + *num_pages; j++) { + size_t next = min_t(size_t, PAGE_SIZE, bytes); + + if (page_folio(pages[j]) != folio || + pages[j] != pages[j - 1] + 1) { + break; + } + contig_sz += next; + bytes -= next; + } + *num_pages = j - i; + + return contig_sz; +} + #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) /** @@ -1257,9 +1308,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; - ssize_t size, left; - unsigned len, i = 0; - size_t offset; + ssize_t size; + unsigned int num_pages, i = 0; + size_t offset, folio_offset, left, len; int ret = 0; /* @@ -1299,17 +1350,28 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) goto out; } - for (left = size, i = 0; left > 0; left -= len, i++) { + for (left = size, i = 0; left > 0; left -= len, i += num_pages) { 
struct page *page = pages[i]; + struct folio *folio = page_folio(page); + + folio_offset = ((size_t)folio_page_idx(folio, page) << + PAGE_SHIFT) + offset; + + len = min(folio_size(folio) - folio_offset, left); + + num_page |
