| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 13:46:35 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 13:46:35 -0700 |
| commit | c013d0af81f60cc7dbe357c4e2a925fb6738dbfe (patch) | |
| tree | 171dfdf928d0450a3fa98a58b2297d857804bb35 /block | |
| parent | 42df1cbf6a4726934cc5dac12bf263aa73c49fa3 (diff) | |
| parent | 8d9fdb6011b4d413271eba3a62e10f89efecc419 (diff) | |
Merge tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
- Improve the type checking of request flags (Bart)
- Ensure queue mapping for a single queue always picks the right queue
(Bart)
- Sanitize the io priority handling (Jan)
- rq-qos race fix (Jinke)
- Reserved tags handling improvements (John)
- Separate memory alignment from file/disk offset alignment for O_DIRECT
  (Keith; see the sketch after this list)
- Add new ublk driver, userspace block driver using io_uring for
communication with the userspace backend (Ming)
- Use try_cmpxchg() to cleanup the code in various spots (Uros)
- Finally remove bdevname() (Christoph)
- Clean up the zoned device handling (Christoph)
- Clean up independent access range support (Christoph)
- Clean up and improve block sysfs handling (Christoph)
- Clean up and improve teardown of block devices.
This turns the usual two-step process into something that is simpler
to implement and handle in block drivers (Christoph)
- Clean up chunk size handling (Christoph)
- Misc cleanups and fixes (Bart, Bo, Dan, GuoYong, Jason, Keith, Liu,
Ming, Sebastian, Yang, Ying)
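
The O_DIRECT alignment item above separates two constraints that are easy to conflate: how the user buffer is aligned in memory versus how the file/disk offset and length are aligned. The sketch below (referenced from that bullet) is plain userspace code, not code from this series; it assumes a 4096-byte logical block size and only shows the two independent knobs. With this series the memory-side requirement can be relaxed toward the device's DMA alignment, while offsets and lengths still have to be logical-block aligned.

```c
/*
 * Hedged userspace illustration, not kernel code from the series.
 * BLOCK_SIZE = 4096 is an assumption for a typical 4K-sector device.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define BLOCK_SIZE 4096	/* assumed logical block size */

int main(int argc, char **argv)
{
	void *buf;
	int fd;
	ssize_t n;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file-or-blockdev>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Memory alignment of the buffer: historically had to match the
	 * logical block size; the series relaxes this side only. */
	if (posix_memalign(&buf, BLOCK_SIZE, BLOCK_SIZE)) {
		perror("posix_memalign");
		return 1;
	}

	/* File offset and length: these must stay logical-block aligned. */
	n = pread(fd, buf, BLOCK_SIZE, 0);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes with O_DIRECT\n", n);

	free(buf);
	close(fd);
	return 0;
}
```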
* tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block: (178 commits)
ublk_drv: fix double shift bug
ublk_drv: make sure that correct flags(features) returned to userspace
ublk_drv: fix error handling of ublk_add_dev
ublk_drv: fix lockdep warning
block: remove __blk_get_queue
block: call blk_mq_exit_queue from disk_release for never added disks
blk-mq: fix error handling in __blk_mq_alloc_disk
ublk: defer disk allocation
ublk: rewrite ublk_ctrl_get_queue_affinity to not rely on hctx->cpumask
ublk: fold __ublk_create_dev into ublk_ctrl_add_dev
ublk: cleanup ublk_ctrl_uring_cmd
ublk: simplify ublk_ch_open and ublk_ch_release
ublk: remove the empty open and release block device operations
ublk: remove UBLK_IO_F_PREFLUSH
ublk: add a MAINTAINERS entry
block: don't allow the same type rq_qos add more than once
mmc: fix disk/queue leak in case of adding disk failure
ublk_drv: fix an IS_ERR() vs NULL check
ublk: remove UBLK_IO_F_INTEGRITY
ublk_drv: remove unneeded semicolon
...
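
Several of the commits above touch the new ublk driver, whose userspace backend is driven entirely over io_uring. The snippet below is not the ublk control protocol; it is only a minimal liburing submit/complete loop (a single read of an ordinary file) to illustrate the mechanism that communication builds on. It assumes liburing is installed (build with `gcc demo.c -luring`).

```c
/* Minimal io_uring submit/complete loop; illustrative only, not ublk code. */
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[4096];
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (io_uring_queue_init(8, &ring, 0) < 0) {
		perror("io_uring_queue_init");
		return 1;
	}

	/* Queue one read request ... */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	/* ... and wait for its completion. */
	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("read completed: res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}
```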
Diffstat (limited to 'block')
45 files changed, 678 insertions, 688 deletions
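
Much of the diff that follows is the mechanical part of Bart's type-checking work: request flags move from a bare `unsigned int op` to the dedicated `blk_opf_t` type, and the parameter is conventionally renamed `opf`, so sparse can flag callers that mix operation/flag bits with unrelated integers. The stand-alone sketch below imitates the idea so it compiles outside the kernel; the names mirror the kernel's, but the bit values and simplified helpers are assumptions, not the real definitions from `blk_types.h`.

```c
/*
 * Simplified, userspace-compilable imitation of the blk_opf_t conversion.
 * In the kernel, blk_opf_t is a __u32 __bitwise typedef; the bit layout
 * below is illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int blk_opf_t;	/* kernel: __u32 __bitwise, checked by sparse */

#define REQ_OP_READ	((blk_opf_t)0)
#define REQ_OP_WRITE	((blk_opf_t)1)
#define REQ_OP_MASK	((blk_opf_t)0xff)
#define REQ_SYNC	((blk_opf_t)(1u << 11))	/* assumed bit position */

static inline blk_opf_t bio_op(blk_opf_t opf)
{
	return opf & REQ_OP_MASK;
}

static inline bool op_is_write(blk_opf_t opf)
{
	return opf & 1;
}

static inline bool op_is_sync(blk_opf_t opf)
{
	/* simplified: reads are implicitly sync, writes only with REQ_SYNC */
	return bio_op(opf) == REQ_OP_READ || (opf & REQ_SYNC);
}

/* Mirrors the signature change seen in the diff: blk_opf_t opf, not unsigned int op. */
static void account_io(blk_opf_t opf)
{
	printf("op=%u write=%d sync=%d\n", bio_op(opf), op_is_write(opf),
	       op_is_sync(opf));
}

int main(void)
{
	account_io(REQ_OP_READ);
	account_io(REQ_OP_WRITE | REQ_SYNC);
	return 0;
}
```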
```diff
diff --git a/block/Kconfig b/block/Kconfig
index 50b17e260fa2..444c5ab3b67e 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -147,7 +147,6 @@ config BLK_CGROUP_FC_APPID
 config BLK_CGROUP_IOCOST
 	bool "Enable support for cost model based cgroup IO controller"
 	depends on BLK_CGROUP
-	select BLK_RQ_IO_DATA_LEN
 	select BLK_RQ_ALLOC_TIME
 	help
 	  Enabling this option enables the .weight interface for cost
diff --git a/block/bdev.c b/block/bdev.c
index 5fe06c1f2def..ce05175e71ce 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -54,12 +54,10 @@ static void bdev_write_inode(struct block_device *bdev)
 	while (inode->i_state & I_DIRTY) {
 		spin_unlock(&inode->i_lock);
 		ret = write_inode_now(inode, true);
-		if (ret) {
-			char name[BDEVNAME_SIZE];
-			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
-					"for block device %s (err=%d).\n",
-					bdevname(bdev, name), ret);
-		}
+		if (ret)
+			pr_warn_ratelimited(
+				"VFS: Dirty inode writeback failed for block device %pg (err=%d).\n",
+				bdev, ret);
 		spin_lock(&inode->i_lock);
 	}
 	spin_unlock(&inode->i_lock);
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 09574af83566..30b15a9a47c4 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -220,46 +220,46 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
 }
 
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op)
+			      blk_opf_t opf)
 {
-	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
+	blkg_rwstat_add(&bfqg->stats.queued, opf, 1);
 	bfqg_stats_end_empty_time(&bfqg->stats);
 	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
 		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
 }
 
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf)
 {
-	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
+	blkg_rwstat_add(&bfqg->stats.queued, opf, -1);
 }
 
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf)
 {
-	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
+	blkg_rwstat_add(&bfqg->stats.merged, opf, 1);
 }
 
 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
-				  u64 io_start_time_ns, unsigned int op)
+				  u64 io_start_time_ns, blk_opf_t opf)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
 	u64 now = ktime_get_ns();
 
 	if (now > io_start_time_ns)
-		blkg_rwstat_add(&stats->service_time, op,
+		blkg_rwstat_add(&stats->service_time, opf,
 				now - io_start_time_ns);
 	if (io_start_time_ns > start_time_ns)
-		blkg_rwstat_add(&stats->wait_time, op,
+		blkg_rwstat_add(&stats->wait_time, opf,
 				io_start_time_ns - start_time_ns);
 }
 
 #else /* CONFIG_BFQ_CGROUP_DEBUG */
 
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op) { }
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
+			      blk_opf_t opf) { }
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { }
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { }
 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
-				  u64 io_start_time_ns, unsigned int op) { }
+				  u64 io_start_time_ns, blk_opf_t opf) { }
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
@@ -706,10 +706,10 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 }
 
 /**
- * __bfq_bic_change_cgroup - move @bic to @cgroup.
+ * __bfq_bic_change_cgroup - move @bic to @bfqg.
  * @bfqd: the queue descriptor.
  * @bic: the bic to move.
- * @blkcg: the blk-cgroup to move to.
+ * @bfqg: the group to move to.
  *
  * Move bic to blkcg, assuming that bfqd->lock is held; which makes
 * sure that the reference to cgroup is valid across the call (see
@@ -863,6 +863,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
  * @bfqd: the device data structure with the root group.
  * @entity: the entity to move, if entity is a leaf; or the parent entity
  *	    of an active leaf entity to move, if entity is not a leaf.
+ * @ioprio_class: I/O priority class to reparent.
  */
 static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
 				     struct bfq_entity *entity,
@@ -892,6 +893,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
  * @bfqd: the device data structure with the root group.
  * @bfqg: the group to move from.
  * @st: the service tree to start the search from.
+ * @ioprio_class: I/O priority class to reparent.
  */
 static void bfq_reparent_active_queues(struct bfq_data *bfqd,
 				       struct bfq_group *bfqg,
@@ -1471,8 +1473,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
 	return bfqq->bfqd->root_group;
 }
 
-void bfqg_and_blkg_get(struct bfq_group *bfqg) {}
-
 void bfqg_and_blkg_put(struct bfq_group *bfqg) {}
 
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e6d7e6b01a05..c740b41fe0a4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -668,19 +668,19 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
  * significantly affect service guarantees coming from the BFQ scheduling
  * algorithm.
  */
-static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
+static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
 	struct bfq_data *bfqd = data->q->elevator->elevator_data;
 	struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
-	struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(op)) : NULL;
+	struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL;
 	int depth;
 	unsigned limit = data->q->nr_requests;
 
 	/* Sync reads have full depth available */
-	if (op_is_sync(op) && !op_is_write(op)) {
+	if (op_is_sync(opf) && !op_is_write(opf)) {
 		depth = 0;
 	} else {
-		depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
+		depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
 		limit = (limit * depth) >> bfqd->full_depth_shift;
 	}
 
@@ -693,7 +693,7 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 		depth = 1;
 
 	bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
-			__func__, bfqd->wr_busy_queues, op_is_sync(op), depth);
+			__func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
 	if (depth)
 		data->shallow_depth = depth;
 }
@@ -6104,7 +6104,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 static void bfq_update_insert_stats(struct request_queue *q,
 				    struct bfq_queue *bfqq,
 				    bool idle_timer_disabled,
-				    unsigned int cmd_flags)
+				    blk_opf_t cmd_flags)
 {
 	if (!bfqq)
 		return;
@@ -6129,7 +6129,7 @@ static void bfq_update_insert_stats(struct request_queue *q,
 static inline void bfq_update_insert_stats(struct request_queue *q,
 					   struct bfq_queue *bfqq,
 					   bool idle_timer_disabled,
-					   unsigned int cmd_flags) {}
+					   blk_opf_t cmd_flags) {}
 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
 
 static struct bfq_queue *bfq_init_rq(struct request *rq);
@@ -6141,7 +6141,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct bfq_queue *bfqq;
 	bool idle_timer_disabled = false;
-	unsigned int cmd_flags;
+	blk_opf_t cmd_flags;
 	LIST_HEAD(free);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ca8177d7bf7c..ad8e513d7e87 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -994,11 +994,11 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
 
 void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op);
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
+			      blk_opf_t opf);
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf);
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf);
 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
-				  u64 io_start_time_ns, unsigned int op);
+				  u64 io_start_time_ns, blk_opf_t opf);
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index f8eb340381cf..983413cdefad 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1360,6 +1360,8 @@ left:
 /**
  * __bfq_lookup_next_entity - return the first eligible entity in @st.
  * @st: the service tree.
+ * @in_service: whether or not there is an in-service entity for the sched_data
+ *		this active tree belongs to.
  *
  * If there is no in-service entity for the sched_data st belongs to,
  * then return the entity that will be set in service if:
@@ -1472,9 +1474,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
 			break;
 	}
 
-	if (!entity)
-		return NULL;
-
 	return entity;
 }
 
diff --git a/block/bio.c b/block/bio.c
index 51c99f2c5c90..6f9f883f9a65 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -239,7 +239,7 @@ static void bio_free(struct bio *bio)
  * when IO has completed, or when the bio is released.
  */
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
-	      unsigned short max_vecs, unsigned int opf)
+	      unsigned short max_vecs, blk_opf_t opf)
 {
 	bio->bi_next = NULL;
 	bio->bi_bdev = bdev;
@@ -292,7 +292,7 @@ EXPORT_SYMBOL(bio_init);
  * preserved are the ones that are initialized by bio_alloc_bioset(). See
  * comment in struct bio.
  */
-void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
+void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf)
 {
 	bio_uninit(bio);
 	memset(bio, 0, BIO_RESET_BYTES);
@@ -341,7 +341,7 @@ void bio_chain(struct bio *bio, struct bio *parent)
 EXPORT_SYMBOL(bio_chain);
 
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
-		unsigned int nr_pages, unsigned int opf, gfp_t gfp)
+		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
 {
 	struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
 
@@ -409,7 +409,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 }
 
 static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
-		unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
+		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
 		struct bio_set *bs)
 {
 	struct bio_alloc_cache *cache;
@@ -468,7 +468,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
 * Returns: Pointer to new bio on success, NULL on failure.
 */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-			     unsigned int opf, gfp_t gfp_mask,
+			     blk_opf_t opf, gfp_t gfp_mask,
 			     struct bio_set *bs)
 {
 	gfp_t saved_gfp = gfp_mask;
@@ -1033,7 +1033,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
 	if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
 		return 0;
 
-	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
+	if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev)))
 		return 0;
 
 	return bio_add_hw_page(q, bio, page, len, offset,
@@ -1159,6 +1159,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off)
 		put_page(pages[i]);
 }
 
+static int bio_iov_add_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int offset)
+{
+	bool same_page = false;
+
+	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
+		if (WARN_ON_ONCE(bio_full(bio, len)))
+			return -EINVAL;
+		__bio_add_page(bio, page, len, offset);
+		return 0;
+	}
+
+	if (same_page)
+		put_page(page);
+	return 0;
+}
+
+static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int offset)
+{
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	bool same_page = false;
+
+	if (bio_add_hw_page(q, bio, page, len, offset,
+			queue_max_zone_append_sectors(q), &same_page) != len)
+		return -EINVAL;
+	if (same_page)
+		put_page(page);
+	return 0;
+}
+
 #define PAGE_PTRS_PER_BVEC	(sizeof(struct bio_vec) / sizeof(struct page *))
 
 /**
@@ -1177,7 +1208,6 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
-	bool same_page = false;
 	ssize_t size, left;
 	unsigned len, i;
 	size_t offset;
@@ -1186,82 +1216,43 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	 * Move page array up in the allocated memory for the bio vecs as far as
 	 * possible so that we can start filling biovecs from the beginning
 	 * without overwriting the temporary page array.
-	*/
+	 */
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
-	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
-	if (unlikely(size <= 0))
-		return size ? size : -EFAULT;
-
-	for (left = size, i = 0; left > 0; left -= len, i++) {
-		struct page *page = pages[i];
-
-		len = min_t(size_t, PAGE_SIZE - offset, left);
-
-		if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
-			if (same_page)
-				put_page(page);
-		} else {
-			if (WARN_ON_ONCE(bio_full(bio, len))) {
-				bio_put_pages(pages + i, left, offset);
-				return -EINVAL;
-			}
-			__bio_add_page(bio, page, len, offset);
-		}
-		offset = 0;
-	}
-
-	iov_iter_advance(iter, size);
-	return 0;
-}
-
-static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
-{
-	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
-	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
-	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
-	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
-	struct page **pages = (struct page **)bv;
-	ssize_t size, left;
-	unsigned len, i;
-	size_t offset;
-	int ret = 0;
-
-	if (WARN_ON_ONCE(!max_append_sectors))
-		return 0;
-
 	/*
-	 * Move page array up in the allocated memory for the bio vecs as far as
-	 * possible so that we can start filling biovecs from the beginning
-	 * without overwriting the temporary page array.
+	 * Each segment in the iov is required to be a block size multiple.
+	 * However, we may not be able to get the entire segment if it spans
+	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
+	 * result to ensure the bio's total size is correct. The remainder of
+	 * the iov data will be picked up in the next bio iteration.
 	 */
-	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
-	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
-
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
 	for (left = size, i = 0; left > 0; left -= len, i++) {
 		struct page *page = pages[i];
-		bool same_page = false;
+		int ret;
 
 		len = min_t(size_t, PAGE_SIZE - offset, left);
 
-		if (bio_add_hw_page(q, bio, page, len, offset,
-				max_append_sectors, &same_page) != len) {
+		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+			ret = bio_iov_add_zone_append_page(bio, page, len,
+					offset);
+		else
+			ret = bio_iov_add_page(bio, page, len, offset);
+
+		if (ret) {
 			bio_put_pages(pages + i, left, offset);
-			ret = -EINVAL;
-			break;
+			return ret;
 		}
-		if (same_page)
-			put_page(page);
 		offset = 0;
 	}
 
-	iov_iter_advance(iter, size - left);
-	return ret;
+	iov_iter_advance(iter, size);
+	return 0;
 }
 
 /**
@@ -1298,10 +1289,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	}
 
 	do {
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
-			ret = __bio_iov_append_get_pages(bio, iter);
-		else
-			ret = __bio_iov_iter_get_pages(bio, iter);
+		ret = __bio_iov_iter_get_pages(bio, iter);
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
 
 	/* don't account direct I/O as memory stall */
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index 9f2723b34b75..022527b0b043 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -59,20 +59,20 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
 * caller is responsible for synchronizing calls to this function.
 */
 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
-				   unsigned int op, uint64_t val)
+				   blk_opf_t opf, uint64_t val)
 {
 	struct percpu_counter *cnt;
 
-	if (op_is_discard(op))
+	if (op_is_discard(opf))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
-	else if (op_is_write(op))
+	else if (op_is_write(opf))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
 
 	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 
-	if (op_is_sync(op))
+	if (op_is_sync(opf))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 764e740b0c0f..869af9d72bcf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -846,6 +846,21 @@ static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
 	}
 }
 
+static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
+				struct blkg_iostat *last)
+{
+	struct blkg_iostat delta;
+	unsigned long flags;
+
+	/* propagate percpu delta to global */
+	flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+	blkg_iostat_set(&delta, cur);
+	blkg_iostat_sub(&delta, last);
+	blkg_iostat_add(&blkg->iostat.cur, &delta);
+	blkg_iostat_add(last, &delta);
+	u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
+}
+
 static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
@@ -860,8 +875,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
 		struct blkcg_gq *parent = blkg->parent;
 		struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
-		struct blkg_iostat cur, delta;
-		unsigned long flags;
+		struct blkg_iostat cur;
 		unsigned int seq;
 
 		/* fetch the current per-cpu values */
@@ -870,23 +884,12 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			blkg_iostat_set(&cur, &bisc->cur);
 		} while (u64_stats_fetch_retry(&bisc->sync, seq));
 
-		/* propagate percpu delta to global */
-		flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
-		blkg_iostat_set(&delta, &cur);
-		blkg_iostat_sub(&delta, &bisc->last);
-		blkg_iostat_add(&blkg->iostat.cur, &delta);
-		blkg_iostat_add(&bisc->last, &delta);
-		u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
+		blkcg_iostat_update(blkg, &cur, &bisc->last);
 
 		/* propagate global delta to parent (unless that's root) */
-		if (parent && parent->parent) {
-			flags = u64_stats_update_begin_irqsave(&parent->iostat.sync);
-			blkg_iostat_set(&delta, &blkg->iostat.cur);
-			blkg_iostat_sub(&delta, &blkg->iostat.last);
-			blkg_iostat_add(&parent->iostat.cur, &delta);
-			blkg_iostat_add(&blkg->iostat.last, &delta);
-			u64_stats_update_end_irqrestore(&parent->iostat.sync, flags);
-		}
+		if (parent && parent->parent)
+			blkcg_iostat_update(parent, &blkg->iostat.cur,
+					    &blkg->iostat.last);
 	}
 
 	rcu_read_unlock();
@@ -1299,6 +1302,7 @@ int blkcg_init_queue(struct request_queue *q)
 	ret = blk_iolatency_init(q);
 	if (ret) {
 		blk_throtl_exit(q);
+		blk_ioprio_exit(q);
 		goto err_destroy_all;
 	}
 
@@ -1529,6 +1533,18 @@ void blkcg_deactivate_policy(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
 
+static void blkcg_free_all_cpd(struct blkcg_policy *pol)
+{
+	struct blkcg *blkcg;
+
+	list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+		if (blkcg->cpd[pol->plid]) {
+			pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+			blkcg->cpd[pol->plid] = NULL;
+		}
+	}
+}
+
 /**
  * blkcg_policy_register - register a blkcg policy
  * @pol: blkcg policy to register
@@ -1593,14 +1609,9 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 	return 0;
 
 err_free_cpds:
-	if (pol->cpd_free_fn) {
-		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
-			if (blkcg->cpd[pol->plid]) {
-				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
-				blkcg->cpd[pol->plid] = NULL;
-			}
-		}
-	}
+	if (pol->cpd_free_fn)
+		blkcg_free_all_cpd(pol);
+
 	blkcg_policy[pol->plid] = NULL;
 err_unlock:
 	mutex_unlock(&blkcg_pol_mutex);
@@ -1617,8 +1628,6 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
 */
 void blkcg_policy_unregister(struct blkcg_policy *pol)
 {
-	struct blkcg *blkcg;
-
 	mutex_lock(&blkcg_pol_register_mutex);
 
 	if (WARN_ON(blkcg_policy[pol->plid] != pol))
@@ -1633,14 +1642,9 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
 	/* remove cpds and unregister */
 	mutex_lock(&blkcg_pol_mutex);
 
-	if (pol->cpd_free_fn) {
-		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
-			if (blkcg->cpd[pol->plid]) {
-				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
-				blkcg->cpd[pol->plid] = NULL;
-			}
-		}
-	}
+	if (pol->cpd_free_fn)
+		blkcg_free_all_cpd(pol);
+
 	blkcg_policy[pol->plid] = NULL;
 
 	mutex_unlock(&blkcg_pol_mutex);
@@ -1696,7 +1700,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
```