diff options
Diffstat (limited to 'block')
45 files changed, 679 insertions, 688 deletions
diff --git a/block/Kconfig b/block/Kconfig index 50b17e260fa2..444c5ab3b67e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -147,7 +147,6 @@ config BLK_CGROUP_FC_APPID config BLK_CGROUP_IOCOST bool "Enable support for cost model based cgroup IO controller" depends on BLK_CGROUP - select BLK_RQ_IO_DATA_LEN select BLK_RQ_ALLOC_TIME help Enabling this option enables the .weight interface for cost diff --git a/block/bdev.c b/block/bdev.c index 5fe06c1f2def..ce05175e71ce 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -54,12 +54,10 @@ static void bdev_write_inode(struct block_device *bdev) while (inode->i_state & I_DIRTY) { spin_unlock(&inode->i_lock); ret = write_inode_now(inode, true); - if (ret) { - char name[BDEVNAME_SIZE]; - pr_warn_ratelimited("VFS: Dirty inode writeback failed " - "for block device %s (err=%d).\n", - bdevname(bdev, name), ret); - } + if (ret) + pr_warn_ratelimited( + "VFS: Dirty inode writeback failed for block device %pg (err=%d).\n", + bdev, ret); spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 09574af83566..30b15a9a47c4 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -220,46 +220,46 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op) + blk_opf_t opf) { - blkg_rwstat_add(&bfqg->stats.queued, op, 1); + blkg_rwstat_add(&bfqg->stats.queued, opf, 1); bfqg_stats_end_empty_time(&bfqg->stats); if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); } -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { - blkg_rwstat_add(&bfqg->stats.queued, op, -1); + blkg_rwstat_add(&bfqg->stats.queued, opf, -1); } -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { - blkg_rwstat_add(&bfqg->stats.merged, op, 1); + blkg_rwstat_add(&bfqg->stats.merged, opf, 1); } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, - u64 io_start_time_ns, unsigned int op) + u64 io_start_time_ns, blk_opf_t opf) { struct bfqg_stats *stats = &bfqg->stats; u64 now = ktime_get_ns(); if (now > io_start_time_ns) - blkg_rwstat_add(&stats->service_time, op, + blkg_rwstat_add(&stats->service_time, opf, now - io_start_time_ns); if (io_start_time_ns > start_time_ns) - blkg_rwstat_add(&stats->wait_time, op, + blkg_rwstat_add(&stats->wait_time, opf, io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_CGROUP_DEBUG */ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op) { } -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { } -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { } + blk_opf_t opf) { } +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { } +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, - u64 io_start_time_ns, unsigned int op) { } + u64 io_start_time_ns, blk_opf_t opf) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { } @@ -706,10 +706,10 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, } /** - * __bfq_bic_change_cgroup - move @bic to @cgroup. + * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. * @bic: the bic to move. - * @blkcg: the blk-cgroup to move to. + * @bfqg: the group to move to. * * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see @@ -863,6 +863,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) * @bfqd: the device data structure with the root group. * @entity: the entity to move, if entity is a leaf; or the parent entity * of an active leaf entity to move, if entity is not a leaf. + * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_entity *entity, @@ -892,6 +893,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree to start the search from. + * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_active_queues(struct bfq_data *bfqd, struct bfq_group *bfqg, @@ -1471,8 +1473,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) return bfqq->bfqd->root_group; } -void bfqg_and_blkg_get(struct bfq_group *bfqg) {} - void bfqg_and_blkg_put(struct bfq_group *bfqg) {} struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e6d7e6b01a05..c740b41fe0a4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -668,19 +668,19 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) * significantly affect service guarantees coming from the BFQ scheduling * algorithm. */ -static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct bfq_data *bfqd = data->q->elevator->elevator_data; struct bfq_io_cq *bic = bfq_bic_lookup(data->q); - struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(op)) : NULL; + struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL; int depth; unsigned limit = data->q->nr_requests; /* Sync reads have full depth available */ - if (op_is_sync(op) && !op_is_write(op)) { + if (op_is_sync(opf) && !op_is_write(opf)) { depth = 0; } else { - depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; + depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; limit = (limit * depth) >> bfqd->full_depth_shift; } @@ -693,7 +693,7 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) depth = 1; bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(op), depth); + __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); if (depth) data->shallow_depth = depth; } @@ -6104,7 +6104,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) static void bfq_update_insert_stats(struct request_queue *q, struct bfq_queue *bfqq, bool idle_timer_disabled, - unsigned int cmd_flags) + blk_opf_t cmd_flags) { if (!bfqq) return; @@ -6129,7 +6129,7 @@ static void bfq_update_insert_stats(struct request_queue *q, static inline void bfq_update_insert_stats(struct request_queue *q, struct bfq_queue *bfqq, bool idle_timer_disabled, - unsigned int cmd_flags) {} + blk_opf_t cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ static struct bfq_queue *bfq_init_rq(struct request *rq); @@ -6141,7 +6141,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_queue *bfqq; bool idle_timer_disabled = false; - unsigned int cmd_flags; + blk_opf_t cmd_flags; LIST_HEAD(free); #ifdef CONFIG_BFQ_GROUP_IOSCHED diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ca8177d7bf7c..ad8e513d7e87 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -994,11 +994,11 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq); void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op); -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op); -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op); + blk_opf_t opf); +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf); +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf); void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, - u64 io_start_time_ns, unsigned int op); + u64 io_start_time_ns, blk_opf_t opf); void bfqg_stats_update_dequeue(struct bfq_group *bfqg); void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); void bfqg_stats_update_idle_time(struct bfq_group *bfqg); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index f8eb340381cf..983413cdefad 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1360,6 +1360,8 @@ left: /** * __bfq_lookup_next_entity - return the first eligible entity in @st. * @st: the service tree. + * @in_service: whether or not there is an in-service entity for the sched_data + * this active tree belongs to. * * If there is no in-service entity for the sched_data st belongs to, * then return the entity that will be set in service if: @@ -1472,9 +1474,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, break; } - if (!entity) - return NULL; - return entity; } diff --git a/block/bio.c b/block/bio.c index 51c99f2c5c90..6f9f883f9a65 100644 --- a/block/bio.c +++ b/block/bio.c @@ -239,7 +239,7 @@ static void bio_free(struct bio *bio) * when IO has completed, or when the bio is released. */ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, - unsigned short max_vecs, unsigned int opf) + unsigned short max_vecs, blk_opf_t opf) { bio->bi_next = NULL; bio->bi_bdev = bdev; @@ -292,7 +292,7 @@ EXPORT_SYMBOL(bio_init); * preserved are the ones that are initialized by bio_alloc_bioset(). See * comment in struct bio. */ -void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf) +void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf) { bio_uninit(bio); memset(bio, 0, BIO_RESET_BYTES); @@ -341,7 +341,7 @@ void bio_chain(struct bio *bio, struct bio *parent) EXPORT_SYMBOL(bio_chain); struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, - unsigned int nr_pages, unsigned int opf, gfp_t gfp) + unsigned int nr_pages, blk_opf_t opf, gfp_t gfp) { struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp); @@ -409,7 +409,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs) } static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, gfp_t gfp, + unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp, struct bio_set *bs) { struct bio_alloc_cache *cache; @@ -468,7 +468,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, * Returns: Pointer to new bio on success, NULL on failure. */ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, - unsigned int opf, gfp_t gfp_mask, + blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs) { gfp_t saved_gfp = gfp_mask; @@ -1033,7 +1033,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page, if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND)) return 0; - if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) + if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev))) return 0; return bio_add_hw_page(q, bio, page, len, offset, @@ -1159,6 +1159,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off) put_page(pages[i]); } +static int bio_iov_add_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + bool same_page = false; + + if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { + if (WARN_ON_ONCE(bio_full(bio, len))) + return -EINVAL; + __bio_add_page(bio, page, len, offset); + return 0; + } + + if (same_page) + put_page(page); + return 0; +} + +static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + bool same_page = false; + + if (bio_add_hw_page(q, bio, page, len, offset, + queue_max_zone_append_sectors(q), &same_page) != len) + return -EINVAL; + if (same_page) + put_page(page); + return 0; +} + #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) /** @@ -1177,7 +1208,6 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; - bool same_page = false; ssize_t size, left; unsigned len, i; size_t offset; @@ -1186,82 +1216,43 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) * Move page array up in the allocated memory for the bio vecs as far as * possible so that we can start filling biovecs from the beginning * without overwriting the temporary page array. - */ + */ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); - if (unlikely(size <= 0)) - return size ? size : -EFAULT; - - for (left = size, i = 0; left > 0; left -= len, i++) { - struct page *page = pages[i]; - - len = min_t(size_t, PAGE_SIZE - offset, left); - - if (__bio_try_merge_page(bio, page, len, offset, &same_page)) { - if (same_page) - put_page(page); - } else { - if (WARN_ON_ONCE(bio_full(bio, len))) { - bio_put_pages(pages + i, left, offset); - return -EINVAL; - } - __bio_add_page(bio, page, len, offset); - } - offset = 0; - } - - iov_iter_advance(iter, size); - return 0; -} - -static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) -{ - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; - unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - unsigned int max_append_sectors = queue_max_zone_append_sectors(q); - struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; - struct page **pages = (struct page **)bv; - ssize_t size, left; - unsigned len, i; - size_t offset; - int ret = 0; - - if (WARN_ON_ONCE(!max_append_sectors)) - return 0; - /* - * Move page array up in the allocated memory for the bio vecs as far as - * possible so that we can start filling biovecs from the beginning - * without overwriting the temporary page array. + * Each segment in the iov is required to be a block size multiple. + * However, we may not be able to get the entire segment if it spans + * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the + * result to ensure the bio's total size is correct. The remainder of + * the iov data will be picked up in the next bio iteration. */ - BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); - pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); + if (size > 0) + size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev)); if (unlikely(size <= 0)) return size ? size : -EFAULT; for (left = size, i = 0; left > 0; left -= len, i++) { struct page *page = pages[i]; - bool same_page = false; + int ret; len = min_t(size_t, PAGE_SIZE - offset, left); - if (bio_add_hw_page(q, bio, page, len, offset, - max_append_sectors, &same_page) != len) { + if (bio_op(bio) == REQ_OP_ZONE_APPEND) + ret = bio_iov_add_zone_append_page(bio, page, len, + offset); + else + ret = bio_iov_add_page(bio, page, len, offset); + + if (ret) { bio_put_pages(pages + i, left, offset); - ret = -EINVAL; - break; + return ret; } - if (same_page) - put_page(page); offset = 0; } - iov_iter_advance(iter, size - left); - return ret; + iov_iter_advance(iter, size); + return 0; } /** @@ -1298,10 +1289,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) } do { - if (bio_op(bio) == REQ_OP_ZONE_APPEND) - ret = __bio_iov_append_get_pages(bio, iter); - else - ret = __bio_iov_iter_get_pages(bio, iter); + ret = __bio_iov_iter_get_pages(bio, iter); } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); /* don't account direct I/O as memory stall */ diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h index 9f2723b34b75..022527b0b043 100644 --- a/block/blk-cgroup-rwstat.h +++ b/block/blk-cgroup-rwstat.h @@ -59,20 +59,20 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, * caller is responsible for synchronizing calls to this function. */ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - unsigned int op, uint64_t val) + blk_opf_t opf, uint64_t val) { struct percpu_counter *cnt; - if (op_is_discard(op)) + if (op_is_discard(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; - else if (op_is_write(op)) + else if (op_is_write(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); - if (op_is_sync(op)) + if (op_is_sync(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 764e740b0c0f..869af9d72bcf 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -846,6 +846,21 @@ static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src) } } +static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, + struct blkg_iostat *last) +{ + struct blkg_iostat delta; + unsigned long flags; + + /* propagate percpu delta to global */ + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&delta, cur); + blkg_iostat_sub(&delta, last); + blkg_iostat_add(&blkg->iostat.cur, &delta); + blkg_iostat_add(last, &delta); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); +} + static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct blkcg *blkcg = css_to_blkcg(css); @@ -860,8 +875,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { struct blkcg_gq *parent = blkg->parent; struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); - struct blkg_iostat cur, delta; - unsigned long flags; + struct blkg_iostat cur; unsigned int seq; /* fetch the current per-cpu values */ @@ -870,23 +884,12 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) blkg_iostat_set(&cur, &bisc->cur); } while (u64_stats_fetch_retry(&bisc->sync, seq)); - /* propagate percpu delta to global */ - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); - blkg_iostat_set(&delta, &cur); - blkg_iostat_sub(&delta, &bisc->last); - blkg_iostat_add(&blkg->iostat.cur, &delta); - blkg_iostat_add(&bisc->last, &delta); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + blkcg_iostat_update(blkg, &cur, &bisc->last); /* propagate global delta to parent (unless that's root) */ - if (parent && parent->parent) { - flags = u64_stats_update_begin_irqsave(&parent->iostat.sync); - blkg_iostat_set(&delta, &blkg->iostat.cur); - blkg_iostat_sub(&delta, &blkg->iostat.last); - blkg_iostat_add(&parent->iostat.cur, &delta); - blkg_iostat_add(&blkg->iostat.last, &delta); - u64_stats_update_end_irqrestore(&parent->iostat.sync, flags); - } + if (parent && parent->parent) + blkcg_iostat_update(parent, &blkg->iostat.cur, + &blkg->iostat.last); } rcu_read_unlock(); @@ -1299,6 +1302,7 @@ int blkcg_init_queue(struct request_queue *q) ret = blk_iolatency_init(q); if (ret) { blk_throtl_exit(q); + blk_ioprio_exit(q); goto err_destroy_all; } @@ -1529,6 +1533,18 @@ void blkcg_deactivate_policy(struct request_queue *q, } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); +static void blkcg_free_all_cpd(struct blkcg_policy *pol) +{ + struct blkcg *blkcg; + + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + if (blkcg->cpd[pol->plid]) { + pol->cpd_free_fn(blkcg->cpd[pol->plid]); + blkcg->cpd[pol->plid] = NULL; + } + } +} + /** * blkcg_policy_register - register a blkcg policy * @pol: blkcg policy to register @@ -1593,14 +1609,9 @@ int blkcg_policy_register(struct blkcg_policy *pol) return 0; err_free_cpds: - if (pol->cpd_free_fn) { - list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { - if (blkcg->cpd[pol->plid]) { - pol->cpd_free_fn(blkcg->cpd[pol->plid]); - blkcg->cpd[pol->plid] = NULL; - } - } - } + if (pol->cpd_free_fn) + blkcg_free_all_cpd(pol); + blkcg_policy[pol->plid] = NULL; err_unlock: mutex_unlock(&blkcg_pol_mutex); @@ -1617,8 +1628,6 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { - struct blkcg *blkcg; - mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) @@ -1633,14 +1642,9 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) /* remove cpds and unregister */ mutex_lock(&blkcg_pol_mutex); - if (pol->cpd_free_fn) { - list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { - if (blkcg->cpd[pol->plid]) { - pol->cpd_free_fn(blkcg->cpd[pol->plid]); - blkcg->cpd[pol->plid] = NULL; - } - } - } + if (pol->cpd_free_fn) + blkcg_free_all_cpd(pol); + blkcg_policy[pol->plid] = NULL; mutex_unlock(&blkcg_pol_mutex); @@ -1696,7 +1700,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) * everybody is happy with their IO latencies. */ if (time_before64(old + NSEC_PER_SEC, now) && - atomic64_cmpxchg(&blkg->delay_start, old, now) == old) { + atomic64_try_cmpxchg(&blkg->delay_start, &old, now)) { u64 cur = atomic64_read(&blkg->delay_nsec); u64 sub = min_t(u64, blkg->last_delay, now - old); int cur_use = atomic_read(&blkg->use_delay); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index d4de0a35e066..d2724d1dd7c9 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -430,12 +430,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) * then check to see if we were the last delay so we can drop the * congestion count on the cgroup. */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); - if (cur == old) - break; - old = cur; - } + while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old |
