author     Linus Torvalds <torvalds@linux-foundation.org>   2023-07-03 18:48:38 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-07-03 18:48:38 -0700
commit     e50df24979fd02f920aa7baada714a58bc61bfd9 (patch)
tree       b0f12af3e2cb40e1ce0f3d2c4736e6bbec30ffb8
parent     4f52875366bfbd6ddc19c1045b603d853e0a889c (diff)
parent     3c2f765c81be1c85782ba09f492800a99f765a2c (diff)
Merge tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:
"Mostly items that came in a bit late for the initial pull request,
wanted to make sure they had the appropriate amount of linux-next soak
before going upstream.
Outside of stragglers, just generic fixes for either merge window
items, or longer standing bugs"
* tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux: (25 commits)
md/raid0: add discard support for the 'original' layout
nvme: disable controller on reset state failure
nvme: sync timeout work on failed reset
nvme: ensure unquiesce on teardown
cdrom/gdrom: Fix build error
nvme: improved uring polling
block: add request polling helper
nvme-mpath: fix I/O failure with EAGAIN when failing over I/O
nvme: host: fix command name spelling
blk-sysfs: add a new attr_group for blk_mq
blk-iocost: move wbt_enable/disable_default() out of spinlock
blk-wbt: cleanup rwb_enabled() and wbt_disabled()
blk-wbt: remove dead code to handle wbt enable/disable with io inflight
blk-wbt: don't create wbt sysfs entry if CONFIG_BLK_WBT is disabled
blk-mq: fix two misuses on RQF_USE_SCHED
blk-throttle: Fix io statistics for cgroup v1
bcache: Fix bcache device claiming
bcache: Alloc holder object before async registration
raid10: avoid spin_lock from fastpath from raid10_unplug()
md: fix 'delete_mutex' deadlock
...
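The "block: add request polling helper" commit in this list exports blk_rq_poll(), which the nvme uring-polling rework builds on: instead of translating a request back into a bio cookie, a driver that already holds the request can poll its hardware queue directly. Below is a minimal sketch of how a caller might spin on a polled passthrough request until its completion fires. wait_for_polled_rq() is a hypothetical helper, not a kernel function; the loop mirrors blk_rq_poll_completion() in the blk-mq.c hunk further down.

```c
/*
 * Illustrative sketch only: busy-wait for a polled passthrough request
 * using the blk_rq_poll() helper added in this pull. Assumes the driver
 * completes @done from its end_io handler.
 */
#include <linux/blk-mq.h>
#include <linux/completion.h>
#include <linux/sched.h>

static void wait_for_polled_rq(struct request *rq, struct completion *done)
{
	if (!blk_rq_is_poll(rq)) {		/* not a REQ_POLLED request */
		wait_for_completion(done);	/* fall back to sleeping */
		return;
	}

	do {
		/* reap completions on rq's hardware queue */
		blk_rq_poll(rq, NULL, 0);
		cond_resched();
	} while (!completion_done(done));
}
```

The point of the new helper is that the caller never has to look up the hctx from a cookie; blk_rq_poll() takes the queue usage reference itself and returns 0 when the request is not pollable.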
-rw-r--r--   block/blk-cgroup.c             |   6
-rw-r--r--   block/blk-iocost.c             |   7
-rw-r--r--   block/blk-mq.c                 |  54
-rw-r--r--   block/blk-sysfs.c              | 181
-rw-r--r--   block/blk-throttle.c           |   6
-rw-r--r--   block/blk-throttle.h           |   9
-rw-r--r--   block/blk-wbt.c                |  21
-rw-r--r--   block/blk-wbt.h                |  19
-rw-r--r--   drivers/cdrom/gdrom.c          |   4
-rw-r--r--   drivers/md/bcache/super.c      | 123
-rw-r--r--   drivers/md/md.c                |  32
-rw-r--r--   drivers/md/md.h                |   4
-rw-r--r--   drivers/md/raid0.c             |  62
-rw-r--r--   drivers/md/raid0.h             |   1
-rw-r--r--   drivers/md/raid1-10.c          |   2
-rw-r--r--   drivers/md/raid10.c            |   6
-rw-r--r--   drivers/nvme/host/constants.c  |   2
-rw-r--r--   drivers/nvme/host/core.c       |   6
-rw-r--r--   drivers/nvme/host/ioctl.c      |  70
-rw-r--r--   drivers/nvme/host/multipath.c  |  10
-rw-r--r--   drivers/nvme/host/nvme.h       |   3
-rw-r--r--   drivers/nvme/host/pci.c        |   5
-rw-r--r--   drivers/nvme/target/nvmet.h    |   2
-rw-r--r--   include/linux/blk-mq.h         |   8
-rw-r--r--   include/uapi/linux/io_uring.h  |   2

25 files changed, 341 insertions(+), 304 deletions(-)
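One of the larger entries above, drivers/md/bcache/super.c, reworks registration so the holder object exists before the device is claimed: the device is first opened without claiming it, the superblock is read, the cached_dev/cache object is allocated, and only then is the device reopened exclusively with that object as holder. The following is a condensed sketch of that sequence with error paths collapsed; open_claimed_for_holder() is a hypothetical wrapper (not a kernel function), and read_super()/alloc_holder_object() are bcache-internal helpers that appear in the register_bcache() hunks below.

```c
/*
 * Condensed sketch of the reopen-with-holder sequence register_bcache()
 * now uses. Error codes are collapsed for brevity; the real code lives
 * in drivers/md/bcache/super.c.
 */
static struct block_device *open_claimed_for_holder(const char *path,
						    struct cache_sb *sb,
						    struct cache_sb_disk **sb_disk,
						    void **holder)
{
	struct block_device *bdev, *bdev2;

	/* 1) open without claiming, just to read the superblock */
	bdev = blkdev_get_by_path(path, BLK_OPEN_READ, NULL, NULL);
	if (IS_ERR(bdev))
		return bdev;
	if (set_blocksize(bdev, 4096) || read_super(sb, bdev, sb_disk))
		goto err_put;

	/* 2) allocate the holder object while nothing is claimed yet */
	*holder = alloc_holder_object(sb);	/* cached_dev or cache */
	if (!*holder)
		goto err_put;

	/* 3) reopen exclusively, claiming the device with the real holder */
	bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
				  *holder, NULL);
	blkdev_put(bdev, NULL);			/* drop the unclaimed reference */
	return bdev2;

err_put:
	blkdev_put(bdev, NULL);
	return ERR_PTR(-EINVAL);
}
```

Allocating the holder first is what fixes the claiming bug: the device is never claimed on behalf of a placeholder (bcache_kobj) that later has to be swapped for the real cached_dev/cache object.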
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index aaf9903ad7b2..fc49be622e05 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -2086,6 +2086,9 @@ void blk_cgroup_bio_start(struct bio *bio)
 	struct blkg_iostat_set *bis;
 	unsigned long flags;
 
+	if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
+		return;
+
 	/* Root-level stats are sourced from system-wide IO stats */
 	if (!cgroup_parent(blkcg->css.cgroup))
 		return;
@@ -2116,8 +2119,7 @@ void blk_cgroup_bio_start(struct bio *bio)
 	}
 	u64_stats_update_end_irqrestore(&bis->sync, flags);
 
-	if (cgroup_subsys_on_dfl(io_cgrp_subsys))
-		cgroup_rstat_updated(blkcg->css.cgroup, cpu);
+	cgroup_rstat_updated(blkcg->css.cgroup, cpu);
 	put_cpu();
 }
 
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6084a9519883..9dfcf540f400 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3301,11 +3301,9 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		blk_stat_enable_accounting(disk->queue);
 		blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
 		ioc->enabled = true;
-		wbt_disable_default(disk);
 	} else {
 		blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
 		ioc->enabled = false;
-		wbt_enable_default(disk);
 	}
 
 	if (user) {
@@ -3318,6 +3316,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	ioc_refresh_params(ioc, true);
 	spin_unlock_irq(&ioc->lock);
 
+	if (enable)
+		wbt_disable_default(disk);
+	else
+		wbt_enable_default(disk);
+
 	blk_mq_unquiesce_queue(disk->queue);
 	blk_mq_unfreeze_queue(disk->queue);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index decb6ab2d508..5504719b970d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -49,17 +49,8 @@ static void blk_mq_request_bypass_insert(struct request *rq,
 		blk_insert_t flags);
 static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct list_head *list);
-
-static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
-		blk_qc_t qc)
-{
-	return xa_load(&q->hctx_table, qc);
-}
-
-static inline blk_qc_t blk_rq_to_qc(struct request *rq)
-{
-	return rq->mq_hctx->queue_num;
-}
+static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+		struct io_comp_batch *iob, unsigned int flags);
 
 /*
  * Check if any of the ctx, dispatch list or elevator
@@ -1248,7 +1239,7 @@ void blk_mq_start_request(struct request *rq)
 		q->integrity.profile->prepare_fn(rq);
 #endif
 	if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
-		WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
+		WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num);
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
@@ -1280,7 +1271,11 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 	if (!plug->multiple_queues && last && last->q != rq->q)
 		plug->multiple_queues = true;
-	if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
+	/*
+	 * Any request allocated from sched tags can't be issued to
+	 * ->queue_rqs() directly
+	 */
+	if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
 		plug->has_elevator = true;
 	rq->rq_next = NULL;
 	rq_list_add(&plug->mq_list, rq);
@@ -1350,7 +1345,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll);
 static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 {
 	do {
-		blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0);
+		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0);
 		cond_resched();
 	} while (!completion_done(wait));
 }
@@ -4745,10 +4740,9 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
-		unsigned int flags)
+static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+		struct io_comp_batch *iob, unsigned int flags)
 {
-	struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
 	long state = get_current_state();
 	int ret;
 
@@ -4773,6 +4767,32 @@ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *
 	return 0;
 }
 
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
+		struct io_comp_batch *iob, unsigned int flags)
+{
+	struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie);
+
+	return blk_hctx_poll(q, hctx, iob, flags);
+}
+
+int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
+		unsigned int poll_flags)
+{
+	struct request_queue *q = rq->q;
+	int ret;
+
+	if (!blk_rq_is_poll(rq))
+		return 0;
+	if (!percpu_ref_tryget(&q->q_usage_counter))
+		return 0;
+
+	ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags);
+	blk_queue_exit(q);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blk_rq_poll);
+
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
 	return rq->mq_ctx->cpu;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a64208583853..afc797fb0dfc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -47,19 +47,6 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
 	return count;
 }
 
-static ssize_t queue_var_store64(s64 *var, const char *page)
-{
-	int err;
-	s64 v;
-
-	err = kstrtos64(page, 10, &v);
-	if (err < 0)
-		return err;
-
-	*var = v;
-	return 0;
-}
-
 static ssize_t queue_requests_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(q->nr_requests, page);
@@ -451,61 +438,6 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
 	return count;
 }
 
-static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
-{
-	if (!wbt_rq_qos(q))
-		return -EINVAL;
-
-	if (wbt_disabled(q))
-		return sprintf(page, "0\n");
-
-	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
-}
-
-static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
-				  size_t count)
-{
-	struct rq_qos *rqos;
-	ssize_t ret;
-	s64 val;
-
-	ret = queue_var_store64(&val, page);
-	if (ret < 0)
-		return ret;
-	if (val < -1)
-		return -EINVAL;
-
-	rqos = wbt_rq_qos(q);
-	if (!rqos) {
-		ret = wbt_init(q->disk);
-		if (ret)
-			return ret;
-	}
-
-	if (val == -1)
-		val = wbt_default_latency_nsec(q);
-	else if (val >= 0)
-		val *= 1000ULL;
-
-	if (wbt_get_min_lat(q) == val)
-		return count;
-
-	/*
-	 * Ensure that the queue is idled, in case the latency update
-	 * ends up either enabling or disabling wbt completely. We can't
-	 * have IO inflight if that happens.
-	 */
-	blk_mq_freeze_queue(q);
-	blk_mq_quiesce_queue(q);
-
-	wbt_set_min_lat(q, val);
-
-	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
-
-	return count;
-}
-
 static ssize_t queue_wc_show(struct request_queue *q, char *page)
 {
 	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@@ -598,7 +530,6 @@ QUEUE_RW_ENTRY(queue_wc, "write_cache");
 QUEUE_RO_ENTRY(queue_fua, "fua");
 QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
-QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
 QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
 
@@ -617,8 +548,79 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats");
 QUEUE_RW_ENTRY(queue_random, "add_random");
 QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
 
+#ifdef CONFIG_BLK_WBT
+static ssize_t queue_var_store64(s64 *var, const char *page)
+{
+	int err;
+	s64 v;
+
+	err = kstrtos64(page, 10, &v);
+	if (err < 0)
+		return err;
+
+	*var = v;
+	return 0;
+}
+
+static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+{
+	if (!wbt_rq_qos(q))
+		return -EINVAL;
+
+	if (wbt_disabled(q))
+		return sprintf(page, "0\n");
+
+	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+}
+
+static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+				  size_t count)
+{
+	struct rq_qos *rqos;
+	ssize_t ret;
+	s64 val;
+
+	ret = queue_var_store64(&val, page);
+	if (ret < 0)
+		return ret;
+	if (val < -1)
+		return -EINVAL;
+
+	rqos = wbt_rq_qos(q);
+	if (!rqos) {
+		ret = wbt_init(q->disk);
+		if (ret)
+			return ret;
+	}
+
+	if (val == -1)
+		val = wbt_default_latency_nsec(q);
+	else if (val >= 0)
+		val *= 1000ULL;
+
+	if (wbt_get_min_lat(q) == val)
+		return count;
+
+	/*
+	 * Ensure that the queue is idled, in case the latency update
+	 * ends up either enabling or disabling wbt completely. We can't
+	 * have IO inflight if that happens.
+	 */
+	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+
+	wbt_set_min_lat(q, val);
+
+	blk_mq_unquiesce_queue(q);
+	blk_mq_unfreeze_queue(q);
+
+	return count;
+}
+
+QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
+#endif
+
 static struct attribute *queue_attrs[] = {
-	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
 	&queue_max_sectors_entry.attr,
@@ -626,7 +628,6 @@ static struct attribute *queue_attrs[] = {
 	&queue_max_discard_segments_entry.attr,
 	&queue_max_integrity_segments_entry.attr,
 	&queue_max_segment_size_entry.attr,
-	&elv_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_logical_block_size_entry.attr,
 	&queue_physical_block_size_entry.attr,
@@ -647,7 +648,6 @@ static struct attribute *queue_attrs[] = {
 	&queue_max_open_zones_entry.attr,
 	&queue_max_active_zones_entry.attr,
 	&queue_nomerges_entry.attr,
-	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
 	&queue_stable_writes_entry.attr,
 	&queue_random_entry.attr,
@@ -655,9 +655,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_wc_entry.attr,
 	&queue_fua_entry.attr,
 	&queue_dax_entry.attr,
-	&queue_wb_lat_entry.attr,
 	&queue_poll_delay_entry.attr,
-	&queue_io_timeout_entry.attr,
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	&blk_throtl_sample_time_entry.attr,
 #endif
@@ -666,16 +664,23 @@ static struct attribute *queue_attrs[] = {
 	NULL,
 };
 
+static struct attribute *blk_mq_queue_attrs[] = {
+	&queue_requests_entry.attr,
+	&elv_iosched_entry.attr,
+	&queue_rq_affinity_entry.attr,
+	&queue_io_timeout_entry.attr,
+#ifdef CONFIG_BLK_WBT
+	&queue_wb_lat_entry.attr,
+#endif
+	NULL,
+};
+
 static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 				  int n)
 {
 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
 	struct request_queue *q = disk->queue;
 
-	if (attr == &queue_io_timeout_entry.attr &&
-	    (!q->mq_ops || !q->mq_ops->timeout))
-		return 0;
-
 	if ((attr == &queue_max_open_zones_entry.attr ||
 	     attr == &queue_max_active_zones_entry.attr) &&
 	    !blk_queue_is_zoned(q))
@@ -684,11 +689,30 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 	return attr->mode;
 }
 
+static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
+					 struct attribute *attr, int n)
+{
+	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
+	struct request_queue *q = disk->queue;
+
+	if (!queue_is_mq(q))
+		return 0;
+
+	if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
+		return 0;
+
+	return attr->mode;
+}
+
 static struct attribute_group queue_attr_group = {
 	.attrs = queue_attrs,
 	.is_visible = queue_attr_visible,
 };
 
+static struct attribute_group blk_mq_queue_attr_group = {
+	.attrs = blk_mq_queue_attrs,
+	.is_visible = blk_mq_queue_attr_visible,
+};
+
 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
 
@@ -733,6 +757,7 @@ static const struct sysfs_ops queue_sysfs_ops = {
 
 static const struct attribute_group *blk_queue_attr_groups[] = {
 	&queue_attr_group,
+	&blk_mq_queue_attr_group,
 	NULL
 };
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9d010d867fbf..7397ff199d66 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2178,12 +2178,6 @@ bool __blk_throtl_bio(struct bio *bio)
 
 	rcu_read_lock();
 
-	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
-		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
-				bio->bi_iter.bi_size);
-		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
-	}
-
 	spin_lock_irq(&q->queue_lock);
 
 	throtl_update_latency_buckets(td);
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index ef4b7a4de987..d1ccbfe9f797 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -185,6 +185,15 @@ static inline bool blk_should_throtl(struct bio *bio)
 	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
 	int rw = bio_data_dir(bio);
 
+	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
+		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
+			bio_set_flag(bio, BIO_CGROUP_ACCT);
+			blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
+					bio->bi_iter.bi_size);
+		}
+		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
+	}
+
 	/* iops limit is always counted */
 	if (tg->has_rules_iops[rw])
 		return true;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 7a87506ff8e1..0bb613139bec 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -146,7 +146,7 @@ enum {
 static inline bool rwb_enabled(struct rq_wb *rwb)
 {
 	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
-		      rwb->wb_normal != 0;
+		      rwb->enable_state != WBT_STATE_OFF_MANUAL;
 }
 
 static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@@ -201,15 +201,6 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
 	inflight = atomic_dec_return(&rqw->inflight);
 
 	/*
-	 * wbt got disabled with IO in flight. Wake up any potential
-	 * waiters, we don't have to do more than that.
-	 */
-	if (unlikely(!rwb_enabled(rwb))) {
-		rwb_wake_all(rwb);
-		return;
-	}
-
-	/*
 	 * For discards, our limit is always the background. For writes, if
 	 * the device does write back caching, drop further down before we
 	 * wake people up.
@@ -503,8 +494,7 @@ bool wbt_disabled(struct request_queue *q)
 {
 	struct rq_qos *rqos = wbt_rq_qos(q);
 
-	return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT ||
-	       RQWB(rqos)->enable_state == WBT_STATE_OFF_MANUAL;
+	return !rqos || !rwb_enabled(RQWB(rqos));
 }
 
 u64 wbt_get_min_lat(struct request_queue *q)
@@ -545,13 +535,6 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 {
 	unsigned int limit;
 
-	/*
-	 * If we got disabled, just return UINT_MAX. This ensures that
-	 * we'll properly inc a new IO, and dec+wakeup at the end.
-	 */
-	if (!rwb_enabled(rwb))
-		return UINT_MAX;
-
 	if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
 		return rwb->wb_background;
 
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index ba6cca5849a6..8a029e138f7a 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -18,10 +18,6 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 
 #else
 
-static inline int wbt_init(struct gendisk *disk)
-{
-	return -EINVAL;
-}
 static inline void wbt_disable_default(struct gendisk *disk)
 {
 }
@@ -31,21 +27,6 @@ static inline void wbt_enable_default(struct gendisk *disk)
 static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
 {
 }
-static inline u64 wbt_get_min_lat(struct request_queue *q)
-{
-	return 0;
-}
-static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
-{
-}
-static inline u64 wbt_default_latency_nsec(struct request_queue *q)
-{
-	return 0;
-}
-static inline bool wbt_disabled(struct request_queue *q)
-{
-	return true;
-}
 
 #endif /* CONFIG_BLK_WBT */
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 3a46e27479ff..d668b174ace9 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -481,7 +481,7 @@ static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
 	disk_check_media_change(disk);
 
 	mutex_lock(&gdrom_mutex);
-	ret = cdrom_open(gd.cd_info);
+	ret = cdrom_open(gd.cd_info, mode);
 	mutex_unlock(&gdrom_mutex);
 	return ret;
 }
@@ -489,7 +489,7 @@ static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
 static void gdrom_bdops_release(struct gendisk *disk)
 {
 	mutex_lock(&gdrom_mutex);
-	cdrom_release(gd.cd_info, mode);
+	cdrom_release(gd.cd_info);
 	mutex_unlock(&gdrom_mutex);
 }
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e2a803683105..0ae2b3676293 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl)
 		put_page(virt_to_page(dc->sb_disk));
 
 	if (!IS_ERR_OR_NULL(dc->bdev))
-		blkdev_put(dc->bdev, bcache_kobj);
+		blkdev_put(dc->bdev, dc);
 
 	wake_up(&unregister_wait);
 
@@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
 
 	memcpy(&dc->sb, sb, sizeof(struct cache_sb));
 	dc->bdev = bdev;
-	dc->bdev->bd_holder = dc;
 	dc->sb_disk = sb_disk;
 
 	if (cached_dev_init(dc, sb->block_size << 9))
@@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj)
 		put_page(virt_to_page(ca->sb_disk));
 
 	if (!IS_ERR_OR_NULL(ca->bdev))
-		blkdev_put(ca->bdev, bcache_kobj);
+		blkdev_put(ca->bdev, ca);
 
 	kfree(ca);
 	module_put(THIS_MODULE);
@@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
 
 	memcpy(&ca->sb, sb, sizeof(struct cache_sb));
 	ca->bdev = bdev;
-	ca->bdev->bd_holder = ca;
 	ca->sb_disk = sb_disk;
 
 	if (bdev_max_discard_sectors((bdev)))
@@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
 		 * call blkdev_put() to bdev in bch_cache_release(). So we
 		 * explicitly call blkdev_put() here.
		 */
-		blkdev_put(bdev, bcache_kobj);
+		blkdev_put(bdev, ca);
 		if (ret == -ENOMEM)
 			err = "cache_alloc(): -ENOMEM";
 		else if (ret == -EPERM)
@@ -2448,6 +2446,7 @@ struct async_reg_args {
 	struct cache_sb *sb;
 	struct cache_sb_disk *sb_disk;
 	struct block_device *bdev;
+	void *holder;
 };
 
 static void register_bdev_worker(struct work_struct *work)
@@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work)
 	int fail = false;
 	struct async_reg_args *args =
 		container_of(work, struct async_reg_args, reg_work.work);
-	struct cached_dev *dc;
-
-	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
-	if (!dc) {
-		fail = true;
-		put_page(virt_to_page(args->sb_disk));
-		blkdev_put(args->bdev, bcache_kobj);
-		goto out;
-	}
 
 	mutex_lock(&bch_register_lock);
-	if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
+	if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder)
+			< 0)
 		fail = true;
 	mutex_unlock(&bch_register_lock);
 
-out:
 	if (fail)
 		pr_info("error %s: fail to register backing device\n",
 			args->path);
@@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work)
 	int fail = false;
 	struct async_reg_args *args =
 		container_of(work, struct async_reg_args, reg_work.work);
-	struct cache *ca;
-
-	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-	if (!ca) {
-		fail = true;
-		put_page(virt_to_page(args->sb_disk));
-		blkdev_put(args->bdev, bcache_kobj);
-		goto out;
-	}
 
 	/* blkdev_put() will be called in bch_cache_release() */
-	if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
+	if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder))
 		fail = true;
 
-out:
 	if (fail)
 		pr_info("error %s: fail to register cache device\n",
 			args->path);
@@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args)
 	queue_delayed_work(system_wq, &args->reg_work, 10);
 }
 
+static void *alloc_holder_object(struct cache_sb *sb)
+{
+	if (SB_IS_BDEV(sb))
+		return kzalloc(sizeof(struct cached_dev), GFP_KERNEL);
+	return kzalloc(sizeof(struct cache), GFP_KERNEL);
+}
+
 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 			       const char *buffer, size_t size)
 {
@@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 	char *path = NULL;
 	struct cache_sb *sb;
 	struct cache_sb_disk *sb_disk;
-	struct block_device *bdev;
+	struct block_device *bdev, *bdev2;
+	void *holder = NULL;
 	ssize_t ret;
 	bool async_registration = false;
+	bool quiet = false;
 
 #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
 	async_registration = true;
@@ -2558,10 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 
 	ret = -EINVAL;
 	err = "failed to open device";
-	bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE,
-				  bcache_kobj, NULL);
+	bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
+	if (IS_ERR(bdev))
+		goto out_free_sb;
+
+	err = "failed to set blocksize";
+	if (set_blocksize(bdev, 4096))
+		goto out_blkdev_put;
+
+	err = read_super(sb, bdev, &sb_disk);
+	if (err)
+		goto out_blkdev_put;
+
+	holder = alloc_holder_object(sb);
+	if (!holder) {
+		ret = -ENOMEM;
+		err = "cannot allocate memory";
+		goto out_put_sb_page;
+	}
+
+	/* Now reopen in exclusive mode with proper holder */
+	bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+				  holder, NULL);
+	blkdev_put(bdev, NULL);
+	bdev = bdev2;
 	if (IS_ERR(bdev)) {
-		if (bdev == ERR_PTR(-EBUSY)) {
+		ret = PTR_ERR(bdev);
+		bdev = NULL;
+		if (ret == -EBUSY) {
 			dev_t dev;
 
 			mutex_lock(&bch_register_lock);
@@ -2571,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 			else
 				err = "device busy";
 			mutex_unlock(&bch_register_lock);
-			if (attr == &ksysfs_register_quiet)
-				goto done;
+			if (attr == &ksysfs_register_quiet) {
+				quiet = true;
+				ret = size;
+			}
 		}
-		goto out_free_sb;
+		goto out_free_holder;
 	}
 
-	err = "failed to set blocksize";
-	if (set_blocksize(bdev, 4096))
-		goto out_blkdev_put;
-
-	err = read_super(sb, bdev, &sb_disk);
-	if (err)
-		goto out_blkdev_put;
-
 	err = "failed to register device";
 
 	if (async_registration) {
@@ -2595,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 		if (!args) {
 			ret = -ENOMEM;
 			err = "cannot allocate memory";
-			goto out_put_sb_page;
+			goto out_free_holder;
 		}
 
 		args->path	= path;
 		args->sb	= sb;
 		args->sb_disk	= sb_disk;
 		args->bdev	= bdev;
+		args->holder	= holder;
 		register_device_async(args);
 		/* No wait and returns to user space */
 		goto async_done;
 	}
 
 	if (SB_IS_BDEV(sb)) {
-		struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
-
-		if (!dc) {
-			ret = -ENOMEM;
-			err = "cannot allocate memory";
-			goto out_put_sb_page;
-		}
-
 		mutex_lock(&bch_register_lock);
-		ret = register_bdev(sb, sb_disk, bdev, dc);
+		ret = register_bdev(sb, sb_disk, bdev, holder);
 		mutex_unlock(&bch_register_lock);
 		/* blkdev_put() will be called in cached_dev_free() */
 		if (ret < 0)
 			goto out_free_sb;
 	} else {
-		struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-
-		if (!ca) {
-			ret = -ENOMEM;
-			err = "cannot allocate memory";
-			goto out_put_sb_page;
-		}
-
 		/* blkdev_put() will be called in bch_cache_release() */
-		ret = register_cache(sb, sb_disk, bdev, ca);
+		ret = register_cache(sb, sb_disk, bdev, holder);
 		if (ret)
 			goto out_free_sb;
 	}
 
-done:
 	kfree(sb);
 	kfree(path);
 	module_put(THIS_MODULE);
 async_done:
 	return size;
 
+out_free_holder:
+	kfree(holder);
 out_put_sb_page:
 	put_page(virt_to_page(sb_disk));
 out_blkdev_put:
-	blkdev_put(bdev, register_bcache);
+	if (bdev)
+		blkdev_put(bdev, holder);
 out_free_sb:
 	kfree(sb);
 out_free_path:
@@ -2656,7 +2650,8 @@ out_free_path:
 out_module_put:
 	module_put(THIS_MODULE);
 out:
-	pr_info("error %s: %s\n", path?path:"", err);
+	if (!quiet)
+		pr_info("error %s: %s\n", path?path:"", err);
 	return ret;
 }
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cf3733c90c47..2e38ef421d69 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
 {
 	mutex_init(&mddev->open_mutex);
 	mutex_init(&mddev->reconfig_mutex);
-	mutex_init(&mddev->delete_mutex);
 	mutex_init(&mddev->bitmap_info.mutex);
 	INIT_LIST_HEAD(&mddev->disks);
 	INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev)
 
 static const struct attribute_group md_redundancy_group;
 
-static void md_free_rdev(struct mddev *mddev)
+void mddev_unlock(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
 	struct md_rdev *tmp;
+	LIST_HEAD(delete);
 
-	mutex_lock(&mddev->delete_mutex);
-	if (list_empty(&mddev->deleting))
-		goto out;
-
-	list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
-		list_del_init(&rdev->same_set);
-		kobject_del(&rdev->kobj);
-		export_rdev(rdev, mddev);
-	}
-out:
-	mutex_unlock(&mddev->delete_mutex);
-}
+	if (!list_empty(&mddev->deleting))
+		list_splice_init(&mddev->deleting, &delete);
 
-void mddev_unlock(struct mddev *mddev)
-{
 	if (mddev->to_remove) {
 		/* These cannot be removed under reconfig_mutex as
 		 * an access to the files will try to take reconfig_mutex
@@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev)
 	} else
 		mutex_unlock(&mddev->reconfig_mutex);
 
-	md_free_rdev(mddev);
+	list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
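The mddev_unlock() hunk above splices mddev->deleting onto a local list while reconfig_mutex is still held and only exports/frees the rdevs after the mutex is dropped, which is what removes the 'delete_mutex' deadlock. Below is a minimal sketch of that splice-then-release-outside-the-lock pattern using generic names (pending_list, pending_lock, item, release_item) rather than md's; it is an illustration of the idea, not the md code itself.

```c
/*
 * Minimal sketch of "splice under the lock, release after unlock".
 * Names are illustrative; release_item() stands in for the heavy
 * teardown work (kobject_del()/export in md's case) that may sleep.
 */
#include <linux/list.h>
#include <linux/mutex.h>

struct item {
	struct list_head node;
};

static LIST_HEAD(pending_list);
static DEFINE_MUTEX(pending_lock);

static void release_item(struct item *it)
{
	/* may sleep or take other locks; must not need pending_lock */
}

static void flush_pending(void)
{
	struct item *it, *tmp;
	LIST_HEAD(local);

	mutex_lock(&pending_lock);
	list_splice_init(&pending_list, &local);	/* grab everything */
	mutex_unlock(&pending_lock);

	/* teardown runs with no lock held, so it cannot deadlock against
	 * anyone who needs pending_lock to queue more work */
	list_for_each_entry_safe(it, tmp, &local, node) {
		list_del_init(&it->node);
		release_item(it);
	}
}
```

The design point is that the lock only protects list membership; once the entries are moved to a stack-local list they belong to the caller and can be torn down at leisure.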