author     Linus Torvalds <torvalds@linux-foundation.org>   2023-07-03 18:48:38 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-07-03 18:48:38 -0700
commit     e50df24979fd02f920aa7baada714a58bc61bfd9
tree       b0f12af3e2cb40e1ce0f3d2c4736e6bbec30ffb8
parent     4f52875366bfbd6ddc19c1045b603d853e0a889c
parent     3c2f765c81be1c85782ba09f492800a99f765a2c
Merge tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:

 "Mostly items that came in a bit late for the initial pull request,
  wanted to make sure they had the appropriate amount of linux-next
  soak before going upstream. Outside of stragglers, just generic
  fixes for either merge window items, or longer standing bugs"

* tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux: (25 commits)
  md/raid0: add discard support for the 'original' layout
  nvme: disable controller on reset state failure
  nvme: sync timeout work on failed reset
  nvme: ensure unquiesce on teardown
  cdrom/gdrom: Fix build error
  nvme: improved uring polling
  block: add request polling helper
  nvme-mpath: fix I/O failure with EAGAIN when failing over I/O
  nvme: host: fix command name spelling
  blk-sysfs: add a new attr_group for blk_mq
  blk-iocost: move wbt_enable/disable_default() out of spinlock
  blk-wbt: cleanup rwb_enabled() and wbt_disabled()
  blk-wbt: remove dead code to handle wbt enable/disable with io inflight
  blk-wbt: don't create wbt sysfs entry if CONFIG_BLK_WBT is disabled
  blk-mq: fix two misuses on RQF_USE_SCHED
  blk-throttle: Fix io statistics for cgroup v1
  bcache: Fix bcache device claiming
  bcache: Alloc holder object before async registration
  raid10: avoid spin_lock from fastpath from raid10_unplug()
  md: fix 'delete_mutex' deadlock
  ...
-rw-r--r--  block/blk-cgroup.c                 6
-rw-r--r--  block/blk-iocost.c                 7
-rw-r--r--  block/blk-mq.c                    54
-rw-r--r--  block/blk-sysfs.c                181
-rw-r--r--  block/blk-throttle.c               6
-rw-r--r--  block/blk-throttle.h               9
-rw-r--r--  block/blk-wbt.c                   21
-rw-r--r--  block/blk-wbt.h                   19
-rw-r--r--  drivers/cdrom/gdrom.c              4
-rw-r--r--  drivers/md/bcache/super.c        123
-rw-r--r--  drivers/md/md.c                   32
-rw-r--r--  drivers/md/md.h                    4
-rw-r--r--  drivers/md/raid0.c                62
-rw-r--r--  drivers/md/raid0.h                 1
-rw-r--r--  drivers/md/raid1-10.c              2
-rw-r--r--  drivers/md/raid10.c                6
-rw-r--r--  drivers/nvme/host/constants.c      2
-rw-r--r--  drivers/nvme/host/core.c           6
-rw-r--r--  drivers/nvme/host/ioctl.c         70
-rw-r--r--  drivers/nvme/host/multipath.c     10
-rw-r--r--  drivers/nvme/host/nvme.h           3
-rw-r--r--  drivers/nvme/host/pci.c            5
-rw-r--r--  drivers/nvme/target/nvmet.h        2
-rw-r--r--  include/linux/blk-mq.h             8
-rw-r--r--  include/uapi/linux/io_uring.h      2
25 files changed, 341 insertions(+), 304 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index aaf9903ad7b2..fc49be622e05 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -2086,6 +2086,9 @@ void blk_cgroup_bio_start(struct bio *bio)
struct blkg_iostat_set *bis;
unsigned long flags;
+ if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
+ return;
+
/* Root-level stats are sourced from system-wide IO stats */
if (!cgroup_parent(blkcg->css.cgroup))
return;
@@ -2116,8 +2119,7 @@ void blk_cgroup_bio_start(struct bio *bio)
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
- if (cgroup_subsys_on_dfl(io_cgrp_subsys))
- cgroup_rstat_updated(blkcg->css.cgroup, cpu);
+ cgroup_rstat_updated(blkcg->css.cgroup, cpu);
put_cpu();
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6084a9519883..9dfcf540f400 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3301,11 +3301,9 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_stat_enable_accounting(disk->queue);
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = true;
- wbt_disable_default(disk);
} else {
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = false;
- wbt_enable_default(disk);
}
if (user) {
@@ -3318,6 +3316,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ioc_refresh_params(ioc, true);
spin_unlock_irq(&ioc->lock);
+ if (enable)
+ wbt_disable_default(disk);
+ else
+ wbt_enable_default(disk);
+
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index decb6ab2d508..5504719b970d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -49,17 +49,8 @@ static void blk_mq_request_bypass_insert(struct request *rq,
blk_insert_t flags);
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
-
-static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
- blk_qc_t qc)
-{
- return xa_load(&q->hctx_table, qc);
-}
-
-static inline blk_qc_t blk_rq_to_qc(struct request *rq)
-{
- return rq->mq_hctx->queue_num;
-}
+static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+ struct io_comp_batch *iob, unsigned int flags);
/*
* Check if any of the ctx, dispatch list or elevator
@@ -1248,7 +1239,7 @@ void blk_mq_start_request(struct request *rq)
q->integrity.profile->prepare_fn(rq);
#endif
if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
- WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
+ WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num);
}
EXPORT_SYMBOL(blk_mq_start_request);
@@ -1280,7 +1271,11 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true;
- if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
+ /*
+ * Any request allocated from sched tags can't be issued to
+ * ->queue_rqs() directly
+ */
+ if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
plug->has_elevator = true;
rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq);
@@ -1350,7 +1345,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll);
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
do {
- blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0);
+ blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0);
cond_resched();
} while (!completion_done(wait));
}
@@ -4745,10 +4740,9 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
- unsigned int flags)
+static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+ struct io_comp_batch *iob, unsigned int flags)
{
- struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
long state = get_current_state();
int ret;
@@ -4773,6 +4767,32 @@ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *
return 0;
}
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
+ struct io_comp_batch *iob, unsigned int flags)
+{
+ struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie);
+
+ return blk_hctx_poll(q, hctx, iob, flags);
+}
+
+int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
+ unsigned int poll_flags)
+{
+ struct request_queue *q = rq->q;
+ int ret;
+
+ if (!blk_rq_is_poll(rq))
+ return 0;
+ if (!percpu_ref_tryget(&q->q_usage_counter))
+ return 0;
+
+ ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags);
+ blk_queue_exit(q);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blk_rq_poll);
+
unsigned int blk_mq_rq_cpu(struct request *rq)
{
return rq->mq_ctx->cpu;
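[Note: the blk-mq.c hunks above drop the cookie-based blk_qc_to_hctx() lookup in favour of passing the hctx directly, and export a new blk_rq_poll() helper so a caller holding a request pointer can poll for its completion without going through a bio cookie. A minimal caller sketch follows; the demo_ name is illustrative and not part of the patch, and the loop simply mirrors the in-tree blk_rq_poll_completion() pattern. The real new user is presumably the nvme uring-cmd path (drivers/nvme/host/ioctl.c in the diffstat), which is not shown in this excerpt.]

    #include <linux/blk-mq.h>
    #include <linux/completion.h>
    #include <linux/sched.h>

    /*
     * Illustrative only: busy-poll a polled passthrough request until its
     * completion has been signalled, instead of sleeping on the completion.
     * blk_rq_poll() returns 0 right away if the request is not pollable or
     * the queue usage reference cannot be taken.
     */
    static void demo_poll_for_completion(struct request *rq,
                                         struct completion *done)
    {
            while (!completion_done(done)) {
                    blk_rq_poll(rq, NULL, 0);
                    cond_resched();
            }
    }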
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a64208583853..afc797fb0dfc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -47,19 +47,6 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
-static ssize_t queue_var_store64(s64 *var, const char *page)
-{
- int err;
- s64 v;
-
- err = kstrtos64(page, 10, &v);
- if (err < 0)
- return err;
-
- *var = v;
- return 0;
-}
-
static ssize_t queue_requests_show(struct request_queue *q, char *page)
{
return queue_var_show(q->nr_requests, page);
@@ -451,61 +438,6 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
return count;
}
-static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
-{
- if (!wbt_rq_qos(q))
- return -EINVAL;
-
- if (wbt_disabled(q))
- return sprintf(page, "0\n");
-
- return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
-}
-
-static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
- size_t count)
-{
- struct rq_qos *rqos;
- ssize_t ret;
- s64 val;
-
- ret = queue_var_store64(&val, page);
- if (ret < 0)
- return ret;
- if (val < -1)
- return -EINVAL;
-
- rqos = wbt_rq_qos(q);
- if (!rqos) {
- ret = wbt_init(q->disk);
- if (ret)
- return ret;
- }
-
- if (val == -1)
- val = wbt_default_latency_nsec(q);
- else if (val >= 0)
- val *= 1000ULL;
-
- if (wbt_get_min_lat(q) == val)
- return count;
-
- /*
- * Ensure that the queue is idled, in case the latency update
- * ends up either enabling or disabling wbt completely. We can't
- * have IO inflight if that happens.
- */
- blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
-
- wbt_set_min_lat(q, val);
-
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
-
- return count;
-}
-
static ssize_t queue_wc_show(struct request_queue *q, char *page)
{
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@@ -598,7 +530,6 @@ QUEUE_RW_ENTRY(queue_wc, "write_cache");
QUEUE_RO_ENTRY(queue_fua, "fua");
QUEUE_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
-QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
@@ -617,8 +548,79 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats");
QUEUE_RW_ENTRY(queue_random, "add_random");
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
+#ifdef CONFIG_BLK_WBT
+static ssize_t queue_var_store64(s64 *var, const char *page)
+{
+ int err;
+ s64 v;
+
+ err = kstrtos64(page, 10, &v);
+ if (err < 0)
+ return err;
+
+ *var = v;
+ return 0;
+}
+
+static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+{
+ if (!wbt_rq_qos(q))
+ return -EINVAL;
+
+ if (wbt_disabled(q))
+ return sprintf(page, "0\n");
+
+ return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+}
+
+static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ struct rq_qos *rqos;
+ ssize_t ret;
+ s64 val;
+
+ ret = queue_var_store64(&val, page);
+ if (ret < 0)
+ return ret;
+ if (val < -1)
+ return -EINVAL;
+
+ rqos = wbt_rq_qos(q);
+ if (!rqos) {
+ ret = wbt_init(q->disk);
+ if (ret)
+ return ret;
+ }
+
+ if (val == -1)
+ val = wbt_default_latency_nsec(q);
+ else if (val >= 0)
+ val *= 1000ULL;
+
+ if (wbt_get_min_lat(q) == val)
+ return count;
+
+ /*
+ * Ensure that the queue is idled, in case the latency update
+ * ends up either enabling or disabling wbt completely. We can't
+ * have IO inflight if that happens.
+ */
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
+
+ wbt_set_min_lat(q, val);
+
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
+
+ return count;
+}
+
+QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
+#endif
+
static struct attribute *queue_attrs[] = {
- &queue_requests_entry.attr,
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr,
@@ -626,7 +628,6 @@ static struct attribute *queue_attrs[] = {
&queue_max_discard_segments_entry.attr,
&queue_max_integrity_segments_entry.attr,
&queue_max_segment_size_entry.attr,
- &elv_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr,
&queue_physical_block_size_entry.attr,
@@ -647,7 +648,6 @@ static struct attribute *queue_attrs[] = {
&queue_max_open_zones_entry.attr,
&queue_max_active_zones_entry.attr,
&queue_nomerges_entry.attr,
- &queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,
&queue_stable_writes_entry.attr,
&queue_random_entry.attr,
@@ -655,9 +655,7 @@ static struct attribute *queue_attrs[] = {
&queue_wc_entry.attr,
&queue_fua_entry.attr,
&queue_dax_entry.attr,
- &queue_wb_lat_entry.attr,
&queue_poll_delay_entry.attr,
- &queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
&blk_throtl_sample_time_entry.attr,
#endif
@@ -666,16 +664,23 @@ static struct attribute *queue_attrs[] = {
NULL,
};
+static struct attribute *blk_mq_queue_attrs[] = {
+ &queue_requests_entry.attr,
+ &elv_iosched_entry.attr,
+ &queue_rq_affinity_entry.attr,
+ &queue_io_timeout_entry.attr,
+#ifdef CONFIG_BLK_WBT
+ &queue_wb_lat_entry.attr,
+#endif
+ NULL,
+};
+
static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
int n)
{
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
- if (attr == &queue_io_timeout_entry.attr &&
- (!q->mq_ops || !q->mq_ops->timeout))
- return 0;
-
if ((attr == &queue_max_open_zones_entry.attr ||
attr == &queue_max_active_zones_entry.attr) &&
!blk_queue_is_zoned(q))
@@ -684,11 +689,30 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
return attr->mode;
}
+static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
+ struct request_queue *q = disk->queue;
+
+ if (!queue_is_mq(q))
+ return 0;
+
+ if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
+ return 0;
+
+ return attr->mode;
+}
+
static struct attribute_group queue_attr_group = {
.attrs = queue_attrs,
.is_visible = queue_attr_visible,
};
+static struct attribute_group blk_mq_queue_attr_group = {
+ .attrs = blk_mq_queue_attrs,
+ .is_visible = blk_mq_queue_attr_visible,
+};
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
@@ -733,6 +757,7 @@ static const struct sysfs_ops queue_sysfs_ops = {
static const struct attribute_group *blk_queue_attr_groups[] = {
&queue_attr_group,
+ &blk_mq_queue_attr_group,
NULL
};
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9d010d867fbf..7397ff199d66 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2178,12 +2178,6 @@ bool __blk_throtl_bio(struct bio *bio)
rcu_read_lock();
- if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
- blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
- bio->bi_iter.bi_size);
- blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
- }
-
spin_lock_irq(&q->queue_lock);
throtl_update_latency_buckets(td);
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index ef4b7a4de987..d1ccbfe9f797 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -185,6 +185,15 @@ static inline bool blk_should_throtl(struct bio *bio)
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
int rw = bio_data_dir(bio);
+ if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
+ if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
+ bio_set_flag(bio, BIO_CGROUP_ACCT);
+ blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
+ bio->bi_iter.bi_size);
+ }
+ blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
+ }
+
/* iops limit is always counted */
if (tg->has_rules_iops[rw])
return true;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 7a87506ff8e1..0bb613139bec 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -146,7 +146,7 @@ enum {
static inline bool rwb_enabled(struct rq_wb *rwb)
{
return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
- rwb->wb_normal != 0;
+ rwb->enable_state != WBT_STATE_OFF_MANUAL;
}
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@@ -201,15 +201,6 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
inflight = atomic_dec_return(&rqw->inflight);
/*
- * wbt got disabled with IO in flight. Wake up any potential
- * waiters, we don't have to do more than that.
- */
- if (unlikely(!rwb_enabled(rwb))) {
- rwb_wake_all(rwb);
- return;
- }
-
- /*
* For discards, our limit is always the background. For writes, if
* the device does write back caching, drop further down before we
* wake people up.
@@ -503,8 +494,7 @@ bool wbt_disabled(struct request_queue *q)
{
struct rq_qos *rqos = wbt_rq_qos(q);
- return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT ||
- RQWB(rqos)->enable_state == WBT_STATE_OFF_MANUAL;
+ return !rqos || !rwb_enabled(RQWB(rqos));
}
u64 wbt_get_min_lat(struct request_queue *q)
@@ -545,13 +535,6 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{
unsigned int limit;
- /*
- * If we got disabled, just return UINT_MAX. This ensures that
- * we'll properly inc a new IO, and dec+wakeup at the end.
- */
- if (!rwb_enabled(rwb))
- return UINT_MAX;
-
if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
return rwb->wb_background;
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index ba6cca5849a6..8a029e138f7a 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -18,10 +18,6 @@ u64 wbt_default_latency_nsec(struct request_queue *);
#else
-static inline int wbt_init(struct gendisk *disk)
-{
- return -EINVAL;
-}
static inline void wbt_disable_default(struct gendisk *disk)
{
}
@@ -31,21 +27,6 @@ static inline void wbt_enable_default(struct gendisk *disk)
static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
{
}
-static inline u64 wbt_get_min_lat(struct request_queue *q)
-{
- return 0;
-}
-static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
-{
-}
-static inline u64 wbt_default_latency_nsec(struct request_queue *q)
-{
- return 0;
-}
-static inline bool wbt_disabled(struct request_queue *q)
-{
- return true;
-}
#endif /* CONFIG_BLK_WBT */
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 3a46e27479ff..d668b174ace9 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -481,7 +481,7 @@ static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
disk_check_media_change(disk);
mutex_lock(&gdrom_mutex);
- ret = cdrom_open(gd.cd_info);
+ ret = cdrom_open(gd.cd_info, mode);
mutex_unlock(&gdrom_mutex);
return ret;
}
@@ -489,7 +489,7 @@ static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
static void gdrom_bdops_release(struct gendisk *disk)
{
mutex_lock(&gdrom_mutex);
- cdrom_release(gd.cd_info, mode);
+ cdrom_release(gd.cd_info);
mutex_unlock(&gdrom_mutex);
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e2a803683105..0ae2b3676293 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl)
put_page(virt_to_page(dc->sb_disk));
if (!IS_ERR_OR_NULL(dc->bdev))
- blkdev_put(dc->bdev, bcache_kobj);
+ blkdev_put(dc->bdev, dc);
wake_up(&unregister_wait);
@@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->bdev = bdev;
- dc->bdev->bd_holder = dc;
dc->sb_disk = sb_disk;
if (cached_dev_init(dc, sb->block_size << 9))
@@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj)
put_page(virt_to_page(ca->sb_disk));
if (!IS_ERR_OR_NULL(ca->bdev))
- blkdev_put(ca->bdev, bcache_kobj);
+ blkdev_put(ca->bdev, ca);
kfree(ca);
module_put(THIS_MODULE);
@@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
- ca->bdev->bd_holder = ca;
ca->sb_disk = sb_disk;
if (bdev_max_discard_sectors((bdev)))
@@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
* call blkdev_put() to bdev in bch_cache_release(). So we
* explicitly call blkdev_put() here.
*/
- blkdev_put(bdev, bcache_kobj);
+ blkdev_put(bdev, ca);
if (ret == -ENOMEM)
err = "cache_alloc(): -ENOMEM";
else if (ret == -EPERM)
@@ -2448,6 +2446,7 @@ struct async_reg_args {
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
struct block_device *bdev;
+ void *holder;
};
static void register_bdev_worker(struct work_struct *work)
@@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work)
int fail = false;
struct async_reg_args *args =
container_of(work, struct async_reg_args, reg_work.work);
- struct cached_dev *dc;
-
- dc = kzalloc(sizeof(*dc), GFP_KERNEL);
- if (!dc) {
- fail = true;
- put_page(virt_to_page(args->sb_disk));
- blkdev_put(args->bdev, bcache_kobj);
- goto out;
- }
mutex_lock(&bch_register_lock);
- if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
+ if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder)
+ < 0)
fail = true;
mutex_unlock(&bch_register_lock);
-out:
if (fail)
pr_info("error %s: fail to register backing device\n",
args->path);
@@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work)
int fail = false;
struct async_reg_args *args =
container_of(work, struct async_reg_args, reg_work.work);
- struct cache *ca;
-
- ca = kzalloc(sizeof(*ca), GFP_KERNEL);
- if (!ca) {
- fail = true;
- put_page(virt_to_page(args->sb_disk));
- blkdev_put(args->bdev, bcache_kobj);
- goto out;
- }
/* blkdev_put() will be called in bch_cache_release() */
- if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
+ if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder))
fail = true;
-out:
if (fail)
pr_info("error %s: fail to register cache device\n",
args->path);
@@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args)
queue_delayed_work(system_wq, &args->reg_work, 10);
}
+static void *alloc_holder_object(struct cache_sb *sb)
+{
+ if (SB_IS_BDEV(sb))
+ return kzalloc(sizeof(struct cached_dev), GFP_KERNEL);
+ return kzalloc(sizeof(struct cache), GFP_KERNEL);
+}
+
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
@@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
char *path = NULL;
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
- struct block_device *bdev;
+ struct block_device *bdev, *bdev2;
+ void *holder = NULL;
ssize_t ret;
bool async_registration = false;
+ bool quiet = false;
#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
async_registration = true;
@@ -2558,10 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
ret = -EINVAL;
err = "failed to open device";
- bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE,
- bcache_kobj, NULL);
+ bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
+ if (IS_ERR(bdev))
+ goto out_free_sb;
+
+ err = "failed to set blocksize";
+ if (set_blocksize(bdev, 4096))
+ goto out_blkdev_put;
+
+ err = read_super(sb, bdev, &sb_disk);
+ if (err)
+ goto out_blkdev_put;
+
+ holder = alloc_holder_object(sb);
+ if (!holder) {
+ ret = -ENOMEM;
+ err = "cannot allocate memory";
+ goto out_put_sb_page;
+ }
+
+ /* Now reopen in exclusive mode with proper holder */
+ bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ holder, NULL);
+ blkdev_put(bdev, NULL);
+ bdev = bdev2;
if (IS_ERR(bdev)) {
- if (bdev == ERR_PTR(-EBUSY)) {
+ ret = PTR_ERR(bdev);
+ bdev = NULL;
+ if (ret == -EBUSY) {
dev_t dev;
mutex_lock(&bch_register_lock);
@@ -2571,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
else
err = "device busy";
mutex_unlock(&bch_register_lock);
- if (attr == &ksysfs_register_quiet)
- goto done;
+ if (attr == &ksysfs_register_quiet) {
+ quiet = true;
+ ret = size;
+ }
}
- goto out_free_sb;
+ goto out_free_holder;
}
- err = "failed to set blocksize";
- if (set_blocksize(bdev, 4096))
- goto out_blkdev_put;
-
- err = read_super(sb, bdev, &sb_disk);
- if (err)
- goto out_blkdev_put;
-
err = "failed to register device";
if (async_registration) {
@@ -2595,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!args) {
ret = -ENOMEM;
err = "cannot allocate memory";
- goto out_put_sb_page;
+ goto out_free_holder;
}
args->path = path;
args->sb = sb;
args->sb_disk = sb_disk;
args->bdev = bdev;
+ args->holder = holder;
register_device_async(args);
/* No wait and returns to user space */
goto async_done;
}
if (SB_IS_BDEV(sb)) {
- struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
-
- if (!dc) {
- ret = -ENOMEM;
- err = "cannot allocate memory";
- goto out_put_sb_page;
- }
-
mutex_lock(&bch_register_lock);
- ret = register_bdev(sb, sb_disk, bdev, dc);
+ ret = register_bdev(sb, sb_disk, bdev, holder);
mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */
if (ret < 0)
goto out_free_sb;
} else {
- struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-
- if (!ca) {
- ret = -ENOMEM;
- err = "cannot allocate memory";
- goto out_put_sb_page;
- }
-
/* blkdev_put() will be called in bch_cache_release() */
- ret = register_cache(sb, sb_disk, bdev, ca);
+ ret = register_cache(sb, sb_disk, bdev, holder);
if (ret)
goto out_free_sb;
}
-done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
async_done:
return size;
+out_free_holder:
+ kfree(holder);
out_put_sb_page:
put_page(virt_to_page(sb_disk));
out_blkdev_put:
- blkdev_put(bdev, register_bcache);
+ if (bdev)
+ blkdev_put(bdev, holder);
out_free_sb:
kfree(sb);
out_free_path:
@@ -2656,7 +2650,8 @@ out_free_path:
out_module_put:
module_put(THIS_MODULE);
out:
- pr_info("error %s: %s\n", path?path:"", err);
+ if (!quiet)
+ pr_info("error %s: %s\n", path?path:"", err);
return ret;
}
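[Note: the register_bcache() rework above amounts to a probe-then-claim sequence: open the device non-exclusively and read-only just to read the superblock, allocate the matching holder object (struct cached_dev or struct cache), then reopen the device exclusively with that holder so later blkdev_put() calls can always be paired with it. A condensed sketch of that flow follows; it is illustrative only, not a drop-in, and error unwinding (including releasing the superblock page) plus the async-registration path are trimmed.]

    /* Condensed, illustrative version of the claiming sequence above. */
    static int demo_probe_then_claim(const char *path, struct cache_sb *sb,
                                     struct cache_sb_disk **sb_disk,
                                     struct block_device **ret_bdev,
                                     void **ret_holder)
    {
            struct block_device *bdev, *excl;
            void *holder;

            /* 1) non-exclusive read-only open, enough to read the superblock */
            bdev = blkdev_get_by_path(path, BLK_OPEN_READ, NULL, NULL);
            if (IS_ERR(bdev))
                    return PTR_ERR(bdev);
            if (set_blocksize(bdev, 4096) || read_super(sb, bdev, sb_disk)) {
                    blkdev_put(bdev, NULL);
                    return -EINVAL;
            }

            /* 2) the superblock now tells us which holder type to allocate */
            holder = alloc_holder_object(sb);
            if (!holder) {
                    blkdev_put(bdev, NULL);
                    return -ENOMEM;
            }

            /* 3) reopen exclusively with the real holder, drop the probe ref */
            excl = blkdev_get_by_dev(bdev->bd_dev,
                                     BLK_OPEN_READ | BLK_OPEN_WRITE,
                                     holder, NULL);
            blkdev_put(bdev, NULL);
            if (IS_ERR(excl)) {
                    kfree(holder);
                    return PTR_ERR(excl);
            }

            *ret_bdev = excl;
            *ret_holder = holder;
            return 0;
    }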
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cf3733c90c47..2e38ef421d69 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
{
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
- mutex_init(&mddev->delete_mutex);
mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks);
INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev)
static const struct attribute_group md_redundancy_group;
-static void md_free_rdev(struct mddev *mddev)
+void mddev_unlock(struct mddev *mddev)
{
struct md_rdev *rdev;
struct md_rdev *tmp;
+ LIST_HEAD(delete);
- mutex_lock(&mddev->delete_mutex);
- if (list_empty(&mddev->deleting))
- goto out;
-
- list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
- list_del_init(&rdev->same_set);
- kobject_del(&rdev->kobj);
- export_rdev(rdev, mddev);
- }
-out:
- mutex_unlock(&mddev->delete_mutex);
-}
+ if (!list_empty(&mddev->deleting))
+ list_splice_init(&mddev->deleting, &delete);
-void mddev_unlock(struct mddev *mddev)
-{
if (mddev->to_remove) {
/* These cannot be removed under reconfig_mutex as
* an access to the files will try to take reconfig_mutex
@@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev)
} else
mutex_unlock(&mddev->reconfig_mutex);
- md_free_rdev(mddev);
+ list_for_each_entry_safe(rdev, tmp, &delete, same_set) {