summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/md/md-cluster.txt3
-rw-r--r--MAINTAINERS7
-rw-r--r--block/badblocks.c2
-rw-r--r--drivers/md/Kconfig5
-rw-r--r--drivers/md/Makefile5
-rw-r--r--drivers/md/dm-raid.c12
-rw-r--r--drivers/md/md-bitmap.c (renamed from drivers/md/bitmap.c)27
-rw-r--r--drivers/md/md-bitmap.h (renamed from drivers/md/bitmap.h)0
-rw-r--r--drivers/md/md-cluster.c12
-rw-r--r--drivers/md/md-faulty.c (renamed from drivers/md/faulty.c)0
-rw-r--r--drivers/md/md-linear.c (renamed from drivers/md/linear.c)2
-rw-r--r--drivers/md/md-linear.h (renamed from drivers/md/linear.h)0
-rw-r--r--drivers/md/md-multipath.c (renamed from drivers/md/multipath.c)4
-rw-r--r--drivers/md/md-multipath.h (renamed from drivers/md/multipath.h)0
-rw-r--r--drivers/md/md.c147
-rw-r--r--drivers/md/md.h20
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c78
-rw-r--r--drivers/md/raid10.c169
-rw-r--r--drivers/md/raid10.h6
-rw-r--r--drivers/md/raid5-cache.c44
-rw-r--r--drivers/md/raid5-log.h2
-rw-r--r--drivers/md/raid5-ppl.c6
-rw-r--r--drivers/md/raid5.c79
24 files changed, 409 insertions, 223 deletions
diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt
index 82ee51604e9a..e1055f105cf5 100644
--- a/Documentation/md/md-cluster.txt
+++ b/Documentation/md/md-cluster.txt
@@ -1,4 +1,5 @@
-The cluster MD is a shared-device RAID for a cluster.
+The cluster MD is a shared-device RAID for a cluster, it supports
+two levels: raid1 and raid10 (limited support).
1. On-disk format
diff --git a/MAINTAINERS b/MAINTAINERS
index ba3d8c197d92..8604cf64a169 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4103,6 +4103,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git
T: quilt http://people.redhat.com/agk/patches/linux/editing/
S: Maintained
F: Documentation/device-mapper/
+F: drivers/md/Makefile
+F: drivers/md/Kconfig
F: drivers/md/dm*
F: drivers/md/persistent-data/
F: include/linux/device-mapper.h
@@ -12487,7 +12489,10 @@ M: Shaohua Li <shli@kernel.org>
L: linux-raid@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
S: Supported
-F: drivers/md/
+F: drivers/md/Makefile
+F: drivers/md/Kconfig
+F: drivers/md/md*
+F: drivers/md/raid*
F: include/linux/raid/
F: include/uapi/linux/raid/
diff --git a/block/badblocks.c b/block/badblocks.c
index 43c71166e1e2..91f7bcf979d3 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -178,7 +178,7 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
if (bb->shift < 0)
/* badblocks are disabled */
- return 0;
+ return 1;
if (bb->shift) {
/* round the start down, and the end up */
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 4a249ee86364..83b9362be09c 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -178,7 +178,7 @@ config MD_FAULTY
config MD_CLUSTER
- tristate "Cluster Support for MD (EXPERIMENTAL)"
+ tristate "Cluster Support for MD"
depends on BLK_DEV_MD
depends on DLM
default n
@@ -188,7 +188,8 @@ config MD_CLUSTER
nodes in the cluster can access the MD devices simultaneously.
This brings the redundancy (and uptime) of RAID levels across the
- nodes of the cluster.
+ nodes of the cluster. Currently, it can work with raid1 and raid10
+ (limited support).
If unsure, say N.
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index e94b6f9be941..f701bb211783 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -19,9 +19,12 @@ dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
dm-cache-smq-y += dm-cache-policy-smq.o
dm-era-y += dm-era-target.o
dm-verity-y += dm-verity-target.o
-md-mod-y += md.o bitmap.o
+md-mod-y += md.o md-bitmap.o
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
+linear-y += md-linear.o
+multipath-y += md-multipath.o
+faulty-y += md-faulty.o
# Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index a25eebd98996..366c625b9591 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -12,7 +12,7 @@
#include "raid1.h"
#include "raid5.h"
#include "raid10.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
#include <linux/device-mapper.h>
@@ -3630,8 +3630,11 @@ static void raid_postsuspend(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
- if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+ if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+ mddev_lock_nointr(&rs->md);
mddev_suspend(&rs->md);
+ mddev_unlock(&rs->md);
+ }
rs->md.ro = 1;
}
@@ -3888,8 +3891,11 @@ static void raid_resume(struct dm_target *ti)
if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+ if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+ mddev_lock_nointr(mddev);
mddev_resume(mddev);
+ mddev_unlock(mddev);
+ }
}
static struct target_type raid_target = {
diff --git a/drivers/md/bitmap.c b/drivers/md/md-bitmap.c
index 4d8ed74efadf..239c7bb3929b 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -29,7 +29,7 @@
#include <linux/seq_file.h>
#include <trace/events/block.h>
#include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
static inline char *bmname(struct bitmap *bitmap)
{
@@ -459,7 +459,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
/* rocking back to read-only */
bitmap->events_cleared = bitmap->mddev->events;
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
- sb->state = cpu_to_le32(bitmap->flags);
+ /*
+ * clear BITMAP_WRITE_ERROR bit to protect against the case that
+ * a bitmap write error occurred but the later writes succeeded.
+ */
+ sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
/* Just in case these have been changed via sysfs: */
sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -625,7 +629,7 @@ re_read:
err = read_sb_page(bitmap->mddev,
offset,
sb_page,
- 0, PAGE_SIZE);
+ 0, sizeof(bitmap_super_t));
}
if (err)
return err;
@@ -1816,6 +1820,12 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot)
BUG_ON(file && mddev->bitmap_info.offset);
+ if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+ pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
+ mdname(mddev));
+ return ERR_PTR(-EBUSY);
+ }
+
bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
if (!bitmap)
return ERR_PTR(-ENOMEM);
@@ -2123,7 +2133,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
if (store.sb_page && bitmap->storage.sb_page)
memcpy(page_address(store.sb_page),
page_address(bitmap->storage.sb_page),
- PAGE_SIZE);
+ sizeof(bitmap_super_t));
bitmap_file_unmap(&bitmap->storage);
bitmap->storage = store;
@@ -2152,6 +2162,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
for (k = 0; k < page; k++) {
kfree(new_bp[k].map);
}
+ kfree(new_bp);
/* restore some fields from old_counts */
bitmap->counts.bp = old_counts.bp;
@@ -2202,6 +2213,14 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
block += old_blocks;
}
+ if (bitmap->counts.bp != old_counts.bp) {
+ unsigned long k;
+ for (k = 0; k < old_counts.pages; k++)
+ if (!old_counts.bp[k].hijacked)
+ kfree(old_counts.bp[k].map);
+ kfree(old_counts.bp);
+ }
+
if (!init) {
int i;
while (block < (chunks << chunkshift)) {
diff --git a/drivers/md/bitmap.h b/drivers/md/md-bitmap.h
index 5df35ca90f58..5df35ca90f58 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/md-bitmap.h
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 03082e17c65c..79bfbc840385 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -15,7 +15,7 @@
#include <linux/sched.h>
#include <linux/raid/md_p.h>
#include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
#include "md-cluster.h"
#define LVB_SIZE 64
@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
static void remove_suspend_info(struct mddev *mddev, int slot)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
+ mddev->pers->quiesce(mddev, 1);
spin_lock_irq(&cinfo->suspend_lock);
__remove_suspend_info(cinfo, slot);
spin_unlock_irq(&cinfo->suspend_lock);
- mddev->pers->quiesce(mddev, 2);
+ mddev->pers->quiesce(mddev, 0);
}
@@ -492,13 +493,12 @@ static void process_suspend_info(struct mddev *mddev,
s->lo = lo;
s->hi = hi;
mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
spin_lock_irq(&cinfo->suspend_lock);
/* Remove existing entry (if exists) before adding */
__remove_suspend_info(cinfo, slot);
list_add(&s->list, &cinfo->suspend_list);
spin_unlock_irq(&cinfo->suspend_lock);
- mddev->pers->quiesce(mddev, 2);
+ mddev->pers->quiesce(mddev, 0);
}
static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
@@ -1094,7 +1094,7 @@ static void metadata_update_cancel(struct mddev *mddev)
/*
* return 0 if all the bitmaps have the same sync_size
*/
-int cluster_check_sync_size(struct mddev *mddev)
+static int cluster_check_sync_size(struct mddev *mddev)
{
int i, rv;
bitmap_super_t *sb;
@@ -1478,7 +1478,7 @@ static struct md_cluster_operations cluster_ops = {
static int __init cluster_init(void)
{
- pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
+ pr_warn("md-cluster: support raid1 and raid10 (limited support)\n");
pr_info("Registering Cluster MD functions\n");
register_md_cluster_operations(&cluster_ops, THIS_MODULE);
return 0;
diff --git a/drivers/md/faulty.c b/drivers/md/md-faulty.c
index 38264b38420f..38264b38420f 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/md-faulty.c
diff --git a/drivers/md/linear.c b/drivers/md/md-linear.c
index c464fb48039a..773fc70dced7 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/md-linear.c
@@ -23,7 +23,7 @@
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
-#include "linear.h"
+#include "md-linear.h"
/*
* find which device holds a particular offset
diff --git a/drivers/md/linear.h b/drivers/md/md-linear.h
index 8381d651d4ed..8381d651d4ed 100644
--- a/drivers/md/linear.h
+++ b/drivers/md/md-linear.h
diff --git a/drivers/md/multipath.c b/drivers/md/md-multipath.c
index b68e0666b9b0..e40065bdbfc8 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/md-multipath.c
@@ -25,7 +25,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "md.h"
-#include "multipath.h"
+#include "md-multipath.h"
#define MAX_WORK_PER_DISK 128
@@ -243,7 +243,6 @@ static void print_multipath_conf (struct mpconf *conf)
static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{
struct mpconf *conf = mddev->private;
- struct request_queue *q;
int err = -EEXIST;
int path;
struct multipath_info *p;
@@ -257,7 +256,6 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
for (path = first; path <= last; path++)
if ((p=conf->multipaths+path)->rdev == NULL) {
- q = rdev->bdev->bd_disk->queue;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
diff --git a/drivers/md/multipath.h b/drivers/md/md-multipath.h
index 0adb941f485a..0adb941f485a 100644
--- a/drivers/md/multipath.h
+++ b/drivers/md/md-multipath.h
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 447ddcbc9566..09c3af3dcdca 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -69,7 +69,7 @@
#include <trace/events/block.h>
#include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
#include "md-cluster.h"
#ifndef MODULE
@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
* call has finished, the bio has been linked into some internal structure
* and so is visible to ->quiesce(), so we don't need the refcount any more.
*/
+static bool is_suspended(struct mddev *mddev, struct bio *bio)
+{
+ if (mddev->suspended)
+ return true;
+ if (bio_data_dir(bio) != WRITE)
+ return false;
+ if (mddev->suspend_lo >= mddev->suspend_hi)
+ return false;
+ if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
+ return false;
+ if (bio_end_sector(bio) < mddev->suspend_lo)
+ return false;
+ return true;
+}
+
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
rcu_read_lock();
- if (mddev->suspended) {
+ if (is_suspended(mddev, bio)) {
DEFINE_WAIT(__wait);
for (;;) {
prepare_to_wait(&mddev->sb_wait, &__wait,
TASK_UNINTERRUPTIBLE);
- if (!mddev->suspended)
+ if (!is_suspended(mddev, bio))
break;
rcu_read_unlock();
schedule();
@@ -344,12 +359,17 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
void mddev_suspend(struct mddev *mddev)
{
WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
+ lockdep_assert_held(&mddev->reconfig_mutex);
if (mddev->suspended++)
return;
synchronize_rcu();
wake_up(&mddev->sb_wait);
+ set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
+ smp_mb__after_atomic();
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
+ clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
+ wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
del_timer_sync(&mddev->safemode_timer);
}
@@ -357,6 +377,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
+ lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
wake_up(&mddev->sb_wait);
@@ -663,6 +684,7 @@ void mddev_unlock(struct mddev *mddev)
*/
spin_lock(&pers_lock);
md_wakeup_thread(mddev->thread);
+ wake_up(&mddev->sb_wait);
spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -2313,7 +2335,7 @@ static void export_array(struct mddev *mddev)
static bool set_in_sync(struct mddev *mddev)
{
- WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));
+ lockdep_assert_held(&mddev->lock);
if (!mddev->in_sync) {
mddev->sync_checkers++;
spin_unlock(&mddev->lock);
@@ -2432,10 +2454,18 @@ repeat:
}
}
- /* First make sure individual recovery_offsets are correct */
+ /*
+ * First make sure individual recovery_offsets are correct
+ * curr_resync_completed can only be used during recovery.
+ * During reshape/resync it might use array-addresses rather
+ * that device addresses.
+ */
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 &&
mddev->delta_disks >= 0 &&
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
!test_bit(Journal, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
mddev->curr_resync_completed > rdev->recovery_offset)
@@ -4824,7 +4854,7 @@ suspend_lo_show(struct mddev *mddev, char *page)
static ssize_t
suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
{
- unsigned long long old, new;
+ unsigned long long new;
int err;
err = kstrtoull(buf, 10, &new);
@@ -4840,16 +4870,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL)
goto unlock;
- old = mddev->suspend_lo;
+ mddev_suspend(mddev);
mddev->suspend_lo = new;
- if (new >= old)
- /* Shrinking suspended region */
- mddev->pers->quiesce(mddev, 2);
- else {
- /* Expanding suspended region - need to wait */
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- }
+ mddev_resume(mddev);
+
err = 0;
unlock:
mddev_unlock(mddev);
@@ -4867,7 +4891,7 @@ suspend_hi_show(struct mddev *mddev, char *page)
static ssize_t
suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
{
- unsigned long long old, new;
+ unsigned long long new;
int err;
err = kstrtoull(buf, 10, &new);
@@ -4880,19 +4904,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
if (err)
return err;
err = -EINVAL;
- if (mddev->pers == NULL ||
- mddev->pers->quiesce == NULL)
+ if (mddev->pers == NULL)
goto unlock;
- old = mddev->suspend_hi;
+
+ mddev_suspend(mddev);
mddev->suspend_hi = new;
- if (new <= old)
- /* Shrinking suspended region */
- mddev->pers->quiesce(mddev, 2);
- else {
- /* Expanding suspended region - need to wait */
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- }
+ mddev_resume(mddev);
+
err = 0;
unlock:
mddev_unlock(mddev);
@@ -5834,8 +5852,14 @@ void md_stop(struct mddev *mddev)
* This is called from dm-raid
*/
__md_stop(mddev);
- if (mddev->bio_set)
+ if (mddev->bio_set) {
bioset_free(mddev->bio_set);
+ mddev->bio_set = NULL;
+ }
+ if (mddev->sync_set) {
+ bioset_free(mddev->sync_set);
+ mddev->sync_set = NULL;
+ }
}
EXPORT_SYMBOL_GPL(md_stop);
@@ -6362,7 +6386,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
break;
}
}
- if (has_journal) {
+ if (has_journal || mddev->bitmap) {
export_rdev(rdev);
return -EBUSY;
}
@@ -6618,22 +6642,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
return -ENOENT; /* cannot remove what isn't there */
err = 0;
if (mddev->pers) {
- mddev->pers->quiesce(mddev, 1);
if (fd >= 0) {
struct bitmap *bitmap;
bitmap = bitmap_create(mddev, -1);
+ mddev_suspend(mddev);
if (!IS_ERR(bitmap)) {
mddev->bitmap = bitmap;
err = bitmap_load(mddev);
} else
err = PTR_ERR(bitmap);
- }
- if (fd < 0 || err) {
+ if (err) {
+ bitmap_destroy(mddev);
+ fd = -1;
+ }
+ mddev_resume(mddev);
+ } else if (fd < 0) {
+ mddev_suspend(mddev);
bitmap_destroy(mddev);
- fd = -1; /* make sure to put the file */
+ mddev_resume(mddev);
}
- mddev->pers->quiesce(mddev, 0);
}
if (fd < 0) {
struct file *f = mddev->bitmap_info.file;
@@ -6735,7 +6763,7 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
{
- WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
+ lockdep_assert_held(&mddev->reconfig_mutex);
if (mddev->external_size)
return;
@@ -6917,8 +6945,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
mddev->bitmap_info.default_space;
- mddev->pers->quiesce(mddev, 1);
bitmap = bitmap_create(mddev, -1);
+ mddev_suspend(mddev);
if (!IS_ERR(bitmap)) {
mddev->bitmap = bitmap;
rv = bitmap_load(mddev);
@@ -6926,7 +6954,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
rv = PTR_ERR(bitmap);
if (rv)
bitmap_destroy(mddev);
- mddev->pers->quiesce(mddev, 0);
+ mddev_resume(mddev);
} else {
/* remove the bitmap */
if (!mddev->bitmap) {
@@ -6949,9 +6977,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.nodes = 0;
md_cluster_ops->leave(mddev);
}
- mddev->pers->quiesce(mddev, 1);
+ mddev_suspend(mddev);
bitmap_destroy(mddev);
- mddev->pers->quiesce(mddev, 0);
+ mddev_resume(mddev);
mddev->bitmap_info.offset = 0;
}
}
@@ -7468,8 +7496,8 @@ void md_wakeup_thread(struct md_thread *thread)
{
if (thread) {
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
- if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags))
- wake_up(&thread->wqueue);
+ set_bit(THREAD_WAKEUP, &thread->flags);
+ wake_up(&thread->wqueue);
}
}
EXPORT_SYMBOL(md_wakeup_thread);
@@ -8039,7 +8067,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
+ mddev->suspended);
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
percpu_ref_put(&mddev->writes_pending);
return false;
@@ -8110,7 +8139,6 @@ void md_allow_write(struct mddev *mddev)
sysfs_notify_dirent_safe(mddev->sysfs_state);
/* wait for the dirty state to be recorded in the metadata */
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
} else
spin_unlock(&mddev->lock);
@@ -8477,16 +8505,19 @@ void md_do_sync(struct md_thread *thread)
} else {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector;
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- mddev->delta_disks >= 0 &&
- !test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < mddev->curr_resync)
- rdev->recovery_offset = mddev->curr_resync;
- rcu_read_unlock();
+ if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (rdev->raid_disk >= 0 &&
+ mddev->delta_disks >= 0 &&
+ !test_bit(Journal, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags) &&
+ !test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < mddev->curr_resync)
+ rdev->recovery_offset = mddev->curr_resync;
+ rcu_read_unlock();
+ }
}
}
skip:
@@ -8813,6 +8844,16 @@ void md_check_recovery(struct mddev *mddev)
unlock:
wake_up(&mddev->sb_wait);
mddev_unlock(mddev);
+ } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+ /* Write superblock - thread that called mddev_suspend()
+ * holds reconfig_mutex for us.
+ */
+ set_bit(MD_UPDATING_SB, &mddev->flags);
+ smp_mb__after_atomic();
+ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+ md_update_sb(mddev, 0);
+ clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+ wake_up(&mddev->sb_wait);
}
}
EXPORT_SYMBOL(md_check_recovery);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d8287d3cd1bf..7d6bcf0eba0c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -237,6 +237,12 @@ enum mddev_flags {
*/
MD_HAS_PPL, /* The raid array has PPL feature set */
MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
+ MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
+ * the metadata without taking reconfig_mutex.
+ */
+ MD_UPDATING_SB, /* md_check_recovery is updating the metadata
+ * without explicitly holding reconfig_mutex.
+ */
};
enum mddev_sb_flags {
@@ -494,11 +500,6 @@ static inline void mddev_lock_nointr(struct mddev *mddev)
mutex_lock(&mddev->reconfig_mutex);
}
-static inline int mddev_is_locked(struct mddev *mddev)
-{
- return mutex_is_locked(&mddev->reconfig_mutex);
-}
-
static inline int mddev_trylock(struct mddev *mddev)
{
return mutex_trylock(&mddev->reconfig_mutex);
@@ -538,12 +539,11 @@ struct md_personality
int (*check_reshape) (struct mddev *mddev);
int (*start_reshape) (struct mddev *mddev);
void (*finish_reshape) (struct mddev *mddev);
- /* quiesce moves between quiescence states
- * 0 - fully active
- * 1 - no new requests allowed
- * others - reserved
+ /* quiesce suspends or resumes internal processing.
+ * 1 - stop new actions and wait for action io to complete
+ * 0 - return to normal behaviour
*/
- void (*quiesce) (struct mddev *mddev, int state);
+ void (*quiesce) (struct mddev *mddev, int quiesce);
/* takeover is used to transition an array from one
* personality to another. The new personality must be able
* to handle the data in the current layout.
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 5a00fc118470..5ecba9eef441 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev *mddev)
return ERR_PTR(-EINVAL);
}
-static void raid0_quiesce(struct mddev *mddev, int state)
+static void raid0_quiesce(struct mddev *mddev, int quiesce)
{
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f3f3e40dc9d8..cc9d337a1ed3 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -37,13 +37,12 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
-#include <linux/sched/signal.h>
#include <trace/events/block.h>
#include "md.h"
#include "raid1.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
#define UNSUPPORTED_MDDEV_FLAGS \
((1L << MD_HAS_JOURNAL) | \
@@ -990,14 +989,6 @@ static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
_wait_barrier(conf, idx);
}
-static void wait_all_barriers(struct r1conf *conf)
-{
- int idx;
-
- for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
- _wait_barrier(conf, idx);
-}
-
static void _allow_barrier(struct r1conf *conf, int idx)
{
atomic_dec(&conf->nr_pending[idx]);
@@ -1011,14 +1002,6 @@ static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
_allow_barrier(conf, idx);
}
-static void allow_all_barriers(struct r1conf *conf)
-{
- int idx;
-
- for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
- _allow_barrier(conf, idx);
-}
-
/* conf->resync_lock should be held */
static int get_unqueued_pending(struct r1conf *conf)
{
@@ -1303,42 +1286,28 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
int first_clone;
int max_sectors;
- /*
- * Register the new request and wait if the reconstruction
- * thread has put up a bar for new requests.
- * Continue immediately if no resync is active currently.
- */
-
-
- if ((bio_end_sector(bio) > mddev->suspend_lo &&
- bio->bi_iter.bi_sector < mddev->suspend_hi) ||
- (mddev_is_clustered(mddev) &&
+ if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+ bio->bi_iter.bi_sector, bio_end_sector(bio))) {
- /*
- * As the suspend_* range is controlled by userspace, we want
- * an interruptible wait.
- */
DEFINE_WAIT(w);
for (;;) {
- sigset_t full, old;
prepare_to_wait(&conf->wait_barrier,
- &w, TASK_INTERRUPTIBLE);
- if (bio_end_sector(bio) <= mddev->suspend_lo ||
- bio->bi_iter.bi_sector >= mddev->suspend_hi ||
- (mddev_is_clustered(mddev) &&
- !md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector,
- bio_end_sector(bio))))
+ &w, TASK_IDLE);
+ if (!md_cluster_ops->area_resyncing(mddev, WRITE,
+ bio->bi_iter.bi_sector,
+ bio_end_sector(bio)))
break;
- sigfillset(&full);
- sigprocmask(SIG_BLOCK, &full, &old);
schedule();
- sigprocmask(SIG_SETMASK, &old, NULL);
}
finish_wait(&conf->wait_barrier, &w);
}
+
+ /*
+ * Register the new request and wait if the reconstruction
+ * thread has put up a bar for new requests.
+ * Continue immediately if no resync is active currently.
+ */
wait_barrier(conf, bio->bi_iter.bi_sector);
r1_bio = alloc_r1bio(mddev, bio);
@@ -1654,8 +1623,12 @@ static void print_conf(struct r1conf *conf)
static void close_sync(struct r1conf *conf)
{
- wait_all_barriers(conf);
- allow_all_barriers(conf);
+ int idx;
+
+ for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++) {
+ _wait_barrier(conf, idx);
+ _allow_barrier(conf, idx);
+ }
mempool_destroy(conf->r1buf_pool);
conf->r1buf_pool = NULL;
@@ -3277,21 +3250,14 @@ static int raid1_reshape(struct mddev *mddev)
return 0;
}
-static void raid1_quiesce(struct mddev *mddev, int state)
+st