diff options
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/dm-bufio.c | 5 | ||||
| -rw-r--r-- | drivers/md/dm-cache-policy-mq.c | 13 | ||||
| -rw-r--r-- | drivers/md/dm-cache-target.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-delay.c | 23 | ||||
| -rw-r--r-- | drivers/md/dm-snap.c | 71 | ||||
| -rw-r--r-- | drivers/md/dm-stats.c | 1 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 5 | ||||
| -rw-r--r-- | drivers/md/dm-thin-metadata.c | 8 | ||||
| -rw-r--r-- | drivers/md/dm-thin-metadata.h | 1 | ||||
| -rw-r--r-- | drivers/md/dm-thin.c | 66 | ||||
| -rw-r--r-- | drivers/md/md.c | 147 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-array.c | 10 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 6 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.h | 7 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 32 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 8 | ||||
| -rw-r--r-- | drivers/md/raid1.c | 162 | ||||
| -rw-r--r-- | drivers/md/raid1.h | 15 | ||||
| -rw-r--r-- | drivers/md/raid10.c | 6 | ||||
| -rw-r--r-- | drivers/md/raid5.c | 425 | ||||
| -rw-r--r-- | drivers/md/raid5.h | 16 |
21 files changed, 760 insertions, 269 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 173cbb20d104..54bdd923316f 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1717,6 +1717,11 @@ static int __init dm_bufio_init(void) { __u64 mem; + dm_bufio_allocated_kmem_cache = 0; + dm_bufio_allocated_get_free_pages = 0; + dm_bufio_allocated_vmalloc = 0; + dm_bufio_current_allocated = 0; + memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches); memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names); diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 416b7b752a6e..64780ad73bb0 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -730,15 +730,18 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, int r = 0; bool updated = updated_this_tick(mq, e); - requeue_and_update_tick(mq, e); - if ((!discarded_oblock && updated) || - !should_promote(mq, e, discarded_oblock, data_dir)) + !should_promote(mq, e, discarded_oblock, data_dir)) { + requeue_and_update_tick(mq, e); result->op = POLICY_MISS; - else if (!can_migrate) + + } else if (!can_migrate) r = -EWOULDBLOCK; - else + + else { + requeue_and_update_tick(mq, e); r = pre_cache_to_cache(mq, e, result); + } return r; } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9efcf1059b99..1b1469ebe5cb 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2755,7 +2755,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) { int r; - r = dm_cache_resize(cache->cmd, cache->cache_size); + r = dm_cache_resize(cache->cmd, new_size); if (r) { DMERR("could not resize cache metadata"); return r; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 496d5f3646a5..2f91d6d4a2cc 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -20,6 +20,7 @@ struct delay_c { struct timer_list delay_timer; struct mutex timer_lock; + struct workqueue_struct *kdelayd_wq; struct work_struct flush_expired_bios; struct list_head delayed_bios; atomic_t may_delay; @@ -45,14 +46,13 @@ struct dm_delay_info { static DEFINE_MUTEX(delayed_bios_lock); -static struct workqueue_struct *kdelayd_wq; static struct kmem_cache *delayed_cache; static void handle_delayed_timer(unsigned long data) { struct delay_c *dc = (struct delay_c *)data; - queue_work(kdelayd_wq, &dc->flush_expired_bios); + queue_work(dc->kdelayd_wq, &dc->flush_expired_bios); } static void queue_timeout(struct delay_c *dc, unsigned long expires) @@ -191,6 +191,12 @@ out: goto bad_dev_write; } + dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); + if (!dc->kdelayd_wq) { + DMERR("Couldn't start kdelayd"); + goto bad_queue; + } + setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc); INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); @@ -203,6 +209,8 @@ out: ti->private = dc; return 0; +bad_queue: + mempool_destroy(dc->delayed_pool); bad_dev_write: if (dc->dev_write) dm_put_device(ti, dc->dev_write); @@ -217,7 +225,7 @@ static void delay_dtr(struct dm_target *ti) { struct delay_c *dc = ti->private; - flush_workqueue(kdelayd_wq); + destroy_workqueue(dc->kdelayd_wq); dm_put_device(ti, dc->dev_read); @@ -350,12 +358,6 @@ static int __init dm_delay_init(void) { int r = -ENOMEM; - kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); - if (!kdelayd_wq) { - DMERR("Couldn't start kdelayd"); - goto bad_queue; - } - delayed_cache = KMEM_CACHE(dm_delay_info, 0); if (!delayed_cache) { DMERR("Couldn't create delayed bio cache."); @@ -373,8 +375,6 @@ static int __init dm_delay_init(void) bad_register: kmem_cache_destroy(delayed_cache); bad_memcache: - destroy_workqueue(kdelayd_wq); -bad_queue: return r; } @@ -382,7 +382,6 @@ static void __exit dm_delay_exit(void) { dm_unregister_target(&delay_target); kmem_cache_destroy(delayed_cache); - destroy_workqueue(kdelayd_wq); } /* Module hooks */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index aec57d76db5d..944690bafd93 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -66,6 +66,18 @@ struct dm_snapshot { atomic_t pending_exceptions_count; + /* Protected by "lock" */ + sector_t exception_start_sequence; + + /* Protected by kcopyd single-threaded callback */ + sector_t exception_complete_sequence; + + /* + * A list of pending exceptions that completed out of order. + * Protected by kcopyd single-threaded callback. + */ + struct list_head out_of_order_list; + mempool_t *pending_pool; struct dm_exception_table pending; @@ -173,6 +185,14 @@ struct dm_snap_pending_exception { */ int started; + /* There was copying error. */ + int copy_error; + + /* A sequence number, it is used for in-order completion. */ + sector_t exception_sequence; + + struct list_head out_of_order_entry; + /* * For writing a complete chunk, bypassing the copy. */ @@ -1094,6 +1114,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->valid = 1; s->active = 0; atomic_set(&s->pending_exceptions_count, 0); + s->exception_start_sequence = 0; + s->exception_complete_sequence = 0; + INIT_LIST_HEAD(&s->out_of_order_list); init_rwsem(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); @@ -1443,6 +1466,19 @@ static void commit_callback(void *context, int success) pending_complete(pe, success); } +static void complete_exception(struct dm_snap_pending_exception *pe) +{ + struct dm_snapshot *s = pe->snap; + + if (unlikely(pe->copy_error)) + pending_complete(pe, 0); + + else + /* Update the metadata if we are persistent */ + s->store->type->commit_exception(s->store, &pe->e, + commit_callback, pe); +} + /* * Called when the copy I/O has finished. kcopyd actually runs * this code so don't block. @@ -1452,13 +1488,32 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) struct dm_snap_pending_exception *pe = context; struct dm_snapshot *s = pe->snap; - if (read_err || write_err) - pending_complete(pe, 0); + pe->copy_error = read_err || write_err; - else - /* Update the metadata if we are persistent */ - s->store->type->commit_exception(s->store, &pe->e, - commit_callback, pe); + if (pe->exception_sequence == s->exception_complete_sequence) { + s->exception_complete_sequence++; + complete_exception(pe); + + while (!list_empty(&s->out_of_order_list)) { + pe = list_entry(s->out_of_order_list.next, + struct dm_snap_pending_exception, out_of_order_entry); + if (pe->exception_sequence != s->exception_complete_sequence) + break; + s->exception_complete_sequence++; + list_del(&pe->out_of_order_entry); + complete_exception(pe); + } + } else { + struct list_head *lh; + struct dm_snap_pending_exception *pe2; + + list_for_each_prev(lh, &s->out_of_order_list) { + pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry); + if (pe2->exception_sequence < pe->exception_sequence) + break; + } + list_add(&pe->out_of_order_entry, lh); + } } /* @@ -1553,6 +1608,8 @@ __find_pending_exception(struct dm_snapshot *s, return NULL; } + pe->exception_sequence = s->exception_start_sequence++; + dm_insert_exception(&s->pending, &pe->e); return pe; @@ -2192,7 +2249,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 11, 1}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 3d404c1371ed..28a90122a5a8 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -964,6 +964,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv, int __init dm_statistics_init(void) { + shared_memory_amount = 0; dm_stat_need_rcu_barrier = 0; return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 465f08ca62b1..3ba6a3859ce3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -200,6 +200,11 @@ int dm_table_create(struct dm_table **result, fmode_t mode, num_targets = dm_round_up(num_targets, KEYS_PER_NODE); + if (!num_targets) { + kfree(t); + return -ENOMEM; + } + if (alloc_targets(t, num_targets)) { kfree(t); return -ENOMEM; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 60bce435f4fa..8a30ad54bd46 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1697,6 +1697,14 @@ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) up_write(&pmd->root_lock); } +void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + pmd->read_only = false; + dm_bm_set_read_write(pmd->bm); + up_write(&pmd->root_lock); +} + int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_block_t threshold, dm_sm_threshold_fn fn, diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 845ebbe589a9..7bcc0e1d6238 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -193,6 +193,7 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_siz * that nothing is changing. */ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); +void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd); int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_block_t threshold, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2c0cf511ec23..ee29037ffc2e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -640,7 +640,9 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { - DMERR_LIMIT("dm_thin_insert_block() failed"); + DMERR_LIMIT("%s: dm_thin_insert_block() failed: error = %d", + dm_device_name(pool->pool_md), r); + set_pool_mode(pool, PM_READ_ONLY); cell_error(pool, m->cell); goto out; } @@ -881,32 +883,23 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, } } -static int commit(struct pool *pool) -{ - int r; - - r = dm_pool_commit_metadata(pool->pmd); - if (r) - DMERR_LIMIT("%s: commit failed: error = %d", - dm_device_name(pool->pool_md), r); - - return r; -} - /* * A non-zero return indicates read_only or fail_io mode. * Many callers don't care about the return value. */ -static int commit_or_fallback(struct pool *pool) +static int commit(struct pool *pool) { int r; if (get_pool_mode(pool) != PM_WRITE) return -EINVAL; - r = commit(pool); - if (r) + r = dm_pool_commit_metadata(pool->pmd); + if (r) { + DMERR_LIMIT("%s: dm_pool_commit_metadata failed: error = %d", + dm_device_name(pool->pool_md), r); set_pool_mode(pool, PM_READ_ONLY); + } return r; } @@ -943,7 +936,9 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * Try to commit to see if that will free up some * more space. */ - (void) commit_or_fallback(pool); + r = commit(pool); + if (r) + return r; r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); if (r) @@ -957,7 +952,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * table reload). */ if (!free_blocks) { - DMWARN("%s: no free space available.", + DMWARN("%s: no free data space available.", dm_device_name(pool->pool_md)); spin_lock_irqsave(&pool->lock, flags); pool->no_free_space = 1; @@ -967,8 +962,16 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) } r = dm_pool_alloc_data_block(pool->pmd, result); - if (r) + if (r) { + if (r == -ENOSPC && + !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) && + !free_blocks) { + DMWARN("%s: no free metadata space available.", + dm_device_name(pool->pool_md)); + set_pool_mode(pool, PM_READ_ONLY); + } return r; + } return 0; } @@ -1349,7 +1352,7 @@ static void process_deferred_bios(struct pool *pool) if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) return; - if (commit_or_fallback(pool)) { + if (commit(pool)) { while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; @@ -1397,6 +1400,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) case PM_FAIL: DMERR("%s: switching pool to failure mode", dm_device_name(pool->pool_md)); + dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_fail; pool->process_discard = process_bio_fail; pool->process_prepared_mapping = process_prepared_mapping_fail; @@ -1421,6 +1425,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) break; case PM_WRITE: + dm_pool_metadata_read_write(pool->pmd); pool->process_bio = process_bio; pool->process_discard = process_discard; pool->process_prepared_mapping = process_prepared_mapping; @@ -1637,12 +1642,19 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) struct pool_c *pt = ti->private; /* - * We want to make sure that degraded pools are never upgraded. + * We want to make sure that a pool in PM_FAIL mode is never upgraded. */ enum pool_mode old_mode = pool->pf.mode; enum pool_mode new_mode = pt->adjusted_pf.mode; - if (old_mode > new_mode) + /* + * If we were in PM_FAIL mode, rollback of metadata failed. We're + * not going to recover without a thin_repair. So we never let the + * pool move out of the old mode. On the other hand a PM_READ_ONLY + * may have been due to a lack of metadata or data space, and may + * now work (ie. if the underlying devices have been resized). + */ + if (old_mode == PM_FAIL) new_mode = old_mode; pool->ti = ti; @@ -2266,7 +2278,7 @@ static int pool_preresume(struct dm_target *ti) return r; if (need_commit1 || need_commit2) - (void) commit_or_fallback(pool); + (void) commit(pool); return 0; } @@ -2293,7 +2305,7 @@ static void pool_postsuspend(struct dm_target *ti) cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); - (void) commit_or_fallback(pool); + (void) commit(pool); } static int check_arg_count(unsigned argc, unsigned args_required) @@ -2427,7 +2439,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; - (void) commit_or_fallback(pool); + (void) commit(pool); r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) @@ -2489,7 +2501,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) DMWARN("Unrecognised thin pool target message received: %s", argv[0]); if (!r) - (void) commit_or_fallback(pool); + (void) commit(pool); return r; } @@ -2544,7 +2556,7 @@ static void pool_status(struct dm_target *ti, status_type_t type, /* Commit to ensure statistics aren't out-of-date */ if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) - (void) commit_or_fallback(pool); + (void) commit(pool); r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) { diff --git a/drivers/md/md.c b/drivers/md/md.c index 8766eabb0014..21f4d7ff0da2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -112,7 +112,7 @@ static inline int speed_max(struct mddev *mddev) static struct ctl_table_header *raid_table_header; -static ctl_table raid_table[] = { +static struct ctl_table raid_table[] = { { .procname = "speed_limit_min", .data = &sysctl_speed_limit_min, @@ -130,7 +130,7 @@ static ctl_table raid_table[] = { { } }; -static ctl_table raid_dir_table[] = { +static struct ctl_table raid_dir_table[] = { { .procname = "raid", .maxlen = 0, @@ -140,7 +140,7 @@ static ctl_table raid_dir_table[] = { { } }; -static ctl_table raid_root_table[] = { +static struct ctl_table raid_root_table[] = { { .procname = "dev", .maxlen = 0, @@ -562,11 +562,19 @@ static struct mddev * mddev_find(dev_t unit) goto retry; } -static inline int mddev_lock(struct mddev * mddev) +static inline int __must_check mddev_lock(struct mddev * mddev) { return mutex_lock_interruptible(&mddev->reconfig_mutex); } +/* Sometimes we need to take the lock in a situation where + * failure due to interrupts is not acceptable. + */ +static inline void mddev_lock_nointr(struct mddev * mddev) +{ + mutex_lock(&mddev->reconfig_mutex); +} + static inline int mddev_is_locked(struct mddev *mddev) { return mutex_is_locked(&mddev->reconfig_mutex); @@ -768,16 +776,10 @@ void md_super_wait(struct mddev *mddev) finish_wait(&mddev->sb_wait, &wq); } -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion*)bio->bi_private); -} - int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct page *page, int rw, bool metadata_op) { struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); - struct completion event; int ret; rw |= REQ_SYNC; @@ -793,11 +795,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, else bio->bi_sector = sector + rdev->data_offset; bio_add_page(bio, page, size, 0); - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); + submit_bio_wait(rw, bio); ret = test_bit(BIO_UPTODATE, &bio->bi_flags); bio_put(bio); @@ -2978,7 +2976,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) for_each_mddev(mddev, tmp) { struct md_rdev *rdev2; - mddev_lock(mddev); + mddev_lock_nointr(mddev); rdev_for_each(rdev2, mddev) if (rdev->bdev == rdev2->bdev && rdev != rdev2 && @@ -2994,7 +2992,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) break; } } - mddev_lock(my_mddev); + mddev_lock_nointr(my_mddev); if (overlap) { /* Someone else could have slipped in a size * change here, but doing so is just silly. @@ -3580,6 +3578,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) mddev->in_sync = 1; del_timer_sync(&mddev->safemode_timer); } + blk_set_stacking_limits(&mddev->queue->limits); pers->run(mddev); set_bit(MD_CHANGE_DEVS, &mddev->flags); mddev_resume(mddev); @@ -5258,7 +5257,7 @@ static void __md_stop_writes(struct mddev *mddev) void md_stop_writes(struct mddev *mddev) { - mddev_lock(mddev); + mddev_lock_nointr(mddev); __md_stop_writes(mddev); mddev_unlock(mddev); } @@ -5291,20 +5290,35 @@ EXPORT_SYMBOL_GPL(md_stop); static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) { int err = 0; + int did_freeze = 0; + + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { + did_freeze = 1; + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + md_wakeup_thread(mddev->thread); + } + if (mddev->sync_thread) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + /* Thread might be blocked waiting for metadata update + * which will now never happen */ + wake_up_process(mddev->sync_thread->tsk); + } + mddev_unlock(mddev); + wait_event(resync_wait, mddev->sync_thread == NULL); + mddev_lock_nointr(mddev); + mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > !!bdev) { + if (atomic_read(&mddev->openers) > !!bdev || + mddev->sync_thread || + (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { printk("md: %s still in use.\n",mdname(mddev)); + if (did_freeze) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + md_wakeup_thread(mddev->thread); + } err = -EBUSY; goto out; } - if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) { - /* Someone opened the device since we flushed it - * so page cache could be dirty and it is too late - * to flush. So abort - */ - mutex_unlock(&mddev->open_mutex); - return -EBUSY; - } if (mddev->pers) { __md_stop_writes(mddev); @@ -5315,7 +5329,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) set_disk_ro(mddev->gendisk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_state); - err = 0; + err = 0; } out: mutex_unlock(&mddev->open_mutex); @@ -5331,20 +5345,34 @@ static int do_md_stop(struct mddev * mddev, int mode, { struct gendisk *disk = mddev->gendisk; struct md_rdev *rdev; + int did_freeze = 0; + + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { + did_freeze = 1; + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + md_wakeup_thread(mddev->thread); + } + if (mddev->sync_thread) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + /* Thread might be blocked waiting for metadata update + * which will now never happen */ + wake_up_process(mddev->sync_thread->tsk); + } + mddev_unlock(mddev); + wait_event(resync_wait, mddev->sync_thread == NULL); + mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); if (atomic_read(&mddev->openers) > !!bdev || - mddev->sysfs_active) { + mddev->sysfs_active || + mddev->sync_thread || + (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { printk("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); - return -EBUSY; - } - if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) { - /* Someone opened the device since we flushed it - * so page cache could be dirty and it is too late - * to flush. So abort - */ - mutex_unlock(&mddev->open_mutex); + if (did_freeze) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + md_wakeup_thread(mddev->thread); + } return -EBUSY; } if (mddev->pers) { @@ -6551,7 +6579,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, wait_event(mddev->sb_wait, !test_bit(MD_CHANGE_DEVS, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); - mddev_lock(mddev); + mddev_lock_nointr(mddev); } } else { err = -EROFS; @@ -7361,9 +7389,6 @@ void md_do_sync(struct md_thread *thread) mddev->curr_resync = 2; try_again: - if (kthread_should_stop()) - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) goto skip; for_each_mddev(mddev2, tmp) { @@ -7388,7 +7413,7 @@ void md_do_sync(struct md_thread *thread) * be caught by 'softlockup' */ prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); - if (!kthread_should_stop() && + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && mddev2->curr_resync >= mddev->curr_resync) { printk(KERN_INFO "md: delaying %s of %s" " until %s has finished (they" @@ -7464,7 +7489,7 @@ void md_do_sync(struct md_thread *thread) last_check = 0; if (j>2) { - printk(KERN_INFO + printk(KERN_INFO "md: resuming %s of %s from checkpoint.\n", desc, mdname(mddev)); mddev->curr_resync = j; @@ -7501,7 +7526,8 @@ void md_do_sync(struct md_thread *thread) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } - while (j >= mddev->resync_max && !kthread_should_stop()) { + while (j >= mddev->resync_max && + !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { /* As this condition is controlled by user-space, * we can block indefinitely, so use '_interruptible' * to avoid triggering warnings. @@ -7509,17 +7535,18 @@ void md_do_sync(struct md_thread *thread) flush_signals(current); /* just in case */ wait_event_interruptible(mddev->recovery_wait, mddev->resync_max > j - || kthread_should_stop()); + || test_bit(MD_RECOVERY_INTR, + &mddev->recovery)); } - if (kthread_should_stop()) - goto interrupted; + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + break; sectors = mddev->pers->sync_request(mddev, j, &skipped, currspeed < speed_min(mddev)); if (sectors == 0) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); - goto out; + break; } if (!skipped) { /* actual IO requested */ @@ -7556,10 +7583,8 @@ void md_do_sync(struct md_thread *thread) last_mark = next; } - - if (kthread_should_stop()) - goto interrupted; - + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + break; /* * this loop exits only if either when we are slower than @@ -7582,11 +7607,12 @@ void md_do_sync(struct md_thread *thread) } } } - printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc); + printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc, + test_bit(MD_RECOVERY_INTR, &mddev->recovery) + ? "interrupted" : "done"); /* * this also signals 'finished resyncing' to md_stop */ - out: blk_finish_plug(&plug); wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); @@ -7640,16 +7666,6 @@ void md_do_sync(struct md_thread *thread) set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); return; - - interrupted: - /* - * got a signal, exit. - */ - printk(KERN_INFO - "md: md_do_sync() got signal ... exiting\n"); - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - goto out; - } EXPORT_SYMBOL_GPL(md_do_sync); @@ -7751,7 +7767,7 @@ void md_check_recovery(struct mddev *mddev) if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( - (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) || + (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || @@ -7894,6 +7910,7 @@ void md_reap_sync_thread(struct mddev *mddev) /* resync has finished, collect result */ md_unregister_thread(&mddev->sync_thread); + wake_up(&resync_wait); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index af96e24ec328..1d75b1dc1e2e 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, * The shadow op will often be a noop. Only insert if it really * copied data. */ - if (dm_block_location(*block) != b) + if (dm_block_location(*block) != b) { + /* + * dm_tm_shadow_block will have already decremented the old + * block, but it is still referenced by the btree. We + * increment to stop the insert decrementing it below zero + * when overwriting the old value. + */ + dm_tm_inc(info->btree_info.tm, b); r = insert_ablock(info, index, *block, root); + } return r; } diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a7e8bf296388..064a3c271baa 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -626,6 +626,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm) } EXPORT_SYMBOL_GPL(dm_bm_set_read_only); +void dm_bm_set_read_write(struct dm_block_manager *bm) +{ + bm->read_only = false; +} +EXPORT_SYMBOL_GPL(dm_bm_set_read_write); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { return crc32c(~(u32) 0, data, len) ^ init_xor; diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 9a82083a66b6..13cd58e1fe69 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/ |
