Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/alloc.c      |   3
-rw-r--r--  drivers/md/bcache/bcache.h     |  57
-rw-r--r--  drivers/md/bcache/bset.c       |   4
-rw-r--r--  drivers/md/bcache/bset.h       |   5
-rw-r--r--  drivers/md/bcache/btree.c      |  26
-rw-r--r--  drivers/md/bcache/closure.c    |  17
-rw-r--r--  drivers/md/bcache/closure.h    |   5
-rw-r--r--  drivers/md/bcache/debug.c      |  14
-rw-r--r--  drivers/md/bcache/extents.c    |   2
-rw-r--r--  drivers/md/bcache/io.c         |  16
-rw-r--r--  drivers/md/bcache/journal.c    |   8
-rw-r--r--  drivers/md/bcache/request.c    | 186
-rw-r--r--  drivers/md/bcache/super.c      | 160
-rw-r--r--  drivers/md/bcache/sysfs.c      |  55
-rw-r--r--  drivers/md/bcache/util.c       |  25
-rw-r--r--  drivers/md/bcache/util.h       |   6
-rw-r--r--  drivers/md/bcache/writeback.c  |  92
-rw-r--r--  drivers/md/bcache/writeback.h  |   4
-rw-r--r--  drivers/md/dm-table.c          |  16
-rw-r--r--  drivers/md/dm.c                |   2
-rw-r--r--  drivers/md/md-linear.c         |   4
-rw-r--r--  drivers/md/md.c                |  10
-rw-r--r--  drivers/md/raid0.c             |   4
-rw-r--r--  drivers/md/raid1.c             |   6
-rw-r--r--  drivers/md/raid10.c            |   6
-rw-r--r--  drivers/md/raid5.c             |   4
26 files changed, 582 insertions, 155 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 458e1d38577d..004cc3cc6123 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,7 +287,8 @@ do { \
                         break; \
 \
                 mutex_unlock(&(ca)->set->bucket_lock); \
-                if (kthread_should_stop()) { \
+                if (kthread_should_stop() || \
+                    test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
                         set_current_state(TASK_RUNNING); \
                         return 0; \
                 } \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 12e5197f186c..d338b7086013 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -188,6 +188,7 @@
 #include <linux/refcount.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 
 #include "bset.h"
 #include "util.h"
@@ -258,10 +259,11 @@ struct bcache_device {
         struct gendisk *disk;
 
         unsigned long flags;
-#define BCACHE_DEV_CLOSING 0
-#define BCACHE_DEV_DETACHING 1
-#define BCACHE_DEV_UNLINK_DONE 2
-
+#define BCACHE_DEV_CLOSING 0
+#define BCACHE_DEV_DETACHING 1
+#define BCACHE_DEV_UNLINK_DONE 2
+#define BCACHE_DEV_WB_RUNNING 3
+#define BCACHE_DEV_RATE_DW_RUNNING 4
         unsigned nr_stripes;
         unsigned stripe_size;
         atomic_t *stripe_sectors_dirty;
@@ -286,6 +288,12 @@ struct io {
         sector_t last;
 };
 
+enum stop_on_failure {
+        BCH_CACHED_DEV_STOP_AUTO = 0,
+        BCH_CACHED_DEV_STOP_ALWAYS,
+        BCH_CACHED_DEV_STOP_MODE_MAX,
+};
+
 struct cached_dev {
         struct list_head list;
         struct bcache_device disk;
@@ -359,6 +367,7 @@ struct cached_dev {
         unsigned sequential_cutoff;
         unsigned readahead;
 
+        unsigned io_disable:1;
         unsigned verify:1;
         unsigned bypass_torture_test:1;
@@ -378,6 +387,11 @@ struct cached_dev {
         unsigned writeback_rate_i_term_inverse;
         unsigned writeback_rate_p_term_inverse;
         unsigned writeback_rate_minimum;
+
+        enum stop_on_failure stop_when_cache_set_failed;
+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+        atomic_t io_errors;
+        unsigned error_limit;
 };
 
 enum alloc_reserve {
@@ -474,10 +488,15 @@ struct gc_stat {
  *
  * CACHE_SET_RUNNING means all cache devices have been registered and journal
  * replay is complete.
+ *
+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set;
+ * all external and internal I/O should be denied when this flag is set.
+ *
  */
 #define CACHE_SET_UNREGISTERING 0
 #define CACHE_SET_STOPPING 1
 #define CACHE_SET_RUNNING 2
+#define CACHE_SET_IO_DISABLE 3
 
 struct cache_set {
         struct closure cl;
@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
                 wake_up_process(ca->alloc_thread);
 }
 
+static inline void closure_bio_submit(struct cache_set *c,
+                                      struct bio *bio,
+                                      struct closure *cl)
+{
+        closure_get(cl);
+        if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+                bio->bi_status = BLK_STS_IOERR;
+                bio_endio(bio);
+                return;
+        }
+        generic_make_request(bio);
+}
+
+/*
+ * Prevent the kthread from exiting directly, and make sure that when
+ * kthread_stop() is called to stop a kthread, it is still alive. If a
+ * kthread might be stopped by the CACHE_SET_IO_DISABLE bit being set,
+ * wait_for_kthread_stop() is necessary before the kthread returns.
+ */
+static inline void wait_for_kthread_stop(void)
+{
+        while (!kthread_should_stop()) {
+                set_current_state(TASK_INTERRUPTIBLE);
+                schedule();
+        }
+}
+
 /* Forward declarations */
 
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
 void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
                               blk_status_t, const char *);
@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
                          struct bkey *, int, bool);
 bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
                        unsigned, unsigned, bool);
+bool bch_cached_dev_error(struct cached_dev *dc);
 __printf(2, 3)
 bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
 
 extern struct workqueue_struct *bcache_wq;
 extern const char * const bch_cache_modes[];
+extern const char * const bch_stop_on_failure_modes[];
 extern struct mutex bch_register_lock;
 extern struct list_head bch_cache_sets;
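The wait_for_kthread_stop() helper above exists because of a kthread lifetime rule: kthread_stop() must only be called on a thread that has not yet returned, otherwise it operates on a task that may already be gone. Any bcache thread that can leave its main loop early because CACHE_SET_IO_DISABLE was set therefore parks here until teardown calls kthread_stop(). A minimal sketch of the resulting exit pattern, assuming hypothetical demo_* names (only the kthread/scheduler calls and the helper above are real APIs):

#include <linux/bitops.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static unsigned long demo_flags;
#define DEMO_IO_DISABLE 0

static int demo_thread_fn(void *arg)
{
        while (!kthread_should_stop() &&
               !test_bit(DEMO_IO_DISABLE, &demo_flags)) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
                /* ... do one unit of work per wakeup ... */
        }
        /*
         * If the loop ended because DEMO_IO_DISABLE was set, returning
         * now would let the task exit before kthread_stop() runs; park
         * until the stopping side has actually called kthread_stop().
         */
        wait_for_kthread_stop();
        return 0;
}

The registering side is unchanged: kthread_run() to start, kthread_stop() to stop; the helper only guarantees the two sides still rendezvous when the thread's exit condition fires first.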
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e56d3ecdbfcb..579c696a5fe0 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
 static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
                                                  btree_iter_cmp_fn *cmp)
 {
-        struct btree_iter_set unused;
+        struct btree_iter_set b __maybe_unused;
         struct bkey *ret = NULL;
 
         if (!btree_iter_end(iter)) {
@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
         }
 
         if (iter->data->k == iter->data->end)
-                heap_pop(iter, unused, cmp);
+                heap_pop(iter, b, cmp);
         else
                 heap_sift(iter, 0, cmp);
 }
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index fa506c1aa524..0c24280f3b98 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
 #ifdef CONFIG_BCACHE_DEBUG
 
 int __bch_count_data(struct btree_keys *);
-void __bch_check_keys(struct btree_keys *, const char *, ...);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
 void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
 void bch_dump_bucket(struct btree_keys *);
 
 #else
 
 static inline int __bch_count_data(struct btree_keys *b) { return -1; }
-static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void __printf(2, 3)
+        __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
 static inline void bch_dump_bucket(struct btree_keys *b) {}
 void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
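Both of these hunks lean on compiler annotations rather than behavior changes: __maybe_unused silences the set-but-unused warning for the heap_pop() scratch variable, and __printf(2, 3) lets -Wformat type-check callers of __bch_check_keys() in both the debug and stub builds. An illustrative sketch, assuming a hypothetical demo_log(); __printf() itself comes from the kernel's compiler headers:

#include <linux/kernel.h>       /* __printf(), __maybe_unused, vprintk() */

static void __printf(2, 3) demo_log(int level, const char *fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        vprintk(fmt, args);     /* level handling omitted for brevity */
        va_end(args);
}

static void __maybe_unused demo_caller(void)
{
        demo_log(1, "%d", 42);          /* OK: %d matches an int */
        /* demo_log(1, "%s", 42);          would trigger -Wformat */
}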
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fad9fe8817eb..17936b2dc7d6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
         struct btree *b, *t;
         unsigned long i, nr = sc->nr_to_scan;
         unsigned long freed = 0;
+        unsigned int btree_cache_used;
 
         if (c->shrinker_disabled)
                 return SHRINK_STOP;
@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
         nr = min_t(unsigned long, nr, mca_can_free(c));
 
         i = 0;
+        btree_cache_used = c->btree_cache_used;
         list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
-                if (freed >= nr)
-                        break;
+                if (nr <= 0)
+                        goto out;
 
                 if (++i > 3 &&
                     !mca_reap(b, 0, false)) {
@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
                         rw_unlock(true, b);
                         freed++;
                 }
+                nr--;
         }
 
-        for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
+        for (; (nr--) && i < btree_cache_used; i++) {
                 if (list_empty(&c->btree_cache))
                         goto out;
@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
         }
 out:
         mutex_unlock(&c->bucket_lock);
-        return freed;
+        return freed * c->btree_pages;
 }
 
 static unsigned long bch_mca_count(struct shrinker *shrink,
@@ -959,7 +962,7 @@ err:
         return b;
 }
 
-/**
+/*
  * bch_btree_node_get - find a btree node in the cache and lock it, reading it
  * in from disk if necessary.
  *
@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)
 
         btree_gc_start(c);
 
+        /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
         do {
                 ret = btree_root(gc_root, c, &op, &writes, &stats);
                 closure_sync(&writes);
@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)
 
                 if (ret && ret != -EAGAIN)
                         pr_warn("gc failed!");
-        } while (ret);
+        } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
         bch_btree_gc_finish(c);
         wake_up_allocators(c);
@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)
 
         while (1) {
                 wait_event_interruptible(c->gc_wait,
-                           kthread_should_stop() || gc_should_run(c));
+                           kthread_should_stop() ||
+                           test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
+                           gc_should_run(c));
 
-                if (kthread_should_stop())
+                if (kthread_should_stop() ||
+                    test_bit(CACHE_SET_IO_DISABLE, &c->flags))
                         break;
 
                 set_gc_sectors(c);
                 bch_btree_gc(c);
         }
 
+        wait_for_kthread_stop();
         return 0;
 }
 
@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
 
                 if (b->key.ptr[0] != btree_ptr ||
                     b->seq != seq + 1) {
-                       op->lock = b->level;
+                        op->lock = b->level;
                         goto out;
                 }
         }
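Returning freed * c->btree_pages from bch_mca_scan() matters because the shrinker contract is unit-based: scan_objects() must report progress in the same units that count_objects() advertises, and bch_mca_count() already reports mca_can_free(c) * c->btree_pages. A minimal sketch of a shrinker keeping the two sides consistent, with hypothetical demo_* names (here one "object" is one page, so the units line up trivially):

#include <linux/shrinker.h>

static unsigned long demo_cached_pages; /* pages held by our cache */

static unsigned long demo_count(struct shrinker *s,
                                struct shrink_control *sc)
{
        return demo_cached_pages;       /* units: pages */
}

static unsigned long demo_scan(struct shrinker *s,
                               struct shrink_control *sc)
{
        unsigned long nr = min(sc->nr_to_scan, demo_cached_pages);

        demo_cached_pages -= nr;        /* pretend we freed nr pages */
        return nr;                      /* same units as demo_count() */
}

static struct shrinker demo_shrinker = {
        .count_objects  = demo_count,
        .scan_objects   = demo_scan,
        .seeks          = DEFAULT_SEEKS,
};
/* register_shrinker(&demo_shrinker) at init,
 * unregister_shrinker(&demo_shrinker) on teardown */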
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920c14f7..0e14969182c6 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
 }
 EXPORT_SYMBOL(closure_sub);
 
-/**
+/*
  * closure_put - decrement a closure's refcount
  */
 void closure_put(struct closure *cl)
@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
 }
 EXPORT_SYMBOL(closure_put);
 
-/**
+/*
  * closure_wake_up - wake up all closures on a wait list, without memory barrier
  */
 void __closure_wake_up(struct closure_waitlist *wait_list)
@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);
 
 /**
  * closure_wait - add a closure to a waitlist
- *
- * @waitlist will own a ref on @cl, which will be released when
+ * @waitlist: will own a ref on @cl, which will be released when
  * closure_wake_up() is called on @waitlist.
+ * @cl: closure pointer.
  *
  */
 bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
 }
 EXPORT_SYMBOL(closure_debug_destroy);
 
-static struct dentry *debug;
+static struct dentry *closure_debug;
 
 static int debug_seq_show(struct seq_file *f, void *data)
 {
@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
         .release        = single_release
 };
 
-void __init closure_debug_init(void)
+int __init closure_debug_init(void)
 {
-        debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+        closure_debug = debugfs_create_file("closures",
+                                0400, bcache_debug, NULL, &debug_ops);
+        return IS_ERR_OR_NULL(closure_debug);
 }
-
 #endif
 
 MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9962ad..71427eb5fdae 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@ struct closure;
 struct closure_syncer;
 typedef void (closure_fn) (struct closure *);
+extern struct dentry *bcache_debug;
 
 struct closure_waitlist {
         struct llist_head list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
 
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
-void closure_debug_init(void);
+int closure_debug_init(void);
 void closure_debug_create(struct closure *cl);
 void closure_debug_destroy(struct closure *cl);
 
 #else
 
-static inline void closure_debug_init(void) {}
+static inline int closure_debug_init(void) { return 0; }
 static inline void closure_debug_create(struct closure *cl) {}
 static inline void closure_debug_destroy(struct closure *cl) {}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408befe8..028f7b386e01 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
 #include <linux/random.h>
 #include <linux/seq_file.h>
 
-static struct dentry *debug;
+struct dentry *bcache_debug;
 
 #ifdef CONFIG_BCACHE_DEBUG
 
@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {
 
 void bch_debug_init_cache_set(struct cache_set *c)
 {
-        if (!IS_ERR_OR_NULL(debug)) {
+        if (!IS_ERR_OR_NULL(bcache_debug)) {
                 char name[50];
 
                 snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
-                c->debug = debugfs_create_file(name, 0400, debug, c,
+                c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
                                                &cache_set_debug_ops);
         }
 }
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
 
 void bch_debug_exit(void)
 {
-        if (!IS_ERR_OR_NULL(debug))
-                debugfs_remove_recursive(debug);
+        if (!IS_ERR_OR_NULL(bcache_debug))
+                debugfs_remove_recursive(bcache_debug);
 }
 
 int __init bch_debug_init(struct kobject *kobj)
 {
-        debug = debugfs_create_dir("bcache", NULL);
+        bcache_debug = debugfs_create_dir("bcache", NULL);
 
-        return IS_ERR_OR_NULL(debug);
+        return IS_ERR_OR_NULL(bcache_debug);
 }
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f9d391711595..c334e6666461 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -534,7 +534,6 @@ err:
 static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
 {
         struct btree *b = container_of(bk, struct btree, keys);
-        struct bucket *g;
         unsigned i, stale;
 
         if (!KEY_PTRS(k) ||
@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
                 return false;
 
         for (i = 0; i < KEY_PTRS(k); i++) {
-                g = PTR_BUCKET(b->c, k, i);
                 stale = ptr_stale(b->c, k, i);
 
                 btree_bug_on(stale > 96, b,
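With debug.c exporting bcache_debug, the "closures" file moves from the debugfs root into the shared bcache/ directory, so bch_debug_exit()'s single debugfs_remove_recursive() tears down every entry at once. The parent/child pattern, sketched with hypothetical demo_* names:

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/errno.h>

static struct dentry *demo_dir;
static const struct file_operations demo_fops; /* fill in .open/.read */

static int __init demo_debugfs_init(void)
{
        demo_dir = debugfs_create_dir("demo", NULL);
        if (IS_ERR_OR_NULL(demo_dir))
                return -ENODEV;
        /* parented under demo_dir, like "closures" under "bcache" */
        debugfs_create_file("state", 0400, demo_dir, NULL, &demo_fops);
        return 0;
}

static void demo_debugfs_exit(void)
{
        /* one recursive remove cleans up the directory and its children */
        if (!IS_ERR_OR_NULL(demo_dir))
                debugfs_remove_recursive(demo_dir);
}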
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index a783c5a41ff1..7fac97ae036e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
         bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
 
         b->submit_time_us = local_clock_us();
-        closure_bio_submit(bio, bio->bi_private);
+        closure_bio_submit(c, bio, bio->bi_private);
 }
 
 void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 }
 
 /* IO errors */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
+{
+        char buf[BDEVNAME_SIZE];
+        unsigned errors;
+
+        WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
+
+        errors = atomic_add_return(1, &dc->io_errors);
+        if (errors < dc->error_limit)
+                pr_err("%s: IO error on backing device, unrecoverable",
+                        bio_devname(bio, buf));
+        else
+                bch_cached_dev_error(dc);
+}
 
 void bch_count_io_errors(struct cache *ca,
                          blk_status_t error,
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1b736b860739..18f1b5239620 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -62,7 +62,7 @@ reread:         left = ca->sb.bucket_size - offset;
                 bio_set_op_attrs(bio, REQ_OP_READ, 0);
                 bch_bio_map(bio, data);
 
-                closure_bio_submit(bio, &cl);
+                closure_bio_submit(ca->set, bio, &cl);
                 closure_sync(&cl);
 
                 /* This function could be simpler now since we no longer write
@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
         struct cache *ca;
         uint64_t last_seq;
         unsigned iter, n = 0;
-        atomic_t p;
+        atomic_t p __maybe_unused;
 
         atomic_long_inc(&c->reclaim);
 
@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
 }
 
 static void journal_write_unlock(struct closure *cl)
+        __releases(&c->journal.lock)
 {
         struct cache_set *c = container_of(cl, struct cache_set, journal.io);
 
@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
         spin_unlock(&c->journal.lock);
 
         while ((bio = bio_list_pop(&list)))
-                closure_bio_submit(bio, cl);
+                closure_bio_submit(c, bio, cl);
 
         continue_at(cl, journal_write_done, NULL);
 }
@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)
 
 static struct journal_write *journal_wait_for_write(struct cache_set *c,
                                                     unsigned nkeys)
+        __acquires(&c->journal.lock)
 {
         size_t sectors;
         struct closure cl;
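bch_count_backing_io_errors() above follows a common threshold pattern: errors accumulate in an atomic counter, and a recovery action fires once the configured limit (dc->error_limit, defaulting to DEFAULT_CACHED_DEV_ERROR_LIMIT, i.e. 64) is reached. A reduced sketch with hypothetical demo_* names; atomic_add_return() yields the post-increment value, so every error observes a distinct count:

#include <linux/atomic.h>
#include <linux/printk.h>

struct demo_dev {
        atomic_t io_errors;
        unsigned int error_limit;       /* e.g. 64 */
};

static void demo_stop_device(struct demo_dev *d)
{
        pr_err("demo: too many I/O errors, stopping device\n");
        /* ... tear the device down, cf. bch_cached_dev_error() ... */
}

static void demo_count_io_error(struct demo_dev *d)
{
        unsigned int errors = atomic_add_return(1, &d->io_errors);

        if (errors < d->error_limit)
                pr_err("demo: I/O error %u on backing device\n", errors);
        else
                demo_stop_device(d);    /* limit reached */
}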
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6422846b546e..a65e3365eeb9 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
         }
 
         op->insert_data_done = true;
+        /* get in bch_data_insert() */
         bio_put(bio);
 out:
         continue_at(cl, bch_data_insert_keys, op->wq);
@@ -295,6 +296,7 @@ err:
 
 /**
  * bch_data_insert - stick some data in the cache
+ * @cl: closure pointer.
  *
  * This is the starting point for any data to end up in a cache device; it
  * could be from a normal write, or a writeback write, or a write to a flash
  * only
@@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
         closure_put(cl);
 }
 
+static void backing_request_endio(struct bio *bio)
+{
+        struct closure *cl = bio->bi_private;
+
+        if (bio->bi_status) {
+                struct search *s = container_of(cl, struct search, cl);
+                struct cached_dev *dc = container_of(s->d,
+                                                     struct cached_dev, disk);
+                /*
+                 * If a bio has REQ_PREFLUSH for writeback mode, it is
+                 * specially assembled in cached_dev_write() for a non-zero
+                 * write request which has REQ_PREFLUSH. We don't set
+                 * s->iop.status for this failure; the status will be
+                 * decided by the result of the bch_data_insert()
+                 * operation.
+                 */
+                if (unlikely(s->iop.writeback &&
+                             bio->bi_opf & REQ_PREFLUSH)) {
+                        char buf[BDEVNAME_SIZE];
+
+                        bio_devname(bio, buf);
+                        pr_err("Can't flush %s: returned bi_status %i",
+                                buf, bio->bi_status);
+                } else {
+                        /* set to orig_bio->bi_status in bio_complete() */
+                        s->iop.status = bio->bi_status;
+                }
+                s->recoverable = false;
+                /* should count I/O error for backing device here */
+                bch_count_backing_io_errors(dc, bio);
+        }
+
+        bio_put(bio);
+        closure_put(cl);
+}
+
 static void bio_complete(struct search *s)
 {
         if (s->orig_bio) {
@@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
         }
 }
 
-static void do_bio_hook(struct search *s, struct bio *orig_bio)
+static void do_bio_hook(struct search *s,
+                        struct bio *orig_bio,
+                        bio_end_io_t *end_io_fn)
 {
         struct bio *bio = &s->bio.bio;
 
         bio_init(bio, NULL, 0);
         __bio_clone_fast(bio, orig_bio);
-        bio->bi_end_io = request_endio;
+        /*
+         * bi_end_io can be set separately somewhere else, e.g. the
+         * variants in:
+         * - cache_bio->bi_end_io from cached_dev_cache_miss()
+         * - n->bi_end_io from cache_lookup_fn()
+         */
+        bio->bi_end_io = end_io_fn;
         bio->bi_private = &s->cl;
 
         bio_cnt_set(bio, 3);
@@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
         s = mempool_alloc(d->c->search, GFP_NOIO);
 
         closure_init(&s->cl, NULL);
-        do_bio_hook(s, bio);
+        do_bio_hook(s, bio, request_endio);
 
         s->orig_bio = bio;
         s->cache_miss = NULL;
@@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
                 trace_bcache_read_retry(s->orig_bio);
 
                 s->iop.status = 0;
-                do_bio_hook(s, s->orig_bio);
+                do_bio_hook(s, s->orig_bio, backing_request_endio);
 
                 /* XXX: invalidate cache */
 
-                closure_bio_submit(bio, cl);
+                /* I/O request sent to backing device */
+                closure_bio_submit(s->iop.c, bio, cl);
         }
 
         continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
         bio_copy_dev(cache_bio, miss);
         cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
 
-        cache_bio->bi_end_io = request_endio;
+        cache_bio->bi_end_io = backing_request_endio;
         cache_bio->bi_private = &s->cl;
 
         bch_bio_map(cache_bio, NULL);
@@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
         s->cache_miss = miss;
         s->iop.bio = cache_bio;
         bio_get(cache_bio);
-        closure_bio_submit(cache_bio, &s->cl);
+        /* I/O request sent to backing device */
+        closure_bio_submit(s->iop.c, cache_bio, &s->cl);
 
         return ret;
 out_put:
         bio_put(cache_bio);
 out_submit:
-        miss->bi_end_io = request_endio;
+        miss->bi_end_io = backing_request_endio;
         miss->bi_private = &s->cl;
-        closure_bio_submit(miss, &s->cl);
+        /* I/O request sent to backing device */
+        closure_bio_submit(s->iop.c, miss, &s->cl);
         return ret;
 }
 
@@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
                 s->iop.bio = s->orig_bio;
                 bio_get(s->iop.bio);
 
-                if ((bio_op(bio) != REQ_OP_DISCARD) ||
-                    blk_queue_discard(bdev_get_queue(dc->bdev)))
-                        closure_bio_submit(bio, cl);
+                if (bio_op(bio) == REQ_OP_DISCARD &&
+                    !blk_queue_discard(bdev_get_queue(dc->bdev)))
+                        goto insert_data;
+
+                /* I/O request sent to backing device */
+                bio->bi_end_io = backing_request_endio;
+                closure_bio_submit(s->iop.c, bio, cl);
+
         } else if (s->iop.writeback) {
                 bch_writeback_add(dc);
                 s->iop.bio = bio;
 
                 if (bio->bi_opf & REQ_PREFLUSH) {
-                        /* Also need to send a flush to the backing device */
-                        struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
-                                                             dc->disk.bio_split);
-
+                        /*
+                         * Also need to send a flush to the backing
+                         * device.
+                         */
+                        struct bio *flush;
+
+                        flush = bio_alloc_bioset(GFP_NOIO, 0,
+                                                 dc->disk.bio_split);
+                        if (!flush) {
+                                s->iop.status = BLK_STS_RESOURCE;
+                                goto insert_data;
+                        }
                         bio_copy_dev(flush, bio);
-                        flush->bi_end_io = request_endio;
+                        flush->bi_end_io = backing_request_endio;
                         flush->bi_private = cl;
                         flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-                        closure_bio_submit(flush, cl);
+                        /* I/O request sent to backing device */
+                        closure_bio_submit(s->iop.c, flush, cl);
                 }
         } else {
                 s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-                closure_bio_submit(bio, cl);
+                /* I/O request sent to backing device */
+                bio->bi_end_io = backing_request_endio;
+                closure_bio_submit(s->iop.c, bio, cl);
         }
 
+insert_data:
         closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
         continue_at(cl, cached_dev_write_complete, NULL);
 }
@@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
                 bch_journal_meta(s->iop.c, cl);
 
         /* If it's a flush, we send the flush to the backing device too */
-        closure_bio_submit(bio, cl);
+        bio->bi_end_io = backing_request_endio;
+        closure_bio_submit(s->iop.c, bio, cl);
 
         continue_at(cl, cached_dev_bio_complete, NULL);
 }
 
+struct detached_dev_io_private {
+        struct bcache_device *d;
+        unsigned long start_time;
+        bio_end_io_t *bi_end_io;
+        void *bi_private;
+};
+
+static void detached_dev_end_io(struct bio *bio)
+{
+        struct detached_dev_io_private *ddip;
+
+        ddip = bio->bi_private;
+        bio->bi_end_io = ddip->bi_end_io;
+        bio->bi_private = ddip->bi_private;
+
+        generic_end_io_acct(ddip->d->disk->queue,
+                            bio_data_dir(bio),
+                            &ddip->d->disk->part0, ddip->start_time);
+
+        if (bio->bi_status) {
+                struct cached_dev *dc = container_of(ddip->d,
+                                                     struct cached_dev, disk);
+                /* should count I/O error for backing device here */
+                bch_count_backing_io_errors(dc, bio);
+        }
+
+        kfree(ddip);
+        bio->bi_end_io(bio);
+}
+
+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
+{
+        struct detached_dev_io_private *ddip;
+        struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+        /*
+         * no need to call closure_get(&dc->disk.cl),
+         * because upper layer had already opened bcache device,
+         * which would call closure_get(&dc->disk.cl)
+         */
+        ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
+        ddip->d = d;
+        ddip->start_time = jiffies;
+        ddip->bi_end_io = bio->bi_end_io;
+        ddip->bi_private = bio->bi_private;
+        bio->bi_end_io = detached_dev_end_io;
+        bio->bi_private = ddip;
+
+        if ((bio_op(bio) == REQ_OP_DISCARD) &&
+            !blk_queue_discard(bdev_get_queue(dc->bdev)))
+                bio->bi_end_io(bio);
+        else
+                generic_make_request(bio);
+}
+
 /* Cached devices - read & write stuff */
 
 static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
         struct cached_dev *dc = container_of(d, struct cached_dev, disk);
         int rw = bio_data_dir(bio);
 
+        if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
+                     dc->io_disable)) {
+                bio->bi_status = BLK_STS_IOERR;
+                bio_endio(bio);
+                return BLK_QC_T_NONE;
+        }
+
         atomic_set(&dc->backing_idle, 0);
         generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
 
@@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
                         else
                                 cached_dev_read(dc, s);
                 }
-        } else {
-                if ((bio_op(bio) == REQ_OP_DISCARD) &&
-                    !blk_queue_discard(bdev_get_queue(dc->bdev)))
-                        bio_endio(bio);
-                else
-                        generic_make_request(bio);
-        }
+        } else
+                /* I/O request sent to backing device */
+                detached_dev_do_request(d, bio);
 
         return BLK_QC_T_NONE;
 }
@@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
         struct bcache_device *d = bio->bi_disk->private_data;
         int rw = bio_data_dir(bio);
 
+        if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+                bio->bi_status = BLK_STS_IOERR;
+                bio_endio(bio);
+                return BLK_QC_T_NONE;
+        }
+
         generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
 
         s = search_alloc(bio, d);
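detached_dev_do_request()/detached_dev_end_io() above are an instance of the standard bio completion-hooking pattern: stash the owner's bi_end_io/bi_private in a private allocation, redirect completion to your own handler, then restore both fields before completing to the owner. A self-contained sketch with hypothetical demo_* names; unlike the patch's kzalloc(), the sketch also guards against allocation failure:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

struct demo_hook {
        bio_end_io_t *saved_end_io;
        void *saved_private;
};

static void demo_end_io(struct bio *bio)
{
        struct demo_hook *h = bio->bi_private;

        /* restore the caller's completion context first */
        bio->bi_end_io = h->saved_end_io;
        bio->bi_private = h->saved_private;

        /* ... observe bio->bi_status, account the I/O, etc. ... */

        kfree(h);
        bio->bi_end_io(bio);    /* complete to the original owner */
}

static void demo_submit(struct bio *bio)
{
        struct demo_hook *h = kzalloc(sizeof(*h), GFP_NOIO);

        if (!h) {
                bio->bi_status = BLK_STS_RESOURCE;
                bio_endio(bio);
                return;
        }
        h->saved_end_io = bio->bi_end_io;
        h->saved_private = bio->bi_private;
        bio->bi_end_io = demo_end_io;
        bio->bi_private = h;
        generic_make_request(bio);
}

Restoring bi_end_io/bi_private before calling the saved handler matters because the owner may re-examine or resubmit the bio from its own completion path.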
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f2273143b3cb..d90d9e59ca00 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
         NULL
 };
 
+/* Default is -1; we skip past it for stop_when_cache_set_failed */
+const char * const bch_stop_on_failure_modes[] = {
+        "default",
+        "auto",
+        "always",
+        NULL
+};
+
 static struct kobject *bcache_kobj;
 struct mutex bch_register_lock;
 LIST_HEAD(bch_cache_sets);
@@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
         bio->bi_private = dc;
 
         closure_get(cl);
+        /* I/O request sent to backing device */
         __write_super(&dc->sb, bio);
 
         closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
@@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
         bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
         bch_bio_map(bio, ca->disk_buckets);
 
-        closure_bio_submit(bio, &ca->prio);
+        closure_bio_submit(ca->set, bio, &ca->prio);
         closure_sync(cl);
 }
 
@@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
                               sector_t sectors)
 {
         struct request_queue *q;
+        const size_t max_stripes = min_t(size_t, INT_MAX,
+                                         SIZE_MAX / sizeof(atomic_t));
         size_t n;
         int idx;
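bch_stop_on_failure_modes[] backs the new stop_when_cache_set_failed sysfs attribute; per the comment above it, the "default" entry at index 0 is skipped when mapping a write onto enum stop_on_failure, so a match at index i plausibly maps to value i - 1 (BCH_CACHED_DEV_STOP_AUTO / BCH_CACHED_DEV_STOP_ALWAYS). A hedged sketch of such a store path — demo_* names are hypothetical, not the patch's sysfs code:

#include <linux/errno.h>
#include <linux/string.h>

static const char * const demo_modes[] = {
        "default", "auto", "always", NULL
};

/*
 * Returns the chosen enum value, or -EINVAL. Index 0 ("default") is
 * skipped, so a match at index i maps to enum value i - 1.
 */
static int demo_store_stop_mode(const char *buf)
{
        int i;

        for (i = 1; demo_modes[i]; i++)
                if (sysfs_streq(buf, demo_modes[i]))
                        return i - 1;   /* 0 = AUTO, 1 = ALWAYS */
        return -EINVAL;
}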