diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 10:31:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 10:31:20 -0700 |
commit | d35a878ae1c50977b55e352fd46e36e35add72a0 (patch) | |
tree | 7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-cache-target.c | |
parent | e5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff) | |
parent | 390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff) | |
download | linux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.gz linux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.bz2 linux-d35a878ae1c50977b55e352fd46e36e35add72a0.zip |
Merge tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- A major update for DM cache that reduces the latency for deciding
whether blocks should migrate to/from the cache. The bio-prison-v2
interface supports this improvement by enabling direct dispatch of
work to workqueues rather than having to delay the actual work
dispatch to the DM cache core. So the dm-cache policies are much more
nimble by being able to drive IO as they see fit. One immediate
benefit from the improved latency is a cache that should be much more
adaptive to changing workloads.
- Add a new DM integrity target that emulates a block device that has
additional per-sector tags that can be used for storing integrity
information.
- Add a new authenticated encryption feature to the DM crypt target
that builds on the capabilities provided by the DM integrity target.
- Add MD interface for switching the raid4/5/6 journal mode and update
the DM raid target to use it to enable aid4/5/6 journal write-back
support.
- Switch the DM verity target over to using the asynchronous hash
crypto API (this helps work better with architectures that have
access to off-CPU algorithm providers, which should reduce CPU
utilization).
- Various request-based DM and DM multipath fixes and improvements from
Bart and Christoph.
- A DM thinp target fix for a bio structure leak that occurs for each
discard IFF discard passdown is enabled.
- A fix for a possible deadlock in DM bufio and a fix to re-check the
new buffer allocation watermark in the face of competing admin
changes to the 'max_cache_size_bytes' tunable.
- A couple DM core cleanups.
* tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (50 commits)
dm bufio: check new buffer allocation watermark every 30 seconds
dm bufio: avoid a possible ABBA deadlock
dm mpath: make it easier to detect unintended I/O request flushes
dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit()
dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH
dm: introduce enum dm_queue_mode to cleanup related code
dm mpath: verify __pg_init_all_paths locking assumptions at runtime
dm: verify suspend_locking assumptions at runtime
dm block manager: remove an unused argument from dm_block_manager_create()
dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue()
dm mpath: delay requeuing while path initialization is in progress
dm mpath: avoid that path removal can trigger an infinite loop
dm mpath: split and rename activate_path() to prepare for its expanded use
dm ioctl: prevent stack leak in dm ioctl call
dm integrity: use previously calculated log2 of sectors_per_block
dm integrity: use hex2bin instead of open-coded variant
dm crypt: replace custom implementation of hex2bin()
dm crypt: remove obsolete references to per-CPU state
dm verity: switch to using asynchronous hash crypto API
dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues
...
Diffstat (limited to 'drivers/md/dm-cache-target.c')
-rw-r--r-- | drivers/md/dm-cache-target.c | 2475 |
1 files changed, 1087 insertions, 1388 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 975922c8f231..1db375f50a13 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -5,7 +5,7 @@ */ #include "dm.h" -#include "dm-bio-prison.h" +#include "dm-bio-prison-v2.h" #include "dm-bio-record.h" #include "dm-cache-metadata.h" @@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/mempool.h> #include <linux/module.h> +#include <linux/rwsem.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -25,7 +26,18 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle, /*----------------------------------------------------------------*/ -#define IOT_RESOLUTION 4 +/* + * Glossary: + * + * oblock: index of an origin block + * cblock: index of a cache block + * promotion: movement of a block from origin to cache + * demotion: movement of a block from cache to origin + * migration: movement of a block between the origin and cache device, + * either direction + */ + +/*----------------------------------------------------------------*/ struct io_tracker { spinlock_t lock; @@ -99,19 +111,178 @@ static void iot_io_end(struct io_tracker *iot, sector_t len) /*----------------------------------------------------------------*/ /* - * Glossary: - * - * oblock: index of an origin block - * cblock: index of a cache block - * promotion: movement of a block from origin to cache - * demotion: movement of a block from cache to origin - * migration: movement of a block between the origin and cache device, - * either direction + * Represents a chunk of future work. 'input' allows continuations to pass + * values between themselves, typically error values. */ +struct continuation { + struct work_struct ws; + int input; +}; + +static inline void init_continuation(struct continuation *k, + void (*fn)(struct work_struct *)) +{ + INIT_WORK(&k->ws, fn); + k->input = 0; +} + +static inline void queue_continuation(struct workqueue_struct *wq, + struct continuation *k) +{ + queue_work(wq, &k->ws); +} /*----------------------------------------------------------------*/ /* + * The batcher collects together pieces of work that need a particular + * operation to occur before they can proceed (typically a commit). + */ +struct batcher { + /* + * The operation that everyone is waiting for. + */ + int (*commit_op)(void *context); + void *commit_context; + + /* + * This is how bios should be issued once the commit op is complete + * (accounted_request). + */ + void (*issue_op)(struct bio *bio, void *context); + void *issue_context; + + /* + * Queued work gets put on here after commit. + */ + struct workqueue_struct *wq; + + spinlock_t lock; + struct list_head work_items; + struct bio_list bios; + struct work_struct commit_work; + + bool commit_scheduled; +}; + +static void __commit(struct work_struct *_ws) +{ + struct batcher *b = container_of(_ws, struct batcher, commit_work); + + int r; + unsigned long flags; + struct list_head work_items; + struct work_struct *ws, *tmp; + struct continuation *k; + struct bio *bio; + struct bio_list bios; + + INIT_LIST_HEAD(&work_items); + bio_list_init(&bios); + + /* + * We have to grab these before the commit_op to avoid a race + * condition. + */ + spin_lock_irqsave(&b->lock, flags); + list_splice_init(&b->work_items, &work_items); + bio_list_merge(&bios, &b->bios); + bio_list_init(&b->bios); + b->commit_scheduled = false; + spin_unlock_irqrestore(&b->lock, flags); + + r = b->commit_op(b->commit_context); + + list_for_each_entry_safe(ws, tmp, &work_items, entry) { + k = container_of(ws, struct continuation, ws); + k->input = r; + INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */ + queue_work(b->wq, ws); + } + + while ((bio = bio_list_pop(&bios))) { + if (r) { + bio->bi_error = r; + bio_endio(bio); + } else + b->issue_op(bio, b->issue_context); + } +} + +static void batcher_init(struct batcher *b, + int (*commit_op)(void *), + void *commit_context, + void (*issue_op)(struct bio *bio, void *), + void *issue_context, + struct workqueue_struct *wq) +{ + b->commit_op = commit_op; + b->commit_context = commit_context; + b->issue_op = issue_op; + b->issue_context = issue_context; + b->wq = wq; + + spin_lock_init(&b->lock); + INIT_LIST_HEAD(&b->work_items); + bio_list_init(&b->bios); + INIT_WORK(&b->commit_work, __commit); + b->commit_scheduled = false; +} + +static void async_commit(struct batcher *b) +{ + queue_work(b->wq, &b->commit_work); +} + +static void continue_after_commit(struct batcher *b, struct continuation *k) +{ + unsigned long flags; + bool commit_scheduled; + + spin_lock_irqsave(&b->lock, flags); + commit_scheduled = b->commit_scheduled; + list_add_tail(&k->ws.entry, &b->work_items); + spin_unlock_irqrestore(&b->lock, flags); + + if (commit_scheduled) + async_commit(b); +} + +/* + * Bios are errored if commit failed. + */ +static void issue_after_commit(struct batcher *b, struct bio *bio) +{ + unsigned long flags; + bool commit_scheduled; + + spin_lock_irqsave(&b->lock, flags); + commit_scheduled = b->commit_scheduled; + bio_list_add(&b->bios, bio); + spin_unlock_irqrestore(&b->lock, flags); + + if (commit_scheduled) + async_commit(b); +} + +/* + * Call this if some urgent work is waiting for the commit to complete. + */ +static void schedule_commit(struct batcher *b) +{ + bool immediate; + unsigned long flags; + + spin_lock_irqsave(&b->lock, flags); + immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios); + b->commit_scheduled = true; + spin_unlock_irqrestore(&b->lock, flags); + + if (immediate) + async_commit(b); +} + +/* * There are a couple of places where we let a bio run, but want to do some * work before calling its endio function. We do this by temporarily * changing the endio fn. @@ -189,31 +360,13 @@ struct cache_stats { atomic_t write_miss; atomic_t demotion; atomic_t promotion; + atomic_t writeback; atomic_t copies_avoided; atomic_t cache_cell_clash; atomic_t commit_count; atomic_t discard_count; }; -/* - * Defines a range of cblocks, begin to (end - 1) are in the range. end is - * the one-past-the-end value. - */ -struct cblock_range { - dm_cblock_t begin; - dm_cblock_t end; -}; - -struct invalidation_request { - struct list_head list; - struct cblock_range *cblocks; - - atomic_t complete; - int err; - - wait_queue_head_t result_wait; -}; - struct cache { struct dm_target *ti; struct dm_target_callbacks callbacks; @@ -255,11 +408,7 @@ struct cache { spinlock_t lock; struct list_head deferred_cells; struct bio_list deferred_bios; - struct bio_list deferred_flush_bios; struct bio_list deferred_writethrough_bios; - struct list_head quiesced_migrations; - struct list_head completed_migrations; - struct list_head need_commit_migrations; sector_t migration_threshold; wait_queue_head_t migration_wait; atomic_t nr_allocated_migrations; @@ -270,9 +419,7 @@ struct cache { */ atomic_t nr_io_migrations; - wait_queue_head_t quiescing_wait; - atomic_t quiescing; - atomic_t quiescing_ack; + struct rw_semaphore quiesce_lock; /* * cache_size entries, dirty if set @@ -296,13 +443,11 @@ struct cache { struct dm_kcopyd_client *copier; struct workqueue_struct *wq; - struct work_struct worker; - + struct work_struct deferred_bio_worker; + struct work_struct deferred_writethrough_worker; + struct work_struct migration_worker; struct delayed_work waker; - unsigned long last_commit_jiffies; - - struct dm_bio_prison *prison; - struct dm_deferred_set *all_io_ds; + struct dm_bio_prison_v2 *prison; mempool_t *migration_pool; @@ -330,12 +475,17 @@ struct cache { struct list_head invalidation_requests; struct io_tracker origin_tracker; + + struct work_struct commit_ws; + struct batcher committer; + + struct rw_semaphore background_work_lock; }; struct per_bio_data { bool tick:1; unsigned req_nr:2; - struct dm_deferred_entry *all_io_entry; + struct dm_bio_prison_cell_v2 *cell; struct dm_hook_info hook_info; sector_t len; @@ -350,55 +500,64 @@ struct per_bio_data { }; struct dm_cache_migration { - struct list_head list; + struct continuation k; struct cache *cache; - unsigned long start_jiffies; - dm_oblock_t old_oblock; - dm_oblock_t new_oblock; - dm_cblock_t cblock; - - bool err:1; - bool discard:1; - bool writeback:1; - bool demote:1; - bool promote:1; - bool requeue_holder:1; - bool invalidate:1; + struct policy_work *op; + struct bio *overwrite_bio; + struct dm_bio_prison_cell_v2 *cell; - struct dm_bio_prison_cell *old_ocell; - struct dm_bio_prison_cell *new_ocell; + dm_cblock_t invalidate_cblock; + dm_oblock_t invalidate_oblock; }; -/* - * Processing a bio in the worker thread may require these memory - * allocations. We prealloc to avoid deadlocks (the same worker thread - * frees them back to the mempool). - */ -struct prealloc { - struct dm_cache_migration *mg; - struct dm_bio_prison_cell *cell1; - struct dm_bio_prison_cell *cell2; -}; +/*----------------------------------------------------------------*/ + +static bool writethrough_mode(struct cache_features *f) +{ + return f->io_mode == CM_IO_WRITETHROUGH; +} -static enum cache_metadata_mode get_cache_mode(struct cache *cache); +static bool writeback_mode(struct cache_features *f) +{ + return f->io_mode == CM_IO_WRITEBACK; +} -static void wake_worker(struct cache *cache) +static inline bool passthrough_mode(struct cache_features *f) { - queue_work(cache->wq, &cache->worker); + return unlikely(f->io_mode == CM_IO_PASSTHROUGH); } /*----------------------------------------------------------------*/ -static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) +static void wake_deferred_bio_worker(struct cache *cache) { - /* FIXME: change to use a local slab. */ - return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); + queue_work(cache->wq, &cache->deferred_bio_worker); } -static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) +static void wake_deferred_writethrough_worker(struct cache *cache) { - dm_bio_prison_free_cell(cache->prison, cell); + queue_work(cache->wq, &cache->deferred_writethrough_worker); +} + +static void wake_migration_worker(struct cache *cache) +{ + if (passthrough_mode(&cache->features)) + return; + + queue_work(cache->wq, &cache->migration_worker); +} + +/*----------------------------------------------------------------*/ + +static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache) +{ + return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT); +} + +static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell) +{ + dm_bio_prison_free_cell_v2(cache->prison, cell); } static struct dm_cache_migration *alloc_migration(struct cache *cache) @@ -424,146 +583,127 @@ static void free_migration(struct dm_cache_migration *mg) mempool_free(mg, cache->migration_pool); } -static int prealloc_data_structs(struct cache *cache, struct prealloc *p) -{ - if (!p->mg) { - p->mg = alloc_migration(cache); - if (!p->mg) - return -ENOMEM; - } - - if (!p->cell1) { - p->cell1 = alloc_prison_cell(cache); - if (!p->cell1) - return -ENOMEM; - } - - if (!p->cell2) { - p->cell2 = alloc_prison_cell(cache); - if (!p->cell2) - return -ENOMEM; - } - - return 0; -} +/*----------------------------------------------------------------*/ -static void prealloc_free_structs(struct cache *cache, struct prealloc *p) +static inline dm_oblock_t oblock_succ(dm_oblock_t b) { - if (p->cell2) - free_prison_cell(cache, p->cell2); - - if (p->cell1) - free_prison_cell(cache, p->cell1); - - if (p->mg) - free_migration(p->mg); + return to_oblock(from_oblock(b) + 1ull); } -static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) +static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key) { - struct dm_cache_migration *mg = p->mg; - - BUG_ON(!mg); - p->mg = NULL; - - return mg; + key->virtual = 0; + key->dev = 0; + key->block_begin = from_oblock(begin); + key->block_end = from_oblock(end); } /* - * You must have a cell within the prealloc struct to return. If not this - * function will BUG() rather than returning NULL. + * We have two lock levels. Level 0, which is used to prevent WRITEs, and + * level 1 which prevents *both* READs and WRITEs. */ -static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) +#define WRITE_LOCK_LEVEL 0 +#define READ_WRITE_LOCK_LEVEL 1 + +static unsigned lock_level(struct bio *bio) { - struct dm_bio_prison_cell *r = NULL; + return bio_data_dir(bio) == WRITE ? + WRITE_LOCK_LEVEL : + READ_WRITE_LOCK_LEVEL; +} - if (p->cell1) { - r = p->cell1; - p->cell1 = NULL; +/*---------------------------------------------------------------- + * Per bio data + *--------------------------------------------------------------*/ - } else if (p->cell2) { - r = p->cell2; - p->cell2 = NULL; - } else - BUG(); +/* + * If using writeback, leave out struct per_bio_data's writethrough fields. + */ +#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) +#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) - return r; +static size_t get_per_bio_data_size(struct cache *cache) +{ + return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; } -/* - * You can't have more than two cells in a prealloc struct. BUG() will be - * called if you try and overfill. - */ -static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) +static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) { - if (!p->cell2) - p->cell2 = cell; + struct per_bio_data *pb = dm_per_bio_data(bio, data_size); + BUG_ON(!pb); + return pb; +} - else if (!p->cell1) - p->cell1 = cell; +static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) +{ + struct per_bio_data *pb = get_per_bio_data(bio, data_size); - else - BUG(); + pb->tick = false; + pb->req_nr = dm_bio_get_target_bio_nr(bio); + pb->cell = NULL; + pb->len = 0; + + return pb; } /*----------------------------------------------------------------*/ -static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key) +static void defer_bio(struct cache *cache, struct bio *bio) { - key->virtual = 0; - key->dev = 0; - key->block_begin = from_oblock(begin); - key->block_end = from_oblock(end); -} + unsigned long flags; -/* - * The caller hands in a preallocated cell, and a free function for it. - * The cell will be freed if there's an error, or if it wasn't used because - * a cell with that key already exists. - */ -typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); + spin_lock_irqsave(&cache->lock, flags); + bio_list_add(&cache->deferred_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); -static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end, - struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, - cell_free_fn free_fn, void *free_context, - struct dm_bio_prison_cell **cell_result) + wake_deferred_bio_worker(cache); +} + +static void defer_bios(struct cache *cache, struct bio_list *bios) { - int r; - struct dm_cell_key key; + unsigned long flags; - build_key(oblock_begin, oblock_end, &key); - r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); - if (r) - free_fn(free_context, cell_prealloc); + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&cache->deferred_bios, bios); + bio_list_init(bios); + spin_unlock_irqrestore(&cache->lock, flags); - return r; + wake_deferred_bio_worker(cache); } -static int bio_detain(struct cache *cache, dm_oblock_t oblock, - struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, - cell_free_fn free_fn, void *free_context, - struct dm_bio_prison_cell **cell_result) +/*----------------------------------------------------------------*/ + +static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio) { + bool r; + size_t pb_size; + struct per_bio_data *pb; + struct dm_cell_key_v2 key; dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); - return bio_detain_range(cache, oblock, end, bio, - cell_prealloc, free_fn, free_context, cell_result); -} + struct dm_bio_prison_cell_v2 *cell_prealloc, *cell; -static int get_cell(struct cache *cache, - dm_oblock_t oblock, - struct prealloc *structs, - struct dm_bio_prison_cell **cell_result) -{ - int r; - struct dm_cell_key key; - struct dm_bio_prison_cell *cell_prealloc; + cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */ + if (!cell_prealloc) { + defer_bio(cache, bio); + return false; + } - cell_prealloc = prealloc_get_cell(structs); + build_key(oblock, end, &key); + r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell); + if (!r) { + /* + * Failed to get the lock. + */ + free_prison_cell(cache, cell_prealloc); + return r; + } - build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key); - r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); - if (r) - prealloc_put_cell(structs, cell_prealloc); + if (cell != cell_prealloc) + free_prison_cell(cache, cell_prealloc); + + pb_size = get_per_bio_data_size(cache); + pb = get_per_bio_data(bio, pb_size); + pb->cell = cell; return r; } @@ -575,21 +715,33 @@ static bool is_dirty(struct cache *cache, dm_cblock_t b) return test_bit(from_cblock(b), cache->dirty_bitset); } -static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) +static void set_dirty(struct cache *cache, dm_cblock_t cblock) { if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { atomic_inc(&cache->nr_dirty); - policy_set_dirty(cache->policy, oblock); + policy_set_dirty(cache->policy, cblock); } } -static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) +/* + * These two are called when setting after migrations to force the policy + * and dirty bitset to be in sync. + */ +static void force_set_dirty(struct cache *cache, dm_cblock_t cblock) +{ + if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) + atomic_inc(&cache->nr_dirty); + policy_set_dirty(cache->policy, cblock); +} + +static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock) { if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { - policy_clear_dirty(cache->policy, oblock); if (atomic_dec_return(&cache->nr_dirty) == 0) dm_table_event(cache->ti->table); } + + policy_clear_dirty(cache->policy, cblock); } /*----------------------------------------------------------------*/ @@ -628,11 +780,6 @@ static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) oblocks_per_dblock(cache))); } -static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock) -{ - return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache)); -} - static void set_discard(struct cache *cache, dm_dblock_t b) { unsigned long flags; @@ -679,83 +826,6 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) return r; } -/*----------------------------------------------------------------*/ - -static void load_stats(struct cache *cache) -{ - struct dm_cache_statistics stats; - - dm_cache_metadata_get_stats(cache->cmd, &stats); - atomic_set(&cache->stats.read_hit, stats.read_hits); - atomic_set(&cache->stats.read_miss, stats.read_misses); - atomic_set(&cache->stats.write_hit, stats.write_hits); - atomic_set(&cache->stats.write_miss, stats.write_misses); -} - -static void save_stats(struct cache *cache) -{ - struct dm_cache_statistics stats; - - if (get_cache_mode(cache) >= CM_READ_ONLY) - return; - - stats.read_hits = atomic_read(&cache->stats.read_hit); - stats.read_misses = atomic_read(&cache->stats.read_miss); - stats.write_hits = atomic_read(&cache->stats.write_hit); - stats.write_misses = atomic_read(&cache->stats.write_miss); - - dm_cache_metadata_set_stats(cache->cmd, &stats); -} - -/*---------------------------------------------------------------- - * Per bio data - *--------------------------------------------------------------*/ - -/* - * If using writeback, leave out struct per_bio_data's writethrough fields. - */ -#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) -#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) - -static bool writethrough_mode(struct cache_features *f) -{ - return f->io_mode == CM_IO_WRITETHROUGH; -} - -static bool writeback_mode(struct cache_features *f) -{ - return f->io_mode == CM_IO_WRITEBACK; -} - -static bool passthrough_mode(struct cache_features *f) -{ - return f->io_mode == CM_IO_PASSTHROUGH; -} - -static size_t get_per_bio_data_size(struct cache *cache) -{ - return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; -} - -static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) -{ - struct per_bio_data *pb = dm_per_bio_data(bio, data_size); - BUG_ON(!pb); - return pb; -} - -static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) -{ - struct per_bio_data *pb = get_per_bio_data(bio, data_size); - - pb->tick = false; - pb->req_nr = dm_bio_get_target_bio_nr(bio); - pb->all_io_entry = NULL; - pb->len = 0; - - return pb; -} - /*---------------------------------------------------------------- * Remapping *--------------------------------------------------------------*/ @@ -797,8 +867,9 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) } static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, - dm_oblock_t oblock) + dm_oblock_t oblock) { + // FIXME: this is called way too much. check_if_tick_bio_needed(cache, bio); remap_to_origin(cache, bio); if (bio_data_dir(bio) == WRITE) @@ -811,7 +882,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, check_if_tick_bio_needed(cache, bio); remap_to_cache(cache, bio, cblock); if (bio_data_dir(bio) == WRITE) { - set_dirty(cache, oblock, cblock); + set_dirty(cache, cblock); clear_discard(cache, oblock_to_dblock(cache, oblock)); } } @@ -828,22 +899,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) return to_oblock(block_nr); } -/* - * You must increment the deferred set whilst the prison cell is held. To - * encourage this, we ask for 'cell' to be passed in. - */ -static void inc_ds(struct cache *cache, struct bio *bio, - struct dm_bio_prison_cell *cell) -{ - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); - - BUG_ON(!cell); - BUG_ON(pb->all_io_entry); - - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); -} - static bool accountable_bio(struct cache *cache, struct bio *bio) { return ((bio->bi_bdev == cache->origin_dev->bdev) && @@ -875,29 +930,10 @@ static void accounted_request(struct cache *cache, struct bio *bio) generic_make_request(bio); } -static void issue(struct cache *cache, struct bio *bio) -{ - unsigned long flags; - - if (!op_is_flush(bio->bi_opf)) { - accounted_request(cache, bio); - return; - } - - /* - * Batch together any bios that trigger commits and then issue a - * single commit for them in do_worker(). - */ - spin_lock_irqsave(&cache->lock, flags); - cache->commit_requested = true; - bio_list_add(&cache->deferred_flush_bios, bio); - spin_unlock_irqrestore(&cache->lock, flags); -} - -static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell) +static void issue_op(struct bio *bio, void *context) { - inc_ds(cache, bio, cell); - issue(cache, bio); + struct cache *cache = context; + accounted_request(cache, bio); } static void defer_writethrough_bio(struct cache *cache, struct bio *bio) @@ -908,7 +944,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) bio_list_add(&cache->deferred_writethrough_bios, bio); spin_unlock_irqrestore(&cache->lock, flags); - wake_worker(cache); + wake_deferred_writethrough_worker(cache); } static void writethrough_endio(struct bio *bio) @@ -934,6 +970,7 @@ static void writethrough_endio(struct bio *bio) } /* + * FIXME: send in parallel, huge latency as is. * When running in writethrough mode we need to send writes to clean blocks * to both the cache and origin devices. In future we'd like to clone the * bio and send them in parallel, but for now we're doing them in @@ -1046,12 +1083,58 @@ static void metadata_operation_failed(struct cache *cache, const char *op, int r set_cache_mode(cache, CM_READ_ONLY); } +/*----------------------------------------------------------------*/ + +static void load_stats(struct cache *cache) +{ + struct dm_cache_statistics stats; + + dm_cache_metadata_get_stats(cache->cmd, &stats); + atomic_set(&cache->stats.read_hit, stats.read_hits); + atomic_set(&cache->stats.read_miss, stats.read_misses); + atomic_set(&cache->stats.write_hit, stats.write_hits); + atomic_set(&cache->stats.write_miss, stats.write_misses); +} + +static void save_stats(struct cache *cache) +{ + struct dm_cache_statistics stats; + + if (get_cache_mode(cache) >= CM_READ_ONLY) + return; + + stats.read_hits = atomic_read(&cache->stats.read_hit); + stats.read_misses = atomic_read(&cache->stats.read_miss); + stats.write_hits = atomic_read(&cache->stats.write_hit); + stats.write_misses = atomic_read(&cache->stats.write_miss); + + dm_cache_metadata_set_stats(cache->cmd, &stats); +} + +static void update_stats(struct cache_stats *stats, enum policy_operation op) +{ + switch (op) { + case POLICY_PROMOTE: + atomic_inc(&stats->promotion); + break; + + case POLICY_DEMOTE: + atomic_inc(&stats->demotion); + break; + + case POLICY_WRITEBACK: + atomic_inc(&stats->writeback); + break; + } +} + /*---------------------------------------------------------------- * Migration processing * * Migration covers moving data from the origin device to the cache, or * vice versa. *--------------------------------------------------------------*/ + static void inc_io_migrations(struct cache *cache) { atomic_inc(&cache->nr_io_migrations); @@ -1067,213 +1150,109 @@ static bool discard_or_flush(struct bio *bio) return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf); } -static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) -{ - if (discard_or_flush(cell->holder)) { - /* - * We have to handle these bios individually. - */ - dm_cell_release(cache->prison, cell, &cache->deferred_bios); - free_prison_cell(cache, cell); - } else - list_add_tail(&cell->user_list, &cache->deferred_cells); -} - -static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder) +static void calc_discard_block_range(struct cache *cache, struct bio *bio, + dm_dblock_t *b, dm_dblock_t *e) { - unsigned long flags; - - if (!holder && dm_cell_promote_or_release(cache->prison, cell)) { - /* - * There was no prisoner to promote to holder, the - * cell has been released. - */ - free_prison_cell(cache, cell); - return; - } + sector_t sb = bio->bi_iter.bi_sector; + sector_t se = bio_end_sector(bio); - spin_lock_irqsave(&cache->lock, flags); - __cell_defer(cache, cell); - spin_unlock_irqrestore(&cache->lock, flags); + *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); - wake_worker(cache); + if (se - sb < cache->discard_block_size) + *e = *b; + else + *e = to_dblock(block_div(se, cache->discard_block_size)); } -static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err) -{ - dm_cell_error(cache->prison, cell, err); - free_prison_cell(cache, cell); -} +/*----------------------------------------------------------------*/ -static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell) +static void prevent_background_work(struct cache *cache) { - cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE); + lockdep_off(); + down_write(&cache->background_work_lock); + lockdep_on(); } -static void free_io_migration(struct dm_cache_migration *mg) +static void allow_background_work(struct cache *cache) { - struct cache *cache = mg->cache; - - dec_io_migrations(cache); - free_migration(mg); - wake_worker(cache); + lockdep_off(); + up_write(&cache->background_work_lock); + lockdep_on(); } -static void migration_failure(struct dm_cache_migration *mg) +static bool background_work_begin(struct cache *cache) { - struct cache *cache = mg->cache; - const char *dev_name = cache_device_name(cache); - - if (mg->writeback) { - DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name); - set_dirty(cache, mg->old_oblock, mg->cblock); - cell_defer(cache, mg->old_ocell, false); - - } else if (mg->demote) { - DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name); - policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); + bool r; - cell_defer(cache, mg->old_ocell, mg->promote ? false : true); - if (mg->promote) - cell_defer(cache, mg->new_ocell, true); - } else { - DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name); - policy_remove_mapping(cache->policy, mg->new_oblock); - cell_defer(cache, mg->new_ocell, true); - } + lockdep_off(); + r = down_read_trylock(&cache->background_work_lock); + lockdep_on(); |