Merge tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer: - A major update for DM cache that reduces the latency for deciding whether blocks should migrate to/from the cache. The bio-prison-v2 interface supports this improvement by enabling direct dispatch of work to workqueues rather than having to delay the actual work dispatch to the DM cache core. So the dm-cache policies are much more nimble by being able to drive IO as they see fit. One immediate benefit from the improved latency is a cache that should be much more adaptive to changing workloads. - Add a new DM integrity target that emulates a block device that has additional per-sector tags that can be used for storing integrity information. - Add a new authenticated encryption feature to the DM crypt target that builds on the capabilities provided by the DM integrity target. - Add MD interface for switching the raid4/5/6 journal mode and update the DM raid target to use it to enable aid4/5/6 journal write-back support. - Switch the DM verity target over to using the asynchronous hash crypto API (this helps work better with architectures that have access to off-CPU algorithm providers, which should reduce CPU utilization). - Various request-based DM and DM multipath fixes and improvements from Bart and Christoph. - A DM thinp target fix for a bio structure leak that occurs for each discard IFF discard passdown is enabled. - A fix for a possible deadlock in DM bufio and a fix to re-check the new buffer allocation watermark in the face of competing admin changes to the 'max_cache_size_bytes' tunable. - A couple DM core cleanups. * tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (50 commits) dm bufio: check new buffer allocation watermark every 30 seconds dm bufio: avoid a possible ABBA deadlock dm mpath: make it easier to detect unintended I/O request flushes dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit() dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH dm: introduce enum dm_queue_mode to cleanup related code dm mpath: verify __pg_init_all_paths locking assumptions at runtime dm: verify suspend_locking assumptions at runtime dm block manager: remove an unused argument from dm_block_manager_create() dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue() dm mpath: delay requeuing while path initialization is in progress dm mpath: avoid that path removal can trigger an infinite loop dm mpath: split and rename activate_path() to prepare for its expanded use dm ioctl: prevent stack leak in dm ioctl call dm integrity: use previously calculated log2 of sectors_per_block dm integrity: use hex2bin instead of open-coded variant dm crypt: replace custom implementation of hex2bin() dm crypt: remove obsolete references to per-CPU state dm verity: switch to using asynchronous hash crypto API dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-03 10:31:20 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-03 10:31:20 -0700
commit: d35a878ae1c50977b55e352fd46e36e35add72a0 (patch)
tree: 7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-cache-target.c
parent: e5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff)
parent: 390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff)
download: linux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.gz
linux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.bz2
linux-d35a878ae1c50977b55e352fd46e36e35add72a0.zip
1 files changed, 1087 insertions, 1388 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 975922c8f231..1db375f50a13 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -5,7 +5,7 @@
  */
 
 #include "dm.h"
-#include "dm-bio-prison.h"
+#include "dm-bio-prison-v2.h"
 #include "dm-bio-record.h"
 #include "dm-cache-metadata.h"
 
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/mempool.h>
 #include <linux/module.h>
+#include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
@@ -25,7 +26,18 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
 
 /*----------------------------------------------------------------*/
 
-#define IOT_RESOLUTION 4
+/*
+ * Glossary:
+ *
+ * oblock: index of an origin block
+ * cblock: index of a cache block
+ * promotion: movement of a block from origin to cache
+ * demotion: movement of a block from cache to origin
+ * migration: movement of a block between the origin and cache device,
+ *	      either direction
+ */
+
+/*----------------------------------------------------------------*/
 
 struct io_tracker {
 	spinlock_t lock;
@@ -99,19 +111,178 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
 /*----------------------------------------------------------------*/
 
 /*
- * Glossary:
- *
- * oblock: index of an origin block
- * cblock: index of a cache block
- * promotion: movement of a block from origin to cache
- * demotion: movement of a block from cache to origin
- * migration: movement of a block between the origin and cache device,
- *	      either direction
+ * Represents a chunk of future work.  'input' allows continuations to pass
+ * values between themselves, typically error values.
  */
+struct continuation {
+	struct work_struct ws;
+	int input;
+};
+
+static inline void init_continuation(struct continuation *k,
+				     void (*fn)(struct work_struct *))
+{
+	INIT_WORK(&k->ws, fn);
+	k->input = 0;
+}
+
+static inline void queue_continuation(struct workqueue_struct *wq,
+				      struct continuation *k)
+{
+	queue_work(wq, &k->ws);
+}
 
 /*----------------------------------------------------------------*/
 
 /*
+ * The batcher collects together pieces of work that need a particular
+ * operation to occur before they can proceed (typically a commit).
+ */
+struct batcher {
+	/*
+	 * The operation that everyone is waiting for.
+	 */
+	int (*commit_op)(void *context);
+	void *commit_context;
+
+	/*
+	 * This is how bios should be issued once the commit op is complete
+	 * (accounted_request).
+	 */
+	void (*issue_op)(struct bio *bio, void *context);
+	void *issue_context;
+
+	/*
+	 * Queued work gets put on here after commit.
+	 */
+	struct workqueue_struct *wq;
+
+	spinlock_t lock;
+	struct list_head work_items;
+	struct bio_list bios;
+	struct work_struct commit_work;
+
+	bool commit_scheduled;
+};
+
+static void __commit(struct work_struct *_ws)
+{
+	struct batcher *b = container_of(_ws, struct batcher, commit_work);
+
+	int r;
+	unsigned long flags;
+	struct list_head work_items;
+	struct work_struct *ws, *tmp;
+	struct continuation *k;
+	struct bio *bio;
+	struct bio_list bios;
+
+	INIT_LIST_HEAD(&work_items);
+	bio_list_init(&bios);
+
+	/*
+	 * We have to grab these before the commit_op to avoid a race
+	 * condition.
+	 */
+	spin_lock_irqsave(&b->lock, flags);
+	list_splice_init(&b->work_items, &work_items);
+	bio_list_merge(&bios, &b->bios);
+	bio_list_init(&b->bios);
+	b->commit_scheduled = false;
+	spin_unlock_irqrestore(&b->lock, flags);
+
+	r = b->commit_op(b->commit_context);
+
+	list_for_each_entry_safe(ws, tmp, &work_items, entry) {
+		k = container_of(ws, struct continuation, ws);
+		k->input = r;
+		INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */
+		queue_work(b->wq, ws);
+	}
+
+	while ((bio = bio_list_pop(&bios))) {
+		if (r) {
+			bio->bi_error = r;
+			bio_endio(bio);
+		} else
+			b->issue_op(bio, b->issue_context);
+	}
+}
+
+static void batcher_init(struct batcher *b,
+			 int (*commit_op)(void *),
+			 void *commit_context,
+			 void (*issue_op)(struct bio *bio, void *),
+			 void *issue_context,
+			 struct workqueue_struct *wq)
+{
+	b->commit_op = commit_op;
+	b->commit_context = commit_context;
+	b->issue_op = issue_op;
+	b->issue_context = issue_context;
+	b->wq = wq;
+
+	spin_lock_init(&b->lock);
+	INIT_LIST_HEAD(&b->work_items);
+	bio_list_init(&b->bios);
+	INIT_WORK(&b->commit_work, __commit);
+	b->commit_scheduled = false;
+}
+
+static void async_commit(struct batcher *b)
+{
+	queue_work(b->wq, &b->commit_work);
+}
+
+static void continue_after_commit(struct batcher *b, struct continuation *k)
+{
+	unsigned long flags;
+	bool commit_scheduled;
+
+	spin_lock_irqsave(&b->lock, flags);
+	commit_scheduled = b->commit_scheduled;
+	list_add_tail(&k->ws.entry, &b->work_items);
+	spin_unlock_irqrestore(&b->lock, flags);
+
+	if (commit_scheduled)
+		async_commit(b);
+}
+
+/*
+ * Bios are errored if commit failed.
+ */
+static void issue_after_commit(struct batcher *b, struct bio *bio)
+{
+       unsigned long flags;
+       bool commit_scheduled;
+
+       spin_lock_irqsave(&b->lock, flags);
+       commit_scheduled = b->commit_scheduled;
+       bio_list_add(&b->bios, bio);
+       spin_unlock_irqrestore(&b->lock, flags);
+
+       if (commit_scheduled)
+	       async_commit(b);
+}
+
+/*
+ * Call this if some urgent work is waiting for the commit to complete.
+ */
+static void schedule_commit(struct batcher *b)
+{
+	bool immediate;
+	unsigned long flags;
+
+	spin_lock_irqsave(&b->lock, flags);
+	immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
+	b->commit_scheduled = true;
+	spin_unlock_irqrestore(&b->lock, flags);
+
+	if (immediate)
+		async_commit(b);
+}
+
+/*
  * There are a couple of places where we let a bio run, but want to do some
  * work before calling its endio function.  We do this by temporarily
  * changing the endio fn.
@@ -189,31 +360,13 @@ struct cache_stats {
 	atomic_t write_miss;
 	atomic_t demotion;
 	atomic_t promotion;
+	atomic_t writeback;
 	atomic_t copies_avoided;
 	atomic_t cache_cell_clash;
 	atomic_t commit_count;
 	atomic_t discard_count;
 };
 
-/*
- * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
- * the one-past-the-end value.
- */
-struct cblock_range {
-	dm_cblock_t begin;
-	dm_cblock_t end;
-};
-
-struct invalidation_request {
-	struct list_head list;
-	struct cblock_range *cblocks;
-
-	atomic_t complete;
-	int err;
-
-	wait_queue_head_t result_wait;
-};
-
 struct cache {
 	struct dm_target *ti;
 	struct dm_target_callbacks callbacks;
@@ -255,11 +408,7 @@ struct cache {
 	spinlock_t lock;
 	struct list_head deferred_cells;
 	struct bio_list deferred_bios;
-	struct bio_list deferred_flush_bios;
 	struct bio_list deferred_writethrough_bios;
-	struct list_head quiesced_migrations;
-	struct list_head completed_migrations;
-	struct list_head need_commit_migrations;
 	sector_t migration_threshold;
 	wait_queue_head_t migration_wait;
 	atomic_t nr_allocated_migrations;
@@ -270,9 +419,7 @@ struct cache {
 	 */
 	atomic_t nr_io_migrations;
 
-	wait_queue_head_t quiescing_wait;
-	atomic_t quiescing;
-	atomic_t quiescing_ack;
+	struct rw_semaphore quiesce_lock;
 
 	/*
 	 * cache_size entries, dirty if set
@@ -296,13 +443,11 @@ struct cache {
 
 	struct dm_kcopyd_client *copier;
 	struct workqueue_struct *wq;
-	struct work_struct worker;
-
+	struct work_struct deferred_bio_worker;
+	struct work_struct deferred_writethrough_worker;
+	struct work_struct migration_worker;
 	struct delayed_work waker;
-	unsigned long last_commit_jiffies;
-
-	struct dm_bio_prison *prison;
-	struct dm_deferred_set *all_io_ds;
+	struct dm_bio_prison_v2 *prison;
 
 	mempool_t *migration_pool;
 
@@ -330,12 +475,17 @@ struct cache {
 	struct list_head invalidation_requests;
 
 	struct io_tracker origin_tracker;
+
+	struct work_struct commit_ws;
+	struct batcher committer;
+
+	struct rw_semaphore background_work_lock;
 };
 
 struct per_bio_data {
 	bool tick:1;
 	unsigned req_nr:2;
-	struct dm_deferred_entry *all_io_entry;
+	struct dm_bio_prison_cell_v2 *cell;
 	struct dm_hook_info hook_info;
 	sector_t len;
 
@@ -350,55 +500,64 @@ struct per_bio_data {
 };
 
 struct dm_cache_migration {
-	struct list_head list;
+	struct continuation k;
 	struct cache *cache;
 
-	unsigned long start_jiffies;
-	dm_oblock_t old_oblock;
-	dm_oblock_t new_oblock;
-	dm_cblock_t cblock;
-
-	bool err:1;
-	bool discard:1;
-	bool writeback:1;
-	bool demote:1;
-	bool promote:1;
-	bool requeue_holder:1;
-	bool invalidate:1;
+	struct policy_work *op;
+	struct bio *overwrite_bio;
+	struct dm_bio_prison_cell_v2 *cell;
 
-	struct dm_bio_prison_cell *old_ocell;
-	struct dm_bio_prison_cell *new_ocell;
+	dm_cblock_t invalidate_cblock;
+	dm_oblock_t invalidate_oblock;
 };
 
-/*
- * Processing a bio in the worker thread may require these memory
- * allocations.  We prealloc to avoid deadlocks (the same worker thread
- * frees them back to the mempool).
- */
-struct prealloc {
-	struct dm_cache_migration *mg;
-	struct dm_bio_prison_cell *cell1;
-	struct dm_bio_prison_cell *cell2;
-};
+/*----------------------------------------------------------------*/
+
+static bool writethrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITETHROUGH;
+}
 
-static enum cache_metadata_mode get_cache_mode(struct cache *cache);
+static bool writeback_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITEBACK;
+}
 
-static void wake_worker(struct cache *cache)
+static inline bool passthrough_mode(struct cache_features *f)
 {
-	queue_work(cache->wq, &cache->worker);
+	return unlikely(f->io_mode == CM_IO_PASSTHROUGH);
 }
 
 /*----------------------------------------------------------------*/
 
-static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
+static void wake_deferred_bio_worker(struct cache *cache)
 {
-	/* FIXME: change to use a local slab. */
-	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
+	queue_work(cache->wq, &cache->deferred_bio_worker);
 }
 
-static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
+static void wake_deferred_writethrough_worker(struct cache *cache)
 {
-	dm_bio_prison_free_cell(cache->prison, cell);
+	queue_work(cache->wq, &cache->deferred_writethrough_worker);
+}
+
+static void wake_migration_worker(struct cache *cache)
+{
+	if (passthrough_mode(&cache->features))
+		return;
+
+	queue_work(cache->wq, &cache->migration_worker);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
+{
+	return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
+}
+
+static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
+{
+	dm_bio_prison_free_cell_v2(cache->prison, cell);
 }
 
 static struct dm_cache_migration *alloc_migration(struct cache *cache)
@@ -424,146 +583,127 @@ static void free_migration(struct dm_cache_migration *mg)
 	mempool_free(mg, cache->migration_pool);
 }
 
-static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
-{
-	if (!p->mg) {
-		p->mg = alloc_migration(cache);
-		if (!p->mg)
-			return -ENOMEM;
-	}
-
-	if (!p->cell1) {
-		p->cell1 = alloc_prison_cell(cache);
-		if (!p->cell1)
-			return -ENOMEM;
-	}
-
-	if (!p->cell2) {
-		p->cell2 = alloc_prison_cell(cache);
-		if (!p->cell2)
-			return -ENOMEM;
-	}
-
-	return 0;
-}
+/*----------------------------------------------------------------*/
 
-static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
+static inline dm_oblock_t oblock_succ(dm_oblock_t b)
 {
-	if (p->cell2)
-		free_prison_cell(cache, p->cell2);
-
-	if (p->cell1)
-		free_prison_cell(cache, p->cell1);
-
-	if (p->mg)
-		free_migration(p->mg);
+	return to_oblock(from_oblock(b) + 1ull);
 }
 
-static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
+static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
 {
-	struct dm_cache_migration *mg = p->mg;
-
-	BUG_ON(!mg);
-	p->mg = NULL;
-
-	return mg;
+	key->virtual = 0;
+	key->dev = 0;
+	key->block_begin = from_oblock(begin);
+	key->block_end = from_oblock(end);
 }
 
 /*
- * You must have a cell within the prealloc struct to return.  If not this
- * function will BUG() rather than returning NULL.
+ * We have two lock levels.  Level 0, which is used to prevent WRITEs, and
+ * level 1 which prevents *both* READs and WRITEs.
  */
-static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
+#define WRITE_LOCK_LEVEL 0
+#define READ_WRITE_LOCK_LEVEL 1
+
+static unsigned lock_level(struct bio *bio)
 {
-	struct dm_bio_prison_cell *r = NULL;
+	return bio_data_dir(bio) == WRITE ?
+		WRITE_LOCK_LEVEL :
+		READ_WRITE_LOCK_LEVEL;
+}
 
-	if (p->cell1) {
-		r = p->cell1;
-		p->cell1 = NULL;
+/*----------------------------------------------------------------
+ * Per bio data
+ *--------------------------------------------------------------*/
 
-	} else if (p->cell2) {
-		r = p->cell2;
-		p->cell2 = NULL;
-	} else
-		BUG();
+/*
+ * If using writeback, leave out struct per_bio_data's writethrough fields.
+ */
+#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
+#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
 
-	return r;
+static size_t get_per_bio_data_size(struct cache *cache)
+{
+	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
 }
 
-/*
- * You can't have more than two cells in a prealloc struct.  BUG() will be
- * called if you try and overfill.
- */
-static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
+static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
 {
-	if (!p->cell2)
-		p->cell2 = cell;
+	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
+	BUG_ON(!pb);
+	return pb;
+}
 
-	else if (!p->cell1)
-		p->cell1 = cell;
+static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio, data_size);
 
-	else
-		BUG();
+	pb->tick = false;
+	pb->req_nr = dm_bio_get_target_bio_nr(bio);
+	pb->cell = NULL;
+	pb->len = 0;
+
+	return pb;
 }
 
 /*----------------------------------------------------------------*/
 
-static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
+static void defer_bio(struct cache *cache, struct bio *bio)
 {
-	key->virtual = 0;
-	key->dev = 0;
-	key->block_begin = from_oblock(begin);
-	key->block_end = from_oblock(end);
-}
+	unsigned long flags;
 
-/*
- * The caller hands in a preallocated cell, and a free function for it.
- * The cell will be freed if there's an error, or if it wasn't used because
- * a cell with that key already exists.
- */
-typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_add(&cache->deferred_bios, bio);
+	spin_unlock_irqrestore(&cache->lock, flags);
 
-static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
-			    struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
-			    cell_free_fn free_fn, void *free_context,
-			    struct dm_bio_prison_cell **cell_result)
+	wake_deferred_bio_worker(cache);
+}
+
+static void defer_bios(struct cache *cache, struct bio_list *bios)
 {
-	int r;
-	struct dm_cell_key key;
+	unsigned long flags;
 
-	build_key(oblock_begin, oblock_end, &key);
-	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
-	if (r)
-		free_fn(free_context, cell_prealloc);
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_merge(&cache->deferred_bios, bios);
+	bio_list_init(bios);
+	spin_unlock_irqrestore(&cache->lock, flags);
 
-	return r;
+	wake_deferred_bio_worker(cache);
 }
 
-static int bio_detain(struct cache *cache, dm_oblock_t oblock,
-		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
-		      cell_free_fn free_fn, void *free_context,
-		      struct dm_bio_prison_cell **cell_result)
+/*----------------------------------------------------------------*/
+
+static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
 {
+	bool r;
+	size_t pb_size;
+	struct per_bio_data *pb;
+	struct dm_cell_key_v2 key;
 	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
-	return bio_detain_range(cache, oblock, end, bio,
-				cell_prealloc, free_fn, free_context, cell_result);
-}
+	struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
 
-static int get_cell(struct cache *cache,
-		    dm_oblock_t oblock,
-		    struct prealloc *structs,
-		    struct dm_bio_prison_cell **cell_result)
-{
-	int r;
-	struct dm_cell_key key;
-	struct dm_bio_prison_cell *cell_prealloc;
+	cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
+	if (!cell_prealloc) {
+		defer_bio(cache, bio);
+		return false;
+	}
 
-	cell_prealloc = prealloc_get_cell(structs);
+	build_key(oblock, end, &key);
+	r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
+	if (!r) {
+		/*
+		 * Failed to get the lock.
+		 */
+		free_prison_cell(cache, cell_prealloc);
+		return r;
+	}
 
-	build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
-	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
-	if (r)
-		prealloc_put_cell(structs, cell_prealloc);
+	if (cell != cell_prealloc)
+		free_prison_cell(cache, cell_prealloc);
+
+	pb_size = get_per_bio_data_size(cache);
+	pb = get_per_bio_data(bio, pb_size);
+	pb->cell = cell;
 
 	return r;
 }
@@ -575,21 +715,33 @@ static bool is_dirty(struct cache *cache, dm_cblock_t b)
 	return test_bit(from_cblock(b), cache->dirty_bitset);
 }
 
-static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
+static void set_dirty(struct cache *cache, dm_cblock_t cblock)
 {
 	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
 		atomic_inc(&cache->nr_dirty);
-		policy_set_dirty(cache->policy, oblock);
+		policy_set_dirty(cache->policy, cblock);
 	}
 }
 
-static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
+/*
+ * These two are called when setting after migrations to force the policy
+ * and dirty bitset to be in sync.
+ */
+static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
+{
+	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
+		atomic_inc(&cache->nr_dirty);
+	policy_set_dirty(cache->policy, cblock);
+}
+
+static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
 {
 	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
-		policy_clear_dirty(cache->policy, oblock);
 		if (atomic_dec_return(&cache->nr_dirty) == 0)
 			dm_table_event(cache->ti->table);
 	}
+
+	policy_clear_dirty(cache->policy, cblock);
 }
 
 /*----------------------------------------------------------------*/
@@ -628,11 +780,6 @@ static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
 				   oblocks_per_dblock(cache)));
 }
 
-static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
-{
-	return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
-}
-
 static void set_discard(struct cache *cache, dm_dblock_t b)
 {
 	unsigned long flags;
@@ -679,83 +826,6 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
 	return r;
 }
 
-/*----------------------------------------------------------------*/
-
-static void load_stats(struct cache *cache)
-{
-	struct dm_cache_statistics stats;
-
-	dm_cache_metadata_get_stats(cache->cmd, &stats);
-	atomic_set(&cache->stats.read_hit, stats.read_hits);
-	atomic_set(&cache->stats.read_miss, stats.read_misses);
-	atomic_set(&cache->stats.write_hit, stats.write_hits);
-	atomic_set(&cache->stats.write_miss, stats.write_misses);
-}
-
-static void save_stats(struct cache *cache)
-{
-	struct dm_cache_statistics stats;
-
-	if (get_cache_mode(cache) >= CM_READ_ONLY)
-		return;
-
-	stats.read_hits = atomic_read(&cache->stats.read_hit);
-	stats.read_misses = atomic_read(&cache->stats.read_miss);
-	stats.write_hits = atomic_read(&cache->stats.write_hit);
-	stats.write_misses = atomic_read(&cache->stats.write_miss);
-
-	dm_cache_metadata_set_stats(cache->cmd, &stats);
-}
-
-/*----------------------------------------------------------------
- * Per bio data
- *--------------------------------------------------------------*/
-
-/*
- * If using writeback, leave out struct per_bio_data's writethrough fields.
- */
-#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
-#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
-
-static bool writethrough_mode(struct cache_features *f)
-{
-	return f->io_mode == CM_IO_WRITETHROUGH;
-}
-
-static bool writeback_mode(struct cache_features *f)
-{
-	return f->io_mode == CM_IO_WRITEBACK;
-}
-
-static bool passthrough_mode(struct cache_features *f)
-{
-	return f->io_mode == CM_IO_PASSTHROUGH;
-}
-
-static size_t get_per_bio_data_size(struct cache *cache)
-{
-	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
-}
-
-static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
-{
-	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
-	BUG_ON(!pb);
-	return pb;
-}
-
-static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
-{
-	struct per_bio_data *pb = get_per_bio_data(bio, data_size);
-
-	pb->tick = false;
-	pb->req_nr = dm_bio_get_target_bio_nr(bio);
-	pb->all_io_entry = NULL;
-	pb->len = 0;
-
-	return pb;
-}
-
 /*----------------------------------------------------------------
  * Remapping
  *--------------------------------------------------------------*/
@@ -797,8 +867,9 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 }
 
 static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
-				  dm_oblock_t oblock)
+					  dm_oblock_t oblock)
 {
+	// FIXME: this is called way too much.
 	check_if_tick_bio_needed(cache, bio);
 	remap_to_origin(cache, bio);
 	if (bio_data_dir(bio) == WRITE)
@@ -811,7 +882,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
 	check_if_tick_bio_needed(cache, bio);
 	remap_to_cache(cache, bio, cblock);
 	if (bio_data_dir(bio) == WRITE) {
-		set_dirty(cache, oblock, cblock);
+		set_dirty(cache, cblock);
 		clear_discard(cache, oblock_to_dblock(cache, oblock));
 	}
 }
@@ -828,22 +899,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 	return to_oblock(block_nr);
 }
 
-/*
- * You must increment the deferred set whilst the prison cell is held.  To
- * encourage this, we ask for 'cell' to be passed in.
- */
-static void inc_ds(struct cache *cache, struct bio *bio,
-		   struct dm_bio_prison_cell *cell)
-{
-	size_t pb_data_size = get_per_bio_data_size(cache);
-	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
-
-	BUG_ON(!cell);
-	BUG_ON(pb->all_io_entry);
-
-	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-}
-
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
 	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
@@ -875,29 +930,10 @@ static void accounted_request(struct cache *cache, struct bio *bio)
 	generic_make_request(bio);
 }
 
-static void issue(struct cache *cache, struct bio *bio)
-{
-	unsigned long flags;
-
-	if (!op_is_flush(bio->bi_opf)) {
-		accounted_request(cache, bio);
-		return;
-	}
-
-	/*
-	 * Batch together any bios that trigger commits and then issue a
-	 * single commit for them in do_worker().
-	 */
-	spin_lock_irqsave(&cache->lock, flags);
-	cache->commit_requested = true;
-	bio_list_add(&cache->deferred_flush_bios, bio);
-	spin_unlock_irqrestore(&cache->lock, flags);
-}
-
-static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+static void issue_op(struct bio *bio, void *context)
 {
-	inc_ds(cache, bio, cell);
-	issue(cache, bio);
+	struct cache *cache = context;
+	accounted_request(cache, bio);
 }
 
 static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
@@ -908,7 +944,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
 	bio_list_add(&cache->deferred_writethrough_bios, bio);
 	spin_unlock_irqrestore(&cache->lock, flags);
 
-	wake_worker(cache);
+	wake_deferred_writethrough_worker(cache);
 }
 
 static void writethrough_endio(struct bio *bio)
@@ -934,6 +970,7 @@ static void writethrough_endio(struct bio *bio)
 }
 
 /*
+ * FIXME: send in parallel, huge latency as is.
  * When running in writethrough mode we need to send writes to clean blocks
  * to both the cache and origin devices.  In future we'd like to clone the
  * bio and send them in parallel, but for now we're doing them in
@@ -1046,12 +1083,58 @@ static void metadata_operation_failed(struct cache *cache, const char *op, int r
 	set_cache_mode(cache, CM_READ_ONLY);
 }
 
+/*----------------------------------------------------------------*/
+
+static void load_stats(struct cache *cache)
+{
+	struct dm_cache_statistics stats;
+
+	dm_cache_metadata_get_stats(cache->cmd, &stats);
+	atomic_set(&cache->stats.read_hit, stats.read_hits);
+	atomic_set(&cache->stats.read_miss, stats.read_misses);
+	atomic_set(&cache->stats.write_hit, stats.write_hits);
+	atomic_set(&cache->stats.write_miss, stats.write_misses);
+}
+
+static void save_stats(struct cache *cache)
+{
+	struct dm_cache_statistics stats;
+
+	if (get_cache_mode(cache) >= CM_READ_ONLY)
+		return;
+
+	stats.read_hits = atomic_read(&cache->stats.read_hit);
+	stats.read_misses = atomic_read(&cache->stats.read_miss);
+	stats.write_hits = atomic_read(&cache->stats.write_hit);
+	stats.write_misses = atomic_read(&cache->stats.write_miss);
+
+	dm_cache_metadata_set_stats(cache->cmd, &stats);
+}
+
+static void update_stats(struct cache_stats *stats, enum policy_operation op)
+{
+	switch (op) {
+	case POLICY_PROMOTE:
+		atomic_inc(&stats->promotion);
+		break;
+
+	case POLICY_DEMOTE:
+		atomic_inc(&stats->demotion);
+		break;
+
+	case POLICY_WRITEBACK:
+		atomic_inc(&stats->writeback);
+		break;
+	}
+}
+
 /*----------------------------------------------------------------
  * Migration processing
  *
  * Migration covers moving data from the origin device to the cache, or
  * vice versa.
  *--------------------------------------------------------------*/
+
 static void inc_io_migrations(struct cache *cache)
 {
 	atomic_inc(&cache->nr_io_migrations);
@@ -1067,213 +1150,109 @@ static bool discard_or_flush(struct bio *bio)
 	return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
 }
 
-static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
-{
-	if (discard_or_flush(cell->holder)) {
-		/*
-		 * We have to handle these bios individually.
-		 */
-		dm_cell_release(cache->prison, cell, &cache->deferred_bios);
-		free_prison_cell(cache, cell);
-	} else
-		list_add_tail(&cell->user_list, &cache->deferred_cells);
-}
-
-static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder)
+static void calc_discard_block_range(struct cache *cache, struct bio *bio,
+				     dm_dblock_t *b, dm_dblock_t *e)
 {
-	unsigned long flags;
-
-	if (!holder && dm_cell_promote_or_release(cache->prison, cell)) {
-		/*
-		 * There was no prisoner to promote to holder, the
-		 * cell has been released.
-		 */
-		free_prison_cell(cache, cell);
-		return;
-	}
+	sector_t sb = bio->bi_iter.bi_sector;
+	sector_t se = bio_end_sector(bio);
 
-	spin_lock_irqsave(&cache->lock, flags);
-	__cell_defer(cache, cell);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
 
-	wake_worker(cache);
+	if (se - sb < cache->discard_block_size)
+		*e = *b;
+	else
+		*e = to_dblock(block_div(se, cache->discard_block_size));
 }
 
-static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
-{
-	dm_cell_error(cache->prison, cell, err);
-	free_prison_cell(cache, cell);
-}
+/*----------------------------------------------------------------*/
 
-static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
+static void prevent_background_work(struct cache *cache)
 {
-	cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE);
+	lockdep_off();
+	down_write(&cache->background_work_lock);
+	lockdep_on();
 }
 
-static void free_io_migration(struct dm_cache_migration *mg)
+static void allow_background_work(struct cache *cache)
 {
-	struct cache *cache = mg->cache;
-
-	dec_io_migrations(cache);
-	free_migration(mg);
-	wake_worker(cache);
+	lockdep_off();
+	up_write(&cache->background_work_lock);
+	lockdep_on();
 }
 
-static void migration_failure(struct dm_cache_migration *mg)
+static bool background_work_begin(struct cache *cache)
 {
-	struct cache *cache = mg->cache;
-	const char *dev_name = cache_device_name(cache);
-
-	if (mg->writeback) {
-		DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name);
-		set_dirty(cache, mg->old_oblock, mg->cblock);
-		cell_defer(cache, mg->old_ocell, false);
-
-	} else if (mg->demote) {
-		DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name);
-		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
+	bool r;
 
-		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
-		if (mg->promote)
-			cell_defer(cache, mg->new_ocell, true);
-	} else {
-		DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name);
-		policy_remove_mapping(cache->policy, mg->new_oblock);
-		cell_defer(cache, mg->new_ocell, true);
-	}
+	lockdep_off();
+	r = down_read_trylock(&cache->background_work_lock);
+	lockdep_on();
author	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-03 10:31:20 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-03 10:31:20 -0700
commit	d35a878ae1c50977b55e352fd46e36e35add72a0 (patch)
tree	7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-cache-target.c
parent	e5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff)
parent	390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff)
download	linux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.gz linux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.bz2 linux-d35a878ae1c50977b55e352fd46e36e35add72a0.zip