summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-25 16:34:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-25 16:34:39 -0700
commit6597ac8a514e2085cf19822a5783345c613312a5 (patch)
treefcae0569d2159e04258405c15c91551e36f8ee6c
parente4bc13adfd016fc1036838170288b5680d1a98b0 (diff)
parente262f34741522e0d821642e5449c6eeb512723fc (diff)
downloadlinux-6597ac8a514e2085cf19822a5783345c613312a5.tar.gz
linux-6597ac8a514e2085cf19822a5783345c613312a5.tar.bz2
linux-6597ac8a514e2085cf19822a5783345c613312a5.zip
Merge tag 'dm-4.2-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - DM core cleanups: * blk-mq request-based DM no longer uses any mempools now that partial completions are no longer handled as part of cloned requests - DM raid cleanups and support for MD raid0 - DM cache core advances and a new stochastic-multi-queue (smq) cache replacement policy * smq is the new default dm-cache policy - DM thinp cleanups and much more efficient large discard support - DM statistics support for request-based DM and nanosecond resolution timestamps - Fixes to DM stripe, DM log-writes, DM raid1 and DM crypt * tag 'dm-4.2-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (39 commits) dm stats: add support for request-based DM devices dm stats: collect and report histogram of IO latencies dm stats: support precise timestamps dm stats: fix divide by zero if 'number_of_areas' arg is zero dm cache: switch the "default" cache replacement policy from mq to smq dm space map metadata: fix occasional leak of a metadata block on resize dm thin metadata: fix a race when entering fail mode dm thin: fail messages with EOPNOTSUPP when pool cannot handle messages dm thin: range discard support dm thin metadata: add dm_thin_remove_range() dm thin metadata: add dm_thin_find_mapped_range() dm btree: add dm_btree_remove_leaves() dm stats: Use kvfree() in dm_kvfree() dm cache: age and write back cache entries even without active IO dm cache: prefix all DMERR and DMINFO messages with cache device name dm cache: add fail io mode and needs_check flag dm cache: wake the worker thread every time we free a migration object dm cache: add stochastic-multi-queue (smq) policy dm cache: boost promotion of blocks that will be overwritten dm cache: defer whole cells ...
-rw-r--r--Documentation/device-mapper/cache-policies.txt67
-rw-r--r--Documentation/device-mapper/cache.txt9
-rw-r--r--Documentation/device-mapper/dm-raid.txt2
-rw-r--r--Documentation/device-mapper/statistics.txt41
-rw-r--r--drivers/md/Kconfig12
-rw-r--r--drivers/md/Makefile2
-rw-r--r--drivers/md/dm-bio-prison.c26
-rw-r--r--drivers/md/dm-bio-prison.h13
-rw-r--r--drivers/md/dm-cache-metadata.c133
-rw-r--r--drivers/md/dm-cache-metadata.h10
-rw-r--r--drivers/md/dm-cache-policy-cleaner.c6
-rw-r--r--drivers/md/dm-cache-policy-internal.h52
-rw-r--r--drivers/md/dm-cache-policy-mq.c93
-rw-r--r--drivers/md/dm-cache-policy-smq.c1791
-rw-r--r--drivers/md/dm-cache-policy.h30
-rw-r--r--drivers/md/dm-cache-target.c832
-rw-r--r--drivers/md/dm-crypt.c30
-rw-r--r--drivers/md/dm-log-writes.c4
-rw-r--r--drivers/md/dm-raid.c225
-rw-r--r--drivers/md/dm-raid1.c75
-rw-r--r--drivers/md/dm-stats.c341
-rw-r--r--drivers/md/dm-stats.h4
-rw-r--r--drivers/md/dm-stripe.c4
-rw-r--r--drivers/md/dm-table.c4
-rw-r--r--drivers/md/dm-thin-metadata.c124
-rw-r--r--drivers/md/dm-thin-metadata.h11
-rw-r--r--drivers/md/dm-thin.c612
-rw-r--r--drivers/md/dm.c190
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c6
-rw-r--r--drivers/md/persistent-data/dm-block-manager.h1
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c127
-rw-r--r--drivers/md/persistent-data/dm-btree.h9
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.c50
33 files changed, 4213 insertions, 723 deletions
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
index 0d124a971801..d9246a32e673 100644
--- a/Documentation/device-mapper/cache-policies.txt
+++ b/Documentation/device-mapper/cache-policies.txt
@@ -25,10 +25,10 @@ trying to see when the io scheduler has let the ios run.
Overview of supplied cache replacement policies
===============================================
-multiqueue
-----------
+multiqueue (mq)
+---------------
-This policy is the default.
+This policy has been deprecated in favor of the smq policy (see below).
The multiqueue policy has three sets of 16 queues: one set for entries
waiting for the cache and another two for those in the cache (a set for
@@ -73,6 +73,67 @@ If you're trying to quickly warm a new cache device you may wish to
reduce these to encourage promotion. Remember to switch them back to
their defaults after the cache fills though.
+Stochastic multiqueue (smq)
+---------------------------
+
+This policy is the default.
+
+The stochastic multi-queue (smq) policy addresses some of the problems
+with the multiqueue (mq) policy.
+
+The smq policy (vs mq) offers the promise of less memory utilization,
+improved performance and increased adaptability in the face of changing
+workloads. SMQ also does not have any cumbersome tuning knobs.
+
+Users may switch from "mq" to "smq" simply by appropriately reloading a
+DM table that is using the cache target. Doing so will cause all of the
+mq policy's hints to be dropped. Also, performance of the cache may
+degrade slightly until smq recalculates the origin device's hotspots
+that should be cached.
+
+Memory usage:
+The mq policy uses a lot of memory; 88 bytes per cache block on a 64
+bit machine.
+
+SMQ uses 28bit indexes to implement it's data structures rather than
+pointers. It avoids storing an explicit hit count for each block. It
+has a 'hotspot' queue rather than a pre cache which uses a quarter of
+the entries (each hotspot block covers a larger area than a single
+cache block).
+
+All these mean smq uses ~25bytes per cache block. Still a lot of
+memory, but a substantial improvement nontheless.
+
+Level balancing:
+MQ places entries in different levels of the multiqueue structures
+based on their hit count (~ln(hit count)). This means the bottom
+levels generally have the most entries, and the top ones have very
+few. Having unbalanced levels like this reduces the efficacy of the
+multiqueue.
+
+SMQ does not maintain a hit count, instead it swaps hit entries with
+the least recently used entry from the level above. The over all
+ordering being a side effect of this stochastic process. With this
+scheme we can decide how many entries occupy each multiqueue level,
+resulting in better promotion/demotion decisions.
+
+Adaptability:
+The MQ policy maintains a hit count for each cache block. For a
+different block to get promoted to the cache it's hit count has to
+exceed the lowest currently in the cache. This means it can take a
+long time for the cache to adapt between varying IO patterns.
+Periodically degrading the hit counts could help with this, but I
+haven't found a nice general solution.
+
+SMQ doesn't maintain hit counts, so a lot of this problem just goes
+away. In addition it tracks performance of the hotspot queue, which
+is used to decide which blocks to promote. If the hotspot queue is
+performing badly then it starts moving entries more quickly between
+levels. This lets it adapt to new IO patterns very quickly.
+
+Performance:
+Testing SMQ shows substantially better performance than MQ.
+
cleaner
-------
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index 68c0f517c60e..82960cffbad3 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -221,6 +221,7 @@ Status
<#read hits> <#read misses> <#write hits> <#write misses>
<#demotions> <#promotions> <#dirty> <#features> <features>*
<#core args> <core args>* <policy name> <#policy args> <policy args>*
+<cache metadata mode>
metadata block size : Fixed block size for each metadata block in
sectors
@@ -251,8 +252,12 @@ core args : Key/value pairs for tuning the core
e.g. migration_threshold
policy name : Name of the policy
#policy args : Number of policy arguments to follow (must be even)
-policy args : Key/value pairs
- e.g. sequential_threshold
+policy args : Key/value pairs e.g. sequential_threshold
+cache metadata mode : ro if read-only, rw if read-write
+ In serious cases where even a read-only mode is deemed unsafe
+ no further I/O will be permitted and the status will just
+ contain the string 'Fail'. The userspace recovery tools
+ should then be used.
Messages
--------
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index ef8ba9fa58c4..cb12af3b51c2 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -224,3 +224,5 @@ Version History
New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
1.5.1 Add ability to restore transiently failed devices on resume.
1.5.2 'mismatch_cnt' is zero unless [last_]sync_action is "check".
+1.6.0 Add discard support (and devices_handle_discard_safely module param).
+1.7.0 Add support for MD RAID0 mappings.
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
index 2a1673adc200..4919b2dfd1b3 100644
--- a/Documentation/device-mapper/statistics.txt
+++ b/Documentation/device-mapper/statistics.txt
@@ -13,9 +13,14 @@ the range specified.
The I/O statistics counters for each step-sized area of a region are
in the same format as /sys/block/*/stat or /proc/diskstats (see:
Documentation/iostats.txt). But two extra counters (12 and 13) are
-provided: total time spent reading and writing in milliseconds. All
-these counters may be accessed by sending the @stats_print message to
-the appropriate DM device via dmsetup.
+provided: total time spent reading and writing. When the histogram
+argument is used, the 14th parameter is reported that represents the
+histogram of latencies. All these counters may be accessed by sending
+the @stats_print message to the appropriate DM device via dmsetup.
+
+The reported times are in milliseconds and the granularity depends on
+the kernel ticks. When the option precise_timestamps is used, the
+reported times are in nanoseconds.
Each region has a corresponding unique identifier, which we call a
region_id, that is assigned when the region is created. The region_id
@@ -33,7 +38,9 @@ memory is used by reading
Messages
========
- @stats_create <range> <step> [<program_id> [<aux_data>]]
+ @stats_create <range> <step>
+ [<number_of_optional_arguments> <optional_arguments>...]
+ [<program_id> [<aux_data>]]
Create a new region and return the region_id.
@@ -48,6 +55,29 @@ Messages
"/<number_of_areas>" - the range is subdivided into the specified
number of areas.
+ <number_of_optional_arguments>
+ The number of optional arguments
+
+ <optional_arguments>
+ The following optional arguments are supported
+ precise_timestamps - use precise timer with nanosecond resolution
+ instead of the "jiffies" variable. When this argument is
+ used, the resulting times are in nanoseconds instead of
+ milliseconds. Precise timestamps are a little bit slower
+ to obtain than jiffies-based timestamps.
+ histogram:n1,n2,n3,n4,... - collect histogram of latencies. The
+ numbers n1, n2, etc are times that represent the boundaries
+ of the histogram. If precise_timestamps is not used, the
+ times are in milliseconds, otherwise they are in
+ nanoseconds. For each range, the kernel will report the
+ number of requests that completed within this range. For
+ example, if we use "histogram:10,20,30", the kernel will
+ report four numbers a:b:c:d. a is the number of requests
+ that took 0-10 ms to complete, b is the number of requests
+ that took 10-20 ms to complete, c is the number of requests
+ that took 20-30 ms to complete and d is the number of
+ requests that took more than 30 ms to complete.
+
<program_id>
An optional parameter. A name that uniquely identifies
the userspace owner of the range. This groups ranges together
@@ -55,6 +85,9 @@ Messages
created and ignore those created by others.
The kernel returns this string back in the output of
@stats_list message, but it doesn't use it for anything else.
+ If we omit the number of optional arguments, program id must not
+ be a number, otherwise it would be interpreted as the number of
+ optional arguments.
<aux_data>
An optional parameter. A word that provides auxiliary data
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index edcf4ab66e00..b59727309072 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -304,6 +304,18 @@ config DM_CACHE_MQ
This is meant to be a general purpose policy. It prioritises
reads over writes.
+config DM_CACHE_SMQ
+ tristate "Stochastic MQ Cache Policy (EXPERIMENTAL)"
+ depends on DM_CACHE
+ default y
+ ---help---
+ A cache policy that uses a multiqueue ordered by recent hits
+ to select which blocks should be promoted and demoted.
+ This is meant to be a general purpose policy. It prioritises
+ reads over writes. This SMQ policy (vs MQ) offers the promise
+ of less memory utilization, improved performance and increased
+ adaptability in the face of changing workloads.
+
config DM_CACHE_CLEANER
tristate "Cleaner Cache Policy (EXPERIMENTAL)"
depends on DM_CACHE
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index dba4db5985fb..462f443a4f85 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -13,6 +13,7 @@ dm-log-userspace-y \
dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
dm-cache-mq-y += dm-cache-policy-mq.o
+dm-cache-smq-y += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y += dm-era-target.o
md-mod-y += md.o bitmap.o
@@ -54,6 +55,7 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o
+obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o
obj-$(CONFIG_DM_ERA) += dm-era.o
obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index be065300e93c..cd6d1d21e057 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -255,6 +255,32 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);
+static int __promote_or_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell)
+{
+ if (bio_list_empty(&cell->bios)) {
+ rb_erase(&cell->node, &prison->cells);
+ return 1;
+ }
+
+ cell->holder = bio_list_pop(&cell->bios);
+ return 0;
+}
+
+int dm_cell_promote_or_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell)
+{
+ int r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __promote_or_release(prison, cell);
+ spin_unlock_irqrestore(&prison->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_promote_or_release);
+
/*----------------------------------------------------------------*/
#define DEFERRED_SET_SIZE 64
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 74cf01144b1f..54352f009bfd 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -101,6 +101,19 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
void (*visit_fn)(void *, struct dm_bio_prison_cell *),
void *context, struct dm_bio_prison_cell *cell);
+/*
+ * Rather than always releasing the prisoners in a cell, the client may
+ * want to promote one of them to be the new holder. There is a race here
+ * though between releasing an empty cell, and other threads adding new
+ * inmates. So this function makes the decision with its lock held.
+ *
+ * This function can have two outcomes:
+ * i) An inmate is promoted to be the holder of the cell (return value of 0).
+ * ii) The cell has no inmate for promotion and is released (return value of 1).
+ */
+int dm_cell_promote_or_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell);
+
/*----------------------------------------------------------------*/
/*
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index c1c010498a21..20cc36b01b77 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -39,6 +39,8 @@
enum superblock_flag_bits {
/* for spotting crashes that would invalidate the dirty bitset */
CLEAN_SHUTDOWN,
+ /* metadata must be checked using the tools */
+ NEEDS_CHECK,
};
/*
@@ -107,6 +109,7 @@ struct dm_cache_metadata {
struct dm_disk_bitset discard_info;
struct rw_semaphore root_lock;
+ unsigned long flags;
dm_block_t root;
dm_block_t hint_root;
dm_block_t discard_root;
@@ -129,6 +132,14 @@ struct dm_cache_metadata {
* buffer before the superblock is locked and updated.
*/
__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+
+ /*
+ * Set if a transaction has to be aborted but the attempt to roll
+ * back to the previous (good) transaction failed. The only
+ * metadata operation permissible in this state is the closing of
+ * the device.
+ */
+ bool fail_io:1;
};
/*-------------------------------------------------------------------
@@ -527,6 +538,7 @@ static unsigned long clear_clean_shutdown(unsigned long flags)
static void read_superblock_fields(struct dm_cache_metadata *cmd,
struct cache_disk_superblock *disk_super)
{
+ cmd->flags = le32_to_cpu(disk_super->flags);
cmd->root = le64_to_cpu(disk_super->mapping_root);
cmd->hint_root = le64_to_cpu(disk_super->hint_root);
cmd->discard_root = le64_to_cpu(disk_super->discard_root);
@@ -625,6 +637,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
if (mutator)
update_flags(disk_super, mutator);
+ disk_super->flags = cpu_to_le32(cmd->flags);
disk_super->mapping_root = cpu_to_le64(cmd->root);
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
@@ -693,6 +706,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
cmd->cache_blocks = 0;
cmd->policy_hint_size = policy_hint_size;
cmd->changed = true;
+ cmd->fail_io = false;
r = __create_persistent_data_objects(cmd, may_format_device);
if (r) {
@@ -796,7 +810,8 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
list_del(&cmd->list);
mutex_unlock(&table_lock);
- __destroy_persistent_data_objects(cmd);
+ if (!cmd->fail_io)
+ __destroy_persistent_data_objects(cmd);
kfree(cmd);
}
}
@@ -848,13 +863,26 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
return 0;
}
+#define WRITE_LOCK(cmd) \
+ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \
+ return -EINVAL; \
+ down_write(&cmd->root_lock)
+
+#define WRITE_LOCK_VOID(cmd) \
+ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \
+ return; \
+ down_write(&cmd->root_lock)
+
+#define WRITE_UNLOCK(cmd) \
+ up_write(&cmd->root_lock)
+
int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
{
int r;
bool clean;
__le64 null_mapping = pack_value(0, 0);
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
__dm_bless_for_disk(&null_mapping);
if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
@@ -880,7 +908,7 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
cmd->changed = true;
out:
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -891,7 +919,7 @@ int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = dm_bitset_resize(&cmd->discard_info,
cmd->discard_root,
from_dblock(cmd->discard_nr_blocks),
@@ -903,7 +931,7 @@ int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
}
cmd->changed = true;
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -946,9 +974,9 @@ int dm_cache_set_discard(struct dm_cache_metadata *cmd,
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __discard(cmd, dblock, discard);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -1020,9 +1048,9 @@ int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __remove(cmd, cblock);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -1048,9 +1076,9 @@ int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __insert(cmd, cblock, oblock);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -1234,9 +1262,9 @@ int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __dirty(cmd, cblock, dirty);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -1252,9 +1280,9 @@ void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
struct dm_cache_statistics *stats)
{
- down_write(&cmd->root_lock);
+ WRITE_LOCK_VOID(cmd);
cmd->stats = *stats;
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
}
int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
@@ -1263,7 +1291,7 @@ int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
clear_clean_shutdown);
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __commit_transaction(cmd, mutator);
if (r)
goto out;
@@ -1271,7 +1299,7 @@ int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
r = __begin_transaction(cmd);
out:
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -1376,9 +1404,9 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = write_hints(cmd, policy);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
@@ -1387,3 +1415,70 @@ int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
{
return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
}
+
+void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
+{
+ WRITE_LOCK_VOID(cmd);
+ dm_bm_set_read_only(cmd->bm);
+ WRITE_UNLOCK(cmd);
+}
+
+void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
+{
+ WRITE_LOCK_VOID(cmd);
+ dm_bm_set_read_write(cmd->bm);
+ WRITE_UNLOCK(cmd);
+}
+
+int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
+{
+ int r;
+ struct dm_block *sblock;
+ struct cache_disk_superblock *disk_super;
+
+ /*
+ * We ignore fail_io for this function.
+ */
+ down_write(&cmd->root_lock);
+ set_bit(NEEDS_CHECK, &cmd->flags);
+
+ r = superblock_lock(cmd, &sblock);
+ if (r) {
+ DMERR("couldn't read superblock");
+ goto out;
+ }
+
+ disk_super = dm_block_data(sblock);
+ disk_super->flags = cpu_to_le32(cmd->flags);
+
+ dm_bm_unlock(sblock);
+
+out:
+ up_write(&cmd->root_lock);
+ return r;
+}
+
+bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd)
+{
+ bool needs_check;
+
+ down_read(&cmd->root_lock);
+ needs_check = !!test_bit(NEEDS_CHECK, &cmd->flags);
+ up_read(&cmd->root_lock);
+
+ return needs_check;
+}
+
+int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
+{
+ int r;
+
+ WRITE_LOCK(cmd);
+ __destroy_persistent_data_objects(cmd);
+ r = __create_persistent_data_objects(cmd, false);
+ if (r)
+ cmd->fail_io = true;
+ WRITE_UNLOCK(cmd);
+
+ return r;
+}
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 4ecc403be283..2ffee21f318d 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -102,6 +102,10 @@ struct dm_cache_statistics {
void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
struct dm_cache_statistics *stats);
+
+/*
+ * 'void' because it's no big deal if it fails.
+ */
void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
struct dm_cache_statistics *stats);
@@ -133,6 +137,12 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *
*/
int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result);
+bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd);
+int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd);
+void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd);
+void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd);
+int dm_cache_metadata_abort(struct dm_cache_metadata *cmd);
+
/*----------------------------------------------------------------*/
#endif /* DM_CACHE_METADATA_H */
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
index b04d1f904d07..240c9f0e85e7 100644
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -171,7 +171,8 @@ static void remove_cache_hash_entry(struct wb_cache_entry *e)
/* Public interface (see dm-cache-policy.h */
static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_result *result)
+ struct bio *bio, struct policy_locker *locker,
+ struct policy_result *result)
{
struct policy *p = to_policy(pe);
struct wb_cache_entry *e;
@@ -358,7 +359,8 @@ static struct wb_cache_entry *get_next_dirty_entry(struct policy *p)
static int wb_writeback_work(struct dm_cache_policy *pe,
dm_oblock_t *oblock,
- dm_cblock_t *cblock)
+ dm_cblock_t *cblock,
+ bool critical_only)
{
int r = -ENOENT;
struct policy *p = to_policy(pe);
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 2256a1f24f73..2816018faa7f 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -7,6 +7,7 @@
#ifndef DM_CACHE_POLICY_INTERNAL_H
#define DM_CACHE_POLICY_INTERNAL_H
+#include <linux/vmalloc.h>
#include "dm-cache-policy.h"
/*----------------------------------------------------------------*/
@@ -16,9 +17,10 @@
*/
static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_result *result)
+ struct bio *bio, struct policy_locker *locker,
+ struct policy_result *result)
{
- return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result);
+ return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
}
static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
@@ -54,9 +56,10 @@ static inline int policy_walk_mappings(struct dm_cache_policy *p,
static inline int policy_writeback_work(struct dm_cache_policy *p,
dm_oblock_t *oblock,
- dm_cblock_t *cblock)
+ dm_cblock_t *cblock,
+ bool critical_only)
{
- return p->writeback_work ? p->writeback_work(p, oblock, cblock) : -ENOENT;
+ return p->writeback_work ? p->writeback_work(p, oblock, cblock, critical_only) : -ENOENT;
}
static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
@@ -80,19 +83,21 @@ static inline dm_cblock_t policy_residency(struct dm_cache_policy *p)
return p->residency(p);
}
-static inline void policy_tick(struct dm_cache_policy *p)
+static inline void policy_tick(struct dm_cache_policy *p, bool can_block)
{
if (p->tick)
- return p->tick(p);
+ return p->tick(p, can_block);
}
-static inline int policy_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen)
+static inline int policy_emit_config_values(struct dm_cache_policy *p, char *result,
+