diff options
24 files changed, 1634 insertions, 580 deletions
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt index 66c2774c0c64..0d124a971801 100644 --- a/Documentation/device-mapper/cache-policies.txt +++ b/Documentation/device-mapper/cache-policies.txt @@ -47,20 +47,26 @@ Message and constructor argument pairs are: 'discard_promote_adjustment <value>' The sequential threshold indicates the number of contiguous I/Os -required before a stream is treated as sequential. The random threshold +required before a stream is treated as sequential. Once a stream is +considered sequential it will bypass the cache. The random threshold is the number of intervening non-contiguous I/Os that must be seen before the stream is treated as random again. The sequential and random thresholds default to 512 and 4 respectively. -Large, sequential ios are probably better left on the origin device -since spindles tend to have good bandwidth. The io_tracker counts -contiguous I/Os to try to spot when the io is in one of these sequential -modes. - -Internally the mq policy maintains a promotion threshold variable. If -the hit count of a block not in the cache goes above this threshold it -gets promoted to the cache. The read, write and discard promote adjustment +Large, sequential I/Os are probably better left on the origin device +since spindles tend to have good sequential I/O bandwidth. The +io_tracker counts contiguous I/Os to try to spot when the I/O is in one +of these sequential modes. But there are use-cases for wanting to +promote sequential blocks to the cache (e.g. fast application startup). +If sequential threshold is set to 0 the sequential I/O detection is +disabled and sequential I/O will no longer implicitly bypass the cache. +Setting the random threshold to 0 does _not_ disable the random I/O +stream detection. + +Internally the mq policy determines a promotion threshold. If the hit +count of a block not in the cache goes above this threshold it gets +promoted to the cache. The read, write and discard promote adjustment tunables allow you to tweak the promotion threshold by adding a small value based on the io type. They default to 4, 8 and 1 respectively. If you're trying to quickly warm a new cache device you may wish to diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index f752d12081ff..be065300e93c 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -14,68 +14,38 @@ /*----------------------------------------------------------------*/ -struct bucket { - spinlock_t lock; - struct hlist_head cells; -}; +#define MIN_CELLS 1024 struct dm_bio_prison { + spinlock_t lock; mempool_t *cell_pool; - - unsigned nr_buckets; - unsigned hash_mask; - struct bucket *buckets; + struct rb_root cells; }; -/*----------------------------------------------------------------*/ - -static uint32_t calc_nr_buckets(unsigned nr_cells) -{ - uint32_t n = 128; - - nr_cells /= 4; - nr_cells = min(nr_cells, 8192u); - - while (n < nr_cells) - n <<= 1; - - return n; -} - static struct kmem_cache *_cell_cache; -static void init_bucket(struct bucket *b) -{ - spin_lock_init(&b->lock); - INIT_HLIST_HEAD(&b->cells); -} +/*----------------------------------------------------------------*/ /* * @nr_cells should be the number of cells you want in use _concurrently_. * Don't confuse it with the number of distinct keys. */ -struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) +struct dm_bio_prison *dm_bio_prison_create(void) { - unsigned i; - uint32_t nr_buckets = calc_nr_buckets(nr_cells); - size_t len = sizeof(struct dm_bio_prison) + - (sizeof(struct bucket) * nr_buckets); - struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); + struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); if (!prison) return NULL; - prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); + spin_lock_init(&prison->lock); + + prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); if (!prison->cell_pool) { kfree(prison); return NULL; } - prison->nr_buckets = nr_buckets; - prison->hash_mask = nr_buckets - 1; - prison->buckets = (struct bucket *) (prison + 1); - for (i = 0; i < nr_buckets; i++) - init_bucket(prison->buckets + i); + prison->cells = RB_ROOT; return prison; } @@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison, } EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); -static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) +static void __setup_new_cell(struct dm_cell_key *key, + struct bio *holder, + struct dm_bio_prison_cell *cell) { - const unsigned long BIG_PRIME = 4294967291UL; - uint64_t hash = key->block * BIG_PRIME; - - return (uint32_t) (hash & prison->hash_mask); + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = holder; + bio_list_init(&cell->bios); } -static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) +static int cmp_keys(struct dm_cell_key *lhs, + struct dm_cell_key *rhs) { - return (lhs->virtual == rhs->virtual) && - (lhs->dev == rhs->dev) && - (lhs->block == rhs->block); -} + if (lhs->virtual < rhs->virtual) + return -1; -static struct bucket *get_bucket(struct dm_bio_prison *prison, - struct dm_cell_key *key) -{ - return prison->buckets + hash_key(prison, key); -} + if (lhs->virtual > rhs->virtual) + return 1; -static struct dm_bio_prison_cell *__search_bucket(struct bucket *b, - struct dm_cell_key *key) -{ - struct dm_bio_prison_cell *cell; + if (lhs->dev < rhs->dev) + return -1; - hlist_for_each_entry(cell, &b->cells, list) - if (keys_equal(&cell->key, key)) - return cell; + if (lhs->dev > rhs->dev) + return 1; - return NULL; -} + if (lhs->block_end <= rhs->block_begin) + return -1; -static void __setup_new_cell(struct bucket *b, - struct dm_cell_key *key, - struct bio *holder, - struct dm_bio_prison_cell *cell) -{ - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = holder; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, &b->cells); + if (lhs->block_begin >= rhs->block_end) + return 1; + + return 0; } -static int __bio_detain(struct bucket *b, +static int __bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell *cell_prealloc, struct dm_bio_prison_cell **cell_result) { - struct dm_bio_prison_cell *cell; - - cell = __search_bucket(b, key); - if (cell) { - if (inmate) - bio_list_add(&cell->bios, inmate); - *cell_result = cell; - return 1; + int r; + struct rb_node **new = &prison->cells.rb_node, *parent = NULL; + + while (*new) { + struct dm_bio_prison_cell *cell = + container_of(*new, struct dm_bio_prison_cell, node); + + r = cmp_keys(key, &cell->key); + + parent = *new; + if (r < 0) + new = &((*new)->rb_left); + else if (r > 0) + new = &((*new)->rb_right); + else { + if (inmate) + bio_list_add(&cell->bios, inmate); + *cell_result = cell; + return 1; + } } - __setup_new_cell(b, key, inmate, cell_prealloc); + __setup_new_cell(key, inmate, cell_prealloc); *cell_result = cell_prealloc; + + rb_link_node(&cell_prealloc->node, parent, new); + rb_insert_color(&cell_prealloc->node, &prison->cells); + return 0; } @@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison, { int r; unsigned long flags; - struct bucket *b = get_bucket(prison, key); - spin_lock_irqsave(&b->lock, flags); - r = __bio_detain(b, key, inmate, cell_prealloc, cell_result); - spin_unlock_irqrestore(&b->lock, flags); + spin_lock_irqsave(&prison->lock, flags); + r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); return r; } @@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell); /* * @inmates must have been initialised prior to this call */ -static void __cell_release(struct dm_bio_prison_cell *cell, +static void __cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, struct bio_list *inmates) { - hlist_del(&cell->list); + rb_erase(&cell->node, &prison->cells); if (inmates) { if (cell->holder) @@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison, struct bio_list *bios) { unsigned long flags; - struct bucket *b = get_bucket(prison, &cell->key); - spin_lock_irqsave(&b->lock, flags); - __cell_release(cell, bios); - spin_unlock_irqrestore(&b->lock, flags); + spin_lock_irqsave(&prison->lock, flags); + __cell_release(prison, cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); } EXPORT_SYMBOL_GPL(dm_cell_release); /* * Sometimes we don't want the holder, just the additional bios. */ -static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, +static void __cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, struct bio_list *inmates) { - hlist_del(&cell->list); + rb_erase(&cell->node, &prison->cells); bio_list_merge(inmates, &cell->bios); } @@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, struct bio_list *inmates) { unsigned long flags; - struct bucket *b = get_bucket(prison, &cell->key); - spin_lock_irqsave(&b->lock, flags); - __cell_release_no_holder(cell, inmates); - spin_unlock_irqrestore(&b->lock, flags); + spin_lock_irqsave(&prison->lock, flags); + __cell_release_no_holder(prison, cell, inmates); + spin_unlock_irqrestore(&prison->lock, flags); } EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); @@ -267,6 +241,20 @@ void dm_cell_error(struct dm_bio_prison *prison, } EXPORT_SYMBOL_GPL(dm_cell_error); +void dm_cell_visit_release(struct dm_bio_prison *prison, + void (*visit_fn)(void *, struct dm_bio_prison_cell *), + void *context, + struct dm_bio_prison_cell *cell) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + visit_fn(context, cell); + rb_erase(&cell->node, &prison->cells); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_visit_release); + /*----------------------------------------------------------------*/ #define DEFERRED_SET_SIZE 64 diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 6805a142b750..74cf01144b1f 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -10,8 +10,8 @@ #include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ #include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ -#include <linux/list.h> #include <linux/bio.h> +#include <linux/rbtree.h> /*----------------------------------------------------------------*/ @@ -23,11 +23,14 @@ */ struct dm_bio_prison; -/* FIXME: this needs to be more abstract */ +/* + * Keys define a range of blocks within either a virtual or physical + * device. + */ struct dm_cell_key { int virtual; dm_thin_id dev; - dm_block_t block; + dm_block_t block_begin, block_end; }; /* @@ -35,13 +38,15 @@ struct dm_cell_key { * themselves. */ struct dm_bio_prison_cell { - struct hlist_node list; + struct list_head user_list; /* for client use */ + struct rb_node node; + struct dm_cell_key key; struct bio *holder; struct bio_list bios; }; -struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); +struct dm_bio_prison *dm_bio_prison_create(void); void dm_bio_prison_destroy(struct dm_bio_prison *prison); /* @@ -57,7 +62,7 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison, struct dm_bio_prison_cell *cell); /* - * Creates, or retrieves a cell for the given key. + * Creates, or retrieves a cell that overlaps the given key. * * Returns 1 if pre-existing cell returned, zero if new cell created using * @cell_prealloc. @@ -68,7 +73,8 @@ int dm_get_cell(struct dm_bio_prison *prison, struct dm_bio_prison_cell **cell_result); /* - * An atomic op that combines retrieving a cell, and adding a bio to it. + * An atomic op that combines retrieving or creating a cell, and adding a + * bio to it. * * Returns 1 if the cell was already held, 0 if @inmate is the new holder. */ @@ -87,6 +93,14 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, void dm_cell_error(struct dm_bio_prison *prison, struct dm_bio_prison_cell *cell, int error); +/* + * Visits the cell and then releases. Guarantees no new inmates are + * inserted between the visit and release. + */ +void dm_cell_visit_release(struct dm_bio_prison *prison, + void (*visit_fn)(void *, struct dm_bio_prison_cell *), + void *context, struct dm_bio_prison_cell *cell); + /*----------------------------------------------------------------*/ /* diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index afe79719ea32..c33b49792b87 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -14,6 +14,7 @@ #include <linux/vmalloc.h> #include <linux/shrinker.h> #include <linux/module.h> +#include <linux/rbtree.h> #define DM_MSG_PREFIX "bufio" @@ -34,26 +35,23 @@ /* * Check buffer ages in this interval (seconds) */ -#define DM_BUFIO_WORK_TIMER_SECS 10 +#define DM_BUFIO_WORK_TIMER_SECS 30 /* * Free buffers when they are older than this (seconds) */ -#define DM_BUFIO_DEFAULT_AGE_SECS 60 +#define DM_BUFIO_DEFAULT_AGE_SECS 300 /* - * The number of bvec entries that are embedded directly in the buffer. - * If the chunk size is larger, dm-io is used to do the io. + * The nr of bytes of cached data to keep around. */ -#define DM_BUFIO_INLINE_VECS 16 +#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024) /* - * Buffer hash + * The number of bvec entries that are embedded directly in the buffer. + * If the chunk size is larger, dm-io is used to do the io. */ -#define DM_BUFIO_HASH_BITS 20 -#define DM_BUFIO_HASH(block) \ - ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \ - ((1 << DM_BUFIO_HASH_BITS) - 1)) +#define DM_BUFIO_INLINE_VECS 16 /* * Don't try to use kmem_cache_alloc for blocks larger than this. @@ -106,7 +104,7 @@ struct dm_bufio_client { unsigned minimum_buffers; - struct hlist_head *cache_hash; + struct rb_root buffer_tree; wait_queue_head_t free_buffer_wait; int async_write_error; @@ -135,7 +133,7 @@ enum data_mode { }; struct dm_buffer { - struct hlist_node hash_list; + struct rb_node node; struct list_head lru_list; sector_t block; void *data; @@ -223,6 +221,7 @@ static DEFINE_SPINLOCK(param_spinlock); * Buffers are freed after this timeout */ static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; +static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; static unsigned long dm_bufio_peak_allocated; static unsigned long dm_bufio_allocated_kmem_cache; @@ -253,6 +252,53 @@ static LIST_HEAD(dm_bufio_all_clients); */ static DEFINE_MUTEX(dm_bufio_clients_lock); +/*---------------------------------------------------------------- + * A red/black tree acts as an index for all the buffers. + *--------------------------------------------------------------*/ +static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) +{ + struct rb_node *n = c->buffer_tree.rb_node; + struct dm_buffer *b; + + while (n) { + b = container_of(n, struct dm_buffer, node); + + if (b->block == block) + return b; + + n = (b->block < block) ? n->rb_left : n->rb_right; + } + + return NULL; +} + +static void __insert(struct dm_bufio_client *c, struct dm_buffer *b) +{ + struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL; + struct dm_buffer *found; + + while (*new) { + found = container_of(*new, struct dm_buffer, node); + + if (found->block == b->block) { + BUG_ON(found != b); + return; + } + + parent = *new; + new = (found->block < b->block) ? + &((*new)->rb_left) : &((*new)->rb_right); + } + + rb_link_node(&b->node, parent, new); + rb_insert_color(&b->node, &c->buffer_tree); +} + +static void __remove(struct dm_bufio_client *c, struct dm_buffer *b) +{ + rb_erase(&b->node, &c->buffer_tree); +} + /*----------------------------------------------------------------*/ static void adjust_total_allocated(enum data_mode data_mode, long diff) @@ -434,7 +480,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) b->block = block; b->list_mode = dirty; list_add(&b->lru_list, &c->lru[dirty]); - hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]); + __insert(b->c, b); b->last_accessed = jiffies; } @@ -448,7 +494,7 @@ static void __unlink_buffer(struct dm_buffer *b) BUG_ON(!c->n_buffers[b->list_mode]); c->n_buffers[b->list_mode]--; - hlist_del(&b->hash_list); + __remove(b->c, b); list_del(&b->lru_list); } @@ -532,6 +578,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, end_io(&b->bio, r); } +static void inline_endio(struct bio *bio, int error) +{ + bio_end_io_t *end_fn = bio->bi_private; + + /* + * Reset the bio to free any attached resources + * (e.g. bio integrity profiles). + */ + bio_reset(bio); + + end_fn(bio, error); +} + static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, bio_end_io_t *end_io) { @@ -543,7 +602,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits; b->bio.bi_bdev = b->c->bdev; - b->bio.bi_end_io = end_io; + b->bio.bi_end_io = inline_endio; + /* + * Use of .bi_private isn't a problem here because + * the dm_buffer's inline bio is local to bufio. + */ + b->bio.bi_private = end_io; /* * We assume that if len >= PAGE_SIZE ptr is page-aligned. @@ -887,23 +951,6 @@ static void __check_watermark(struct dm_bufio_client *c, __write_dirty_buffers_async(c, 1, write_list); } -/* - * Find a buffer in the hash. - */ -static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) -{ - struct dm_buffer *b; - - hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)], - hash_list) { - dm_bufio_cond_resched(); - if (b->block == block) - return b; - } - - return NULL; -} - /*---------------------------------------------------------------- * Getting a buffer *--------------------------------------------------------------*/ @@ -1433,45 +1480,52 @@ static void drop_buffers(struct dm_bufio_client *c) } /* - * Test if the buffer is unused and too old, and commit it. + * We may not be able to evict this buffer if IO pending or the client + * is still using it. Caller is expected to know buffer is too old. + * * And if GFP_NOFS is used, we must not do any I/O because we hold * dm_bufio_clients_lock and we would risk deadlock if the I/O gets * rerouted to different bufio client. */ -static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, - unsigned long max_jiffies) +static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) { - if (jiffies - b->last_accessed < max_jiffies) - return 0; - if (!(gfp & __GFP_FS)) { if (test_bit(B_READING, &b->state) || test_bit(B_WRITING, &b->state) || test_bit(B_DIRTY, &b->state)) - return 0; + return false; } if (b->hold_count) - return 0; + return false; __make_buffer_clean(b); __unlink_buffer(b); __free_buffer_wake(b); - return 1; + return true; } -static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, - gfp_t gfp_mask) +static unsigned get_retain_buffers(struct dm_bufio_client *c) +{ + unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); + return retain_bytes / c->block_size; +} + +static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, + gfp_t gfp_mask) { int l; struct dm_buffer *b, *tmp; - long freed = 0; + unsigned long freed = 0; + unsigned long count = nr_to_scan; + unsigned retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { - freed += __cleanup_old_buffer(b, gfp_mask, 0); - if (!--nr_to_scan) + if (__try_evict_buffer(b, gfp_mask)) + freed++; + if (!--nr_to_scan || ((count - freed) <= retain_target)) return freed; dm_bufio_cond_resched(); } @@ -1533,11 +1587,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign r = -ENOMEM; goto bad_client; } - c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS); - if (!c->cache_hash) { - r = -ENOMEM; - goto bad_hash; - } + c->buffer_tree = RB_ROOT; c->bdev = bdev; c->block_size = block_size; @@ -1556,9 +1606,6 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign c->n_buffers[i] = 0; } - for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) - INIT_HLIST_HEAD(&c->cache_hash[i]); - mutex_init(&c->lock); INIT_LIST_HEAD(&c->reserved_buffers); c->need_reserved_buffers = reserved_buffers; @@ -1632,8 +1679,6 @@ bad_cache: } dm_io_client_destroy(c->dm_io); bad_dm_io: - vfree(c->cache_hash); -bad_hash: kfree(c); bad_client: return ERR_PTR(r); @@ -1660,9 +1705,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) mutex_unlock(&dm_bufio_clients_lock); - for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) - BUG_ON(!hlist_empty(&c->cache_hash[i])); - + BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree)); BUG_ON(c->need_reserved_buffers); while (!list_empty(&c->reserved_buffers)) { @@ -1680,36 +1723,60 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) BUG_ON(c->n_buffers[i]); dm_io_client_destroy(c->dm_io); - vfree(c->cache_hash); kfree(c); } EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); -static void cleanup_old_buffers(void) +static unsigned get_max_age_hz(void) { - unsigned long max_age = ACCESS_ONCE(dm_bufio_max_age); - struct dm_bufio_client *c; + unsigned max_age = ACCESS_ONCE(dm_bufio_max_age); - if (max_age > ULONG_MAX / HZ) - max_age = ULONG_MAX / HZ; + if (max_age > UINT_MAX / HZ) + max_age = UINT_MAX / HZ; - mutex_lock(&dm_bufio_clients_lock); - list_for_each_entry(c, &dm_bufio_all_clients, client_list) { - if (!dm_bufio_trylock(c)) - continue; + return max_age * HZ; +} - while (!list_empty(&c->lru[LIST_CLEAN])) { - struct dm_buffer *b; - b = list_entry(c->lru[LIST_CLEAN].prev, - struct dm_buffer, lru_list); - if (!__cleanup_old_buffer(b, 0, max_age * HZ)) - break; - dm_bufio_cond_resched(); - } +static bool older_than(struct dm_buffer *b, unsigned long age_hz) +{ + return (jiffies - b->last_accessed) >= age_hz; +} + +static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) +{ + struct dm_buffer *b, *tmp; + unsigned retain_target = get_retain_buffers(c); + unsigned count; + + dm_bufio_lock(c); + + count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) { + if (count <= retain_target) + break; + + if (!older_than(b, age_hz)) + break; + + if (__try_evict_buffer(b, 0)) + count--; - dm_bufio_unlock(c); dm_bufio_cond_resched(); } + + dm_bufio_unlock(c); +} + +static void cleanup_old_buffers(void) +{ + unsigned long max_age_hz = get_max_age_hz(); + struct dm_bufio_client *c; + + mutex_lock(&dm_bufio_clients_lock); + + list_for_each_entry(c, &dm_bufio_all_clients, client_list) + __evict_old_buffers(c, max_age_hz); + mutex_unlock(&dm_bufio_clients_lock); } @@ -1834,6 +1901,9 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); +module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory"); + module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h index aac0e2df06be..bed4ad4e1b7c 100644 --- a/drivers/md/dm-cache-block-types.h +++ b/drivers/md/dm-cache-block-types.h @@ -19,6 +19,7 @@ typedef dm_block_t __bitwise__ dm_oblock_t; typedef uint32_t __bitwise__ dm_cblock_t; +typedef dm_block_t __bitwise__ dm_dblock_t; static inline dm_oblock_t to_oblock(dm_block_t b) { @@ -40,4 +41,14 @@ static inline uint32_t from_cblock(dm_cblock_t b) return (__force uint32_t) b; } +static inline dm_dblock_t to_dblock(dm_block_t b) +{ + return (__force dm_dblock_t) b; +} + +static inline dm_block_t from_dblock(dm_dblock_t b) +{ + return (__force dm_block_t) b; +} + #endif /* DM_CACHE_BLOCK_TYPES_H */ diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 06709257adde..9fc616c2755e 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -109,7 +109,7 @@ struct dm_cache_metadata { dm_block_t discard_root; sector_t discard_block_size; - dm_oblock_t discard_nr_blocks; + dm_dblock_t discard_nr_blocks; sector_t data_block_size; dm_cblock_t cache_blocks; @@ -329,7 +329,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->hint_root = cpu_to_le64(cmd->hint_root); disk_super->discard_root = cpu_to_le64(cmd->discard_root); disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); - disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); + disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); disk_super->cache_blocks = cpu_to_le32(0); @@ -528,7 +528,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, cmd->hint_root = le64_to_cpu(disk_super->hint_root); cmd->discard_root = le64_to_cpu(disk_super->discard_root); cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); - cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks)); + cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); @@ -626,7 +626,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->hint_root = cpu_to_le64(cmd->hint_root); disk_super->discard_root = cpu_to_le64(cmd->discard_root); disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); - disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); + disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); @@ -797,15 +797,15 @@ out: int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, sector_t discard_block_size, - dm_oblock_t new_nr_entries) + dm_dblock_t new_nr_entries) { int r; down_write(&cmd->root_lock); r = dm_bitset_resize(&cmd->discard_info, cmd->discard_root, - from_oblock(cmd->discard_nr_blocks), - from_oblock(new_nr_entries), + from_dblock(cmd->discard_nr_blocks), + from_dblock(new_nr_entries), false, &cmd->discard_root); if |
