// SPDX-License-Identifier: GPL-2.0-or-later
/*
* zswap.c - zswap driver file
*
* zswap is a backend for frontswap that takes pages that are in the process
* of being swapped out and attempts to compress and store them in a
* RAM-based memory pool. This can result in a significant I/O reduction on
* the swap device and, in the case where decompressing from RAM is faster
* than reading from the swap device, can also improve workload performance.
*
* Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>
#include "swap.h"
/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
/*
* The statistics below are not protected from concurrent access for
* performance reasons so they may not be a 100% accurate. However,
* they do provide useful information on roughly how many times a
* certain event is occurring.
*/
/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;
/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;
/*********************************
* tunables
**********************************/
#define ZSWAP_PARAM_UNSET ""
/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
.set = zswap_enabled_param_set,
.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
.set = zswap_compressor_param_set,
.get = param_get_charp,
.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
&zswap_compressor, 0644);
/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
.set = zswap_zpool_param_set,
.get = param_get_charp,
.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
uint, 0644);
/*
* Enable/disable handling same-value filled pages (enabled by default).
* If disabled every page is considered non-same-value filled.
*/
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
bool, 0644);
/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
bool, 0644);
/*********************************
* data structures
**********************************/
struct crypto_acomp_ctx {
struct crypto_acomp *acomp;
struct acomp_req *req;
struct crypto_wait wait;
u8 *dstmem;
struct mutex *mutex;
};
struct zswap_pool {
struct zpool *zpool;
struct crypto_acomp_ctx __percpu *acomp_ctx;
struct kref kref;
struct list_head list;
struct work_struct release_work;
struct work_struct shrink_work;
struct hlist_node node;
char tfm_name[CRYPTO_MAX_ALG_NAME];
};
/*
* struct zswap_entry
*
* This structure contains the metadata for tracking a single compressed
* page within zswap.
*
* rbnode - links the entry into red-black tree for the appropriate swap type
* offset - the swap offset for the entry. Index into the red-black tree.
* refcount - the number of outstanding reference to the entry. This is needed
* to protect against premature freeing of the entry by code
* concurrent calls to load, invalidate, and writeback. The lock
* for the zswap_tree structure that contains the entry must
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
* length - the length in bytes of the compressed page data. Needed during
* decompression. For a same value filled page length is 0.
* pool - the zswap_pool the entry's data is in
* handle - zpool allocation handle that stores the compressed page data
* value - value of the same-value filled pages which have same content
*/
struct zswap_entry {
struct rb_node rbnode;
pgoff_t offset;
int refcount;
unsigned int length;
struct zswap_pool *pool;
union {
unsigned long handle;
unsigned long value;
};
struct obj_cgroup *objcg;
};
struct zswap_header {
swp_entry_t swpentry;
};
/*
* The tree lock in the zswap_tree struct protects a few things:
* - the rbtree
* - the refcount field of each entry in the tree
*/
struct zswap_tree {
struct rb_root rbroot;
spinlock_t lock;
};
static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);
/* used by param callback function */
static bool zswap_init_started;
/* fatal error during init */
static bool zswap_init_failed;
/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;
/*********************************
* helpers and fwd declarations
*
|