/*
* Swap block device support for MTDs
* Turns an MTD device into a swap device with block wear leveling
*
* Copyright © 2007,2011 Nokia Corporation. All rights reserved.
*
* Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
*
* Based on Richard Purdie's earlier implementation in 2007. Background
* support and lock-less operation written by Adrian Hunter.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/blktrans.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/math64.h>
/* Name prefix string for this driver; its users lie outside this excerpt. */
#define MTDSWAP_PREFIX "mtdswap"
/*
 * Number of free eraseblocks at which GC should stop.
 */
#define CLEAN_BLOCK_THRESHOLD 20
/*
 * Number of free eraseblocks below which GC may also collect
 * low-fragmentation blocks.
 *
 * NOTE(review): "TRESHOLD" is misspelled, but the spelling is part of the
 * identifier; a rename has to update every user in the same change.
 */
#define LOW_FRAG_GC_TRESHOLD 5
/*
 * Wear-leveling cost amortization. Wear leveling should run in the
 * background without disturbing GC too much. This is done by defining a
 * maximum GC frequency: a frequency value of 6 means that 1/6 of the GC
 * passes pick an eraseblock based on the biggest wear difference rather
 * than the biggest dirtiness.
 *
 * The lower frequency value (freq2) should be chosen so that the maximum
 * erase-count difference still decreases even if a malicious application
 * deliberately tries to make erase differences large.
 *
 * COLLECT_NONDIRTY_BASE is an alias for MAX_ERASE_DIFF. How the base and
 * the two frequency values are combined is decided by code outside this
 * excerpt.
 */
#define MAX_ERASE_DIFF 4000
#define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1 6
#define COLLECT_NONDIRTY_FREQ2 4
/*
 * Reserved values taken from the top of the unsigned int range:
 * UINT_MAX marks an undefined page or block, UINT_MAX - 1 a block error,
 * and UINT_MAX - 2 is the named maximum (BLOCK_MAX).
 *
 * NOTE(review): presumably these are stored in the page_data / revmap
 * arrays of struct mtdswap_dev and ordinary values never exceed
 * BLOCK_MAX -- confirm against the users, which are outside this excerpt.
 */
#define PAGE_UNDEF UINT_MAX
#define BLOCK_UNDEF UINT_MAX
#define BLOCK_ERROR (UINT_MAX - 1)
#define BLOCK_MAX (UINT_MAX - 2)
/*
 * Eraseblock status bits, one bit each in bits 0..4.
 * EBLOCK_IDX_SHIFT is the position of the first bit above the flag bits.
 *
 * NOTE(review): presumably these are kept in swap_eb.flags -- confirm
 * against the users, which are outside this excerpt.
 */
#define EBLOCK_BAD		0x01
#define EBLOCK_NOMAGIC		0x02
#define EBLOCK_BITFLIP		0x04
#define EBLOCK_FAILED		0x08
#define EBLOCK_READERR		0x10
#define EBLOCK_IDX_SHIFT	5
/*
 * Per-eraseblock bookkeeping record. struct mtdswap_dev refers to these
 * through its eb_data pointer, and the rb member links a record into an
 * rbtree.
 */
struct swap_eb {
/* rbtree linkage used by MTDSWAP_ECNT_MIN()/MTDSWAP_ECNT_MAX(). */
struct rb_node rb;
/* NOTE(review): presumably the tree this block is currently in -- confirm. */
struct rb_root *root;
/* NOTE(review): presumably a mask of EBLOCK_* bits -- confirm. */
unsigned int flags;
/* NOTE(review): presumably the number of in-use pages -- confirm. */
unsigned int active_count;
/* Erase counter; the value MTDSWAP_ECNT_MIN()/MTDSWAP_ECNT_MAX() read. */
unsigned int erase_count;
unsigned int pad; /* speeds up pointer decrement */
};
/*
 * Erase count of the first (MTDSWAP_ECNT_MIN) or last (MTDSWAP_ECNT_MAX)
 * swap_eb in the rbtree at rbroot.
 *
 * The result of rb_first()/rb_last() is dereferenced without a check.
 * NOTE(review): the kernel rbtree API returns NULL for an empty tree, so
 * callers presumably must ensure the tree is non-empty -- verify callers.
 * The MIN/MAX naming also implies the trees are ordered by erase_count;
 * the insertion code is outside this excerpt.
 */
#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
rb)->erase_count)
/*
 * An rbtree root paired with a counter. struct mtdswap_dev holds one of
 * these per MTDSWAP_* tree index.
 */
struct mtdswap_tree {
struct rb_root root;
/* NOTE(review): presumably the number of nodes in root -- confirm. */
unsigned int count;
};
/*
 * Indices into mtdswap_dev.trees[]; the TREE_*() macros build these names
 * by pasting MTDSWAP_ onto their "name" argument.
 *
 * The per-state meaning is implied by the names only; the code that sorts
 * eraseblocks into the trees is outside this excerpt.
 *
 * MTDSWAP_TREE_CNT must stay last: it is the number of trees and sizes
 * the trees[] array.
 */
enum {
	MTDSWAP_CLEAN	= 0,
	MTDSWAP_USED	= 1,
	MTDSWAP_LOWFRAG	= 2,
	MTDSWAP_HIFRAG	= 3,
	MTDSWAP_DIRTY	= 4,
	MTDSWAP_BITFLIP	= 5,
	MTDSWAP_FAILING	= 6,
	MTDSWAP_TREE_CNT,	/* number of trees, not a tree index */
};
/*
 * Per-device state of one mtdswap instance: handles to the block
 * translation device and the underlying MTD, mapping arrays, eraseblock
 * records, one tree per MTDSWAP_* index, statistics counters and I/O
 * buffers.
 *
 * Notes marked NOTE(review) are inferred from member names only; the code
 * that fills and uses these members is outside this excerpt.
 */
struct mtdswap_dev {
/* Handles to the blktrans device, the MTD device and a struct device. */
struct mtd_blktrans_dev *mbd_dev;
struct mtd_info *mtd;
struct device *dev;
/*
 * NOTE(review): presumably the forward and reverse page mapping tables,
 * using PAGE_UNDEF / BLOCK_* as reserved values -- confirm.
 */
unsigned int *page_data;
unsigned int *revmap;
/* NOTE(review): presumably geometry and wear limits -- confirm. */
unsigned int eblks;
unsigned int spare_eblks;
unsigned int pages_per_eblk;
unsigned int max_erase_count;
/* NOTE(review): presumably an array of per-eraseblock records -- confirm. */
struct swap_eb *eb_data;
/* One tree per MTDSWAP_* index; accessed through the TREE_*() macros. */
struct mtdswap_tree trees[MTDSWAP_TREE_CNT];
/* Statistics counters. */
unsigned long long sect_read_count;
unsigned long long sect_write_count;
unsigned long long mtd_write_count;
unsigned long long mtd_read_count;
unsigned long long discard_count;
unsigned long long discard_page_count;
/*
 * NOTE(review): presumably the eraseblock currently being written and
 * the next write position inside it -- confirm.
 */
unsigned int curr_write_pos;
struct swap_eb *curr_write;
/* NOTE(review): presumably scratch buffers for page and OOB I/O -- confirm. */
char *page_buf;
char *oob_buf;
/* debugfs directory entry for this device. */
struct dentry *debugfs_root;
};
/*
 * Marker record made of a 16-bit little-endian magic and a 32-bit
 * little-endian count, packed with no padding between the members.
 * MTDSWAP_OOBSIZE is the size of this structure.
 *
 * NOTE(review): presumably written to the OOB area, with magic holding
 * MTDSWAP_MAGIC_CLEAN / MTDSWAP_MAGIC_DIRTY and count the erase count --
 * confirm against the read/write code, which is outside this excerpt.
 */
struct mtdswap_oobdata {
__le16 magic;
__le32 count;
} __packed;
/*
 * Magic numbers for the two marker kinds; the dirty magic is defined as
 * the clean magic plus one.
 */
#define MTDSWAP_MAGIC_CLEAN 0x2095
#define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1)
/* Marker type selectors matching the two magic numbers above. */
#define MTDSWAP_TYPE_CLEAN 0
#define MTDSWAP_TYPE_DIRTY 1
/* Size in bytes of struct mtdswap_oobdata. */
#define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata)
/* Retry limits for erase and for I/O operations. */
#define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES 3
/*
 * Scan result codes for an eraseblock: clean, dirty, bitflip or bad.
 * The scanning code that produces them is outside this excerpt.
 */
enum {
	MTDSWAP_SCANNED_CLEAN	= 0,
	MTDSWAP_SCANNED_DIRTY	= 1,
	MTDSWAP_SCANNED_BITFLIP	= 2,
	MTDSWAP_SCANNED_BAD	= 3,
};
/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 *
 * MIN_ERASE_BLOCKS is the spare minimum plus one further eraseblock.
 */
#define MIN_SPARE_EBLOCKS 2
#define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1)
/*
 * Accessors for the per-state trees of a struct mtdswap_dev.
 *
 * d    - pointer to the struct mtdswap_dev; may be any pointer expression
 * name - tree name without the MTDSWAP_ prefix (e.g. CLEAN, DIRTY); it is
 *        token-pasted onto MTDSWAP_ to form the trees[] index
 *
 * TREE_ROOT     - address of the tree's struct rb_root
 * TREE_EMPTY    - true when the root has no node
 * TREE_NONEMPTY - negation of TREE_EMPTY
 * TREE_COUNT    - the tree's count member (an lvalue)
 *
 * The pointer argument is parenthesized so that arguments such as
 * "devs + 1" or "cond ? a : b" bind to "->" as a whole.
 */
#define TREE_ROOT(d, name) (&(d)->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) ((d)->trees[MTDSWAP_ ## name].count)
/*
 * Fetch the struct mtdswap_dev stored in the priv member of dev.
 * dev is parenthesized for the same reason as d above.
 */
#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)(dev)->priv)
/*
 * Module parameters. Each one is registered with permission bits 0444.
 */
/*
 * "partitions": string parameter backed by a 128-byte buffer, empty by
 * default. Per its description it lists MTD partition numbers to use as
 * swap, in the form "1,3,5".
 */
static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
"partitions=\"1,3,5\"");
/*
 * "spare_eblocks": unsigned integer, default 10. Per its description this
 * is the percentage of erase blocks kept spare for garbage collection.
 */
static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
"garbage collection (default 10%)");
/* "header": boolean parameter, zero-initialized and therefore off by default. */
static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
"Include builtin swap header (default 0, without he
|