/*
* Copyright (C) 2010-2011 Neil Brown
* Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include <linux/slab.h>
#include <linux/module.h>
#include "md.h"
#include "raid1.h"
#include "raid5.h"
#include "raid10.h"
#include "bitmap.h"
#include <linux/device-mapper.h>
#define DM_MSG_PREFIX "raid"
/*
* The following flags are used by dm-raid.c to set up the array state.
* They must be cleared before md_run is called.
*/
#define FirstUse 10 /* rdev flag */
struct raid_dev {
/*
* Two DM devices, one to hold metadata and one to hold the
* actual data/parity. The reason for this is to not confuse
* ti->len and give more flexibility in altering size and
* characteristics.
*
* While it is possible for this device to be associated
* with a different physical device than the data_dev, it
* is intended for it to be the same.
* |--------- Physical Device ---------|
* |- meta_dev -|------ data_dev ------|
*/
struct dm_dev *meta_dev;
struct dm_dev *data_dev;
struct md_rdev rdev;
};
/*
* Flags for rs->print_flags field.
*/
#define DMPF_SYNC 0x1
#define DMPF_NOSYNC 0x2
#define DMPF_REBUILD 0x4
#define DMPF_DAEMON_SLEEP 0x8
#define DMPF_MIN_RECOVERY_RATE 0x10
#define DMPF_MAX_RECOVERY_RATE 0x20
#define DMPF_MAX_WRITE_BEHIND 0x40
#define DMPF_STRIPE_CACHE 0x80
#define DMPF_REGION_SIZE 0x100
#define DMPF_RAID10_COPIES 0x200
#define DMPF_RAID10_FORMAT 0x400
struct raid_set {
struct dm_target *ti;
uint32_t bitmap_loaded;
uint32_t print_flags;
struct mddev md;
struct raid_type *raid_type;
struct dm_target_callbacks callbacks;
struct raid_dev dev[0];
};
/* Supported raid types and properties. */
static struct raid_type {
const char *name; /* RAID algorithm. */
const char *descr; /* Descriptor text for logging. */
const unsigned parity_devs; /* # of parity devices. */
const unsigned minimal_devs; /* minimal # of devices in set. */
const unsigned level; /* RAID level. */
const unsigned algorithm; /* RAID algorithm. */
} raid_types[] = {
{"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
{"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */},
{"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
{"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
{"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
{"raid5_ls", "RAID5 (left symmetric)", 1, 2, 5, ALGORITHM_LEFT_SYMMETRIC},
{"raid5_rs", "RAID5 (right symmetric)", 1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC},
{"raid6_zr", "RAID6 (zero restart)", 2, 4, 6, ALGORITHM_ROTATING_ZERO_RESTART},
{"raid6_nr", "RAID6 (N restart)", 2, 4, 6, ALGORITHM_ROTATING_N_RESTART},
{"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
};
static char *raid10_md_layout_to_format(int layout)
{
/*
* Bit 16 and 17 stand for "offset" and "use_far_sets"
* Refer to MD's raid10.c for details
*/
if ((layout & 0x10000) && (layout & 0x20000))
return "offset";
if ((layout & 0xFF) > 1)
return "near";
return "far";
}
static unsigned raid10_md_layout_to_copies(int layout)
{
if ((layout & 0xFF) > 1)
return layout & 0xFF;
return (layout >> 8) & 0xFF;
}
static int raid10_format_to_md_layout(char *format, unsigned copies)
{
unsigned n = 1, f = 1;
if (!strcmp("near", format))
n = copies;
else
f = copies;
if (!strcmp("offset", format))
return 0x30000 | (f << 8) | n;
if (!strcmp("far", format))
return 0x20000 | (f << 8) | n;
return (f << 8) | n;
}
static struct raid_type *get_raid_type(char *name)
{
int i;
for (i = 0; i < ARRAY_SIZE(raid_types); i++)
if (!strcmp(raid_types[i].name, name))
return &raid_types[i];
return NULL;
}
static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *raid_type, unsigned raid_devs)
{
unsigned i;
struct raid_set *rs;
if (raid_devs <= raid_type->parity_devs) {
ti->error = "Insufficient number of devices";
return ERR_PTR(-EINVAL);
}
rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL);
if (!rs) {
ti->error = "Cannot allocate raid context";
return ERR_PTR(-ENOMEM);
}
mddev_init(&rs->md);
rs->ti = ti;
rs->raid_type = raid_type;
rs->md.raid_disks = raid_devs;
rs->md.level = raid_type->level;
rs->md.new_level = rs->md.level;
rs->md.layout = raid_type->algorithm;
rs->md.new_layout = rs->md.layout;
rs->md.delta_disks = 0;
rs->md.recovery_cp = 0;
for (i = 0; i < raid_devs; i++)
md_rdev_init(&rs->dev[i].rdev);
/*
* Remaining items to be initialized by further RAID params:
* rs->md.persistent
* rs->md.external
* rs->md.chunk_sectors
* rs->md.new_chunk_sectors
* rs->md.dev_sectors
*/
return rs;
}
static void context_free(struct raid_set *rs)
{
int i;
for (i = 0; i < rs->md.raid_disks; i++) {
if (rs->dev[i].meta_dev)
dm_put_device(rs->ti, rs->dev[i].meta_dev);
md_rdev_clear(&rs->dev[i].rdev);
if (rs->dev[i].data_dev)
dm_put_device(rs->ti, rs->dev[i].data_dev);
}
kfree(rs);
}
/*
* For every device we have two words
* <meta_dev>: meta device name or '-' if missing
* <data_dev>: data device name or '-' if missing
*
* The following are permitted:
* - -
* - <data_dev>
* <meta_dev> <data_dev>
*
* The following is not allowed:
* <meta_dev> -
*
* This code parses those words. If there is a failure,
* the caller must use context_free to unwind the operations.
*/
static int dev_parms(struct raid_set *rs, char **argv)
{
int i;
int rebuild = 0;
int metadata_available = 0;
int ret = 0;
for (i = 0; i < rs->md.raid_disks; i++, argv += 2) {
rs->dev[i].rdev.raid_disk = i;
rs->dev[i].meta_dev = NULL;
rs->dev[i].data_dev = NULL;
/*
* There are no offsets, since there is a separate device
* for data and metadata.
*/
rs->dev[i].rdev.data_offset = 0;
rs->dev[i].rdev.mddev = &rs->md;
if (strcmp(argv[0], "-")) {
ret = dm_get_device(rs->ti, argv[0],
dm_table_get_mode(rs->ti->table),
&rs->dev[i].meta_dev);
rs->ti->error = "RAID metadata device lookup failure";
if (ret)
return ret;
rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
if (!rs->dev[i].rdev.sb_page)
return -ENOMEM;
}
if (!strcmp(argv[1], "-")) {
if (!test_bit(In_sync, &rs->dev[i].rdev.flags) &&
(!rs->dev[i].rdev.recovery_offset)) {
rs->ti->error = "Drive designated for rebuild not specified";
return -EINVAL;
}
rs->ti->error = "No data device supplied with metadata device";
if (rs->dev[i].meta_dev)
return -EINVAL;
continue;
}
ret = dm_get_device(rs->ti, argv[1],
dm_table_get_mode(rs->ti->table),
&rs->dev[i].data_dev);
if (ret) {
rs->ti->error = "RAID device lookup failure";
return ret;
}
if (rs->dev[i].meta_dev) {
metadata_available = 1;
rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
}
rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
list_add(&rs->dev[i].rdev.same_set, &rs->md.disks);
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
rebuild++;
}
if (metadata_available) {
rs->md.external = 0;
rs->md.persistent = 1;
rs->md.major_version = 2;
} else if (rebuild && !rs->md.recovery_cp) {
/*
* Without metadata, we will not be able to tell if the array
* is in-sync or not - we must assume it is not. Therefore,
* it is impossible to rebuild a drive.
|