summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-07-06 11:54:56 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-07-06 11:54:56 -0700
commit 3a564bb3a8a6950e18b1f5d209bda39fc3831074 (patch)
tree b19bb80be84d7452b45332aa83d558151e9d7279 /drivers
parent 9871ab22f2784b2823b01522772a72ee4fc9d1fa (diff)
parent 3908c9839b1077e677ef9e92d2bce7f224519c59 (diff)
download linux-3a564bb3a8a6950e18b1f5d209bda39fc3831074.tar.gz
linux-3a564bb3a8a6950e18b1f5d209bda39fc3831074.tar.bz2
linux-3a564bb3a8a6950e18b1f5d209bda39fc3831074.zip
Merge tag 'for-4.13/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - Add the ability to use select or poll on /dev/mapper/control to wait
   for events from multiple DM devices.

 - Convert DM's printk macros over to using pr_<level> macros.

 - Add a big-endian variant of plain64 IV to dm-crypt.

 - Add support for zoned (aka SMR) devices to DM core. DM kcopyd was
   also improved to provide a sequential write feature needed by zoned
   devices.

 - Introduce DM zoned target that provides support for host-managed
   zoned devices; the resulting dm-zoned device acts as a drive-managed
   interface to the underlying host-managed device.

 - A DM raid fix to avoid using BUG() for error handling.

* tag 'for-4.13/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm zoned: fix overflow when converting zone ID to sectors
  dm raid: stop using BUG() in __rdev_sectors()
  dm zoned: drive-managed zoned block device target
  dm kcopyd: add sequential write feature
  dm linear: add support for zoned block devices
  dm flakey: add support for zoned block devices
  dm: introduce dm_remap_zone_report()
  dm: fix REQ_OP_ZONE_REPORT bio handling
  dm: fix REQ_OP_ZONE_RESET bio handling
  dm table: add zoned block devices validation
  dm: convert DM printk macros to pr_<level> macros
  dm crypt: add big-endian variant of plain64 IV
  dm bio prison: use rb_entry() rather than container_of()
  dm ioctl: report event number in DM_LIST_DEVICES
  dm ioctl: add a new DM_DEV_ARM_POLL ioctl
  dm: add basic support for using the select or poll function
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/md/Kconfig                 |   17
-rw-r--r--  drivers/md/Makefile                |    2
-rw-r--r--  drivers/md/dm-bio-prison-v1.c      |    2
-rw-r--r--  drivers/md/dm-bio-prison-v2.c      |    2
-rw-r--r--  drivers/md/dm-core.h               |    3
-rw-r--r--  drivers/md/dm-crypt.c              |   21
-rw-r--r--  drivers/md/dm-flakey.c             |   23
-rw-r--r--  drivers/md/dm-ioctl.c              |  109
-rw-r--r--  drivers/md/dm-kcopyd.c             |   65
-rw-r--r--  drivers/md/dm-linear.c             |   18
-rw-r--r--  drivers/md/dm-raid.c               |   13
-rw-r--r--  drivers/md/dm-table.c              |  162
-rw-r--r--  drivers/md/dm-zoned-metadata.c     | 2509
-rw-r--r--  drivers/md/dm-zoned-reclaim.c      |  570
-rw-r--r--  drivers/md/dm-zoned-target.c       |  967
-rw-r--r--  drivers/md/dm-zoned.h              |  228
-rw-r--r--  drivers/md/dm.c                    |   97
17 files changed, 4770 insertions, 38 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 906103c168ea..4a249ee86364 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -521,6 +521,23 @@ config DM_INTEGRITY
To compile this code as a module, choose M here: the module will
be called dm-integrity.
+config DM_ZONED
+ tristate "Drive-managed zoned block device target support"
+ depends on BLK_DEV_DM
+ depends on BLK_DEV_ZONED
+ ---help---
+ This device-mapper target takes a host-managed or host-aware zoned
+ block device and exposes most of its capacity as a regular block
+ device (drive-managed zoned block device) without any write
+ constraints. This is mainly intended for use with file systems that
+ do not natively support zoned block devices but still want to
+ benefit from the increased capacity offered by SMR disks. Other uses
+ by applications using raw block devices (for example object stores)
+ are also possible.
+
+ To compile this code as a module, choose M here: the module will
+ be called dm-zoned.
+
If unsure, say N.
endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 913720bd81c1..786ec9e86d65 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -20,6 +20,7 @@ dm-era-y += dm-era-target.o
dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
+dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
# Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise
@@ -60,6 +61,7 @@ obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
obj-$(CONFIG_DM_ERA) += dm-era.o
obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o
+obj-$(CONFIG_DM_ZONED) += dm-zoned.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index 82d27384d31f..874841f0fc83 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -116,7 +116,7 @@ static int __bio_detain(struct dm_bio_prison *prison,
while (*new) {
struct dm_bio_prison_cell *cell =
- container_of(*new, struct dm_bio_prison_cell, node);
+ rb_entry(*new, struct dm_bio_prison_cell, node);
r = cmp_keys(key, &cell->key);
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index c9b11f799cd8..8ce3a1a588cf 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -120,7 +120,7 @@ static bool __find_or_insert(struct dm_bio_prison_v2 *prison,
while (*new) {
struct dm_bio_prison_cell_v2 *cell =
- container_of(*new, struct dm_bio_prison_cell_v2, node);
+ rb_entry(*new, struct dm_bio_prison_cell_v2, node);
r = cmp_keys(key, &cell->key);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 52ca8d059e82..24eddbdf2ab4 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -147,4 +147,7 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen
return !maxlen || strlen(result) + 1 >= maxlen;
}
+extern atomic_t dm_global_event_nr;
+extern wait_queue_head_t dm_global_eventq;
+
#endif
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9e1b72e8f7ef..cdf6b1e12460 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -246,6 +246,9 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc)
* plain64: the initial vector is the 64-bit little-endian version of the sector
* number, padded with zeros if necessary.
*
+ * plain64be: the initial vector is the 64-bit big-endian version of the sector
+ * number, padded with zeros if necessary.
+ *
* essiv: "encrypted sector|salt initial vector", the sector number is
* encrypted with the bulk cipher using a salt as key. The salt
* should be derived from the bulk cipher's key via hashing.
@@ -302,6 +305,16 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
return 0;
}
+static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
+{
+ memset(iv, 0, cc->iv_size);
+ /* iv_size is at least of size u64; usually it is 16 bytes */
+ *(__be64 *)&iv[cc->iv_size - sizeof(u64)] = cpu_to_be64(dmreq->iv_sector);
+
+ return 0;
+}
+
/* Initialise ESSIV - compute salt but no local memory allocations */
static int crypt_iv_essiv_init(struct crypt_config *cc)
{
@@ -835,6 +848,10 @@ static const struct crypt_iv_operations crypt_iv_plain64_ops = {
.generator = crypt_iv_plain64_gen
};
+static const struct crypt_iv_operations crypt_iv_plain64be_ops = {
+ .generator = crypt_iv_plain64be_gen
+};
+
static const struct crypt_iv_operations crypt_iv_essiv_ops = {
.ctr = crypt_iv_essiv_ctr,
.dtr = crypt_iv_essiv_dtr,
@@ -2208,6 +2225,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
cc->iv_gen_ops = &crypt_iv_plain_ops;
else if (strcmp(ivmode, "plain64") == 0)
cc->iv_gen_ops = &crypt_iv_plain64_ops;
+ else if (strcmp(ivmode, "plain64be") == 0)
+ cc->iv_gen_ops = &crypt_iv_plain64be_ops;
else if (strcmp(ivmode, "essiv") == 0)
cc->iv_gen_ops = &crypt_iv_essiv_ops;
else if (strcmp(ivmode, "benbi") == 0)
@@ -2987,7 +3006,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 17, 0},
+ .version = {1, 18, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 3d04d5ce19d9..e2c7234931bc 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -275,7 +275,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
struct flakey_c *fc = ti->private;
bio->bi_bdev = fc->dev->bdev;
- if (bio_sectors(bio))
+ if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
bio->bi_iter.bi_sector =
flakey_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -306,6 +306,14 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
pb->bio_submitted = false;
+ /* Do not fail reset zone */
+ if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ goto map_bio;
+
+ /* We need to remap reported zones, so remember the BIO iter */
+ if (bio_op(bio) == REQ_OP_ZONE_REPORT)
+ goto map_bio;
+
/* Are we alive ? */
elapsed = (jiffies - fc->start_time) / HZ;
if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
@@ -359,11 +367,19 @@ map_bio:
}
static int flakey_end_io(struct dm_target *ti, struct bio *bio,
- blk_status_t *error)
+ blk_status_t *error)
{
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
+ if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ return DM_ENDIO_DONE;
+
+ if (bio_op(bio) == REQ_OP_ZONE_REPORT) {
+ dm_remap_zone_report(ti, bio, fc->start);
+ return DM_ENDIO_DONE;
+ }
+
if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
@@ -446,7 +462,8 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
static struct target_type flakey_target = {
.name = "flakey",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
+ .features = DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = flakey_ctr,
.dtr = flakey_dtr,
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 41852ae287a5..e06f0ef7d2ec 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -23,6 +23,14 @@
#define DM_MSG_PREFIX "ioctl"
#define DM_DRIVER_EMAIL "dm-devel@redhat.com"
+struct dm_file {
+ /*
+ * poll will wait until the global event number is greater than
+ * this value.
+ */
+ volatile unsigned global_event_nr;
+};
+
/*-----------------------------------------------------------------
* The ioctl interface needs to be able to look up devices by
* name or uuid.
@@ -456,9 +464,9 @@ void dm_deferred_remove(void)
* All the ioctl commands get dispatched to functions with this
* prototype.
*/
-typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
+typedef int (*ioctl_fn)(struct file *filp, struct dm_ioctl *param, size_t param_size);
-static int remove_all(struct dm_ioctl *param, size_t param_size)
+static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
param->data_size = 0;
@@ -491,13 +499,14 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
return ((void *) param) + param->data_start;
}
-static int list_devices(struct dm_ioctl *param, size_t param_size)
+static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
unsigned int i;
struct hash_cell *hc;
size_t len, needed = 0;
struct gendisk *disk;
struct dm_name_list *nl, *old_nl = NULL;
+ uint32_t *event_nr;
down_write(&_hash_lock);
@@ -510,6 +519,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size)
needed += sizeof(struct dm_name_list);
needed += strlen(hc->name) + 1;
needed += ALIGN_MASK;
+ needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK;
}
}
@@ -539,7 +549,9 @@ static int list_devices(struct dm_ioctl *param, size_t param_size)
strcpy(nl->name, hc->name);
old_nl = nl;
- nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
+ event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
+ *event_nr = dm_get_event_nr(hc->md);
+ nl = align_ptr(event_nr + 1);
}
}
@@ -582,7 +594,7 @@ static void list_version_get_info(struct target_type *tt, void *param)
info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
}
-static int list_versions(struct dm_ioctl *param, size_t param_size)
+static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
size_t len, needed = 0;
struct dm_target_versions *vers;
@@ -724,7 +736,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
}
}
-static int dev_create(struct dm_ioctl *param, size_t param_size)
+static int dev_create(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
int r, m = DM_ANY_MINOR;
struct mapped_device *md;
@@ -816,7 +828,7 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
return md;
}
-static int dev_remove(struct dm_ioctl *param, size_t param_size)
+static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
struct hash_cell *hc;
struct mapped_device *md;
@@ -881,7 +893,7 @@ static int invalid_str(char *str, void *end)
return -EINVAL;
}
-static int dev_rename(struct dm_ioctl *param, size_t param_size)
+static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
int r;
char *new_data = (char *) param + param->data_start;
@@ -911,7 +923,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
return 0;
}
-static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
+static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
int r = -EINVAL, x;
struct mapped_device *md;
@@ -1060,7 +1072,7 @@ static int do_resume(struct dm_ioctl *param)
* Set or unset the suspension state of a device.
* If the device already is in the requested state we just return its status.
*/
-static int dev_suspend(struct dm_ioctl *param, size_t param_size)
+static int dev_suspend(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
if (param->flags & DM_SUSPEND_FLAG)
return do_suspend(param);
@@ -1072,7 +1084,7 @@ static int dev_suspend(struct dm_ioctl *param, size_t param_size)
* Copies device info back to user space, used by
* the create and info ioctls.
*/
-static int dev_status(struct dm_ioctl *param, size_t param_size)
+static int dev_status(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
struct mapped_device *md;
@@ -1163,7 +1175,7 @@ static void retrieve_status(struct dm_table *table,
/*
* Wait for a device to report an event
*/
-static int dev_wait(struct dm_ioctl *param, size_t param_size)
+static int dev_wait(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
int r = 0;
struct mapped_device *md;
@@ -1200,6 +1212,19 @@ out:
return r;
}
+/*
+ * Remember the global event number and make it possible to poll
+ * for further events.
+ */
+static int dev_arm_poll(struct file *filp, struct dm_ioctl *param, size_t param_size)
+{
+ struct dm_file *priv = filp->private_data;
+
+ priv->global_event_nr = atomic_read(&dm_global_event_nr);
+
+ return 0;
+}
+
static inline fmode_t get_mode(struct dm_ioctl *param)
{
fmode_t mode = FMODE_READ | FMODE_WRITE;
@@ -1269,7 +1294,7 @@ static bool is_valid_type(enum dm_queue_mode cur, enum dm_queue_mode new)
return false;
}
-static int table_load(struct dm_ioctl *param, size_t param_size)
+static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
int r;
struct hash_cell *hc;
@@ -1356,7 +1381,7 @@ err:
return r;
}
-static int table_clear(struct dm_ioctl *param, size_t param_size)
+static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
struct hash_cell *hc;
struct mapped_device *md;
@@ -1430,7 +1455,7 @@ static void retrieve_deps(struct dm_table *table,
param->data_size = param->data_start + needed;
}
-static int table_deps(struct dm_ioctl *param, size_t param_size)
+static int table_deps(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
struct mapped_device *md;
struct dm_table *table;
@@ -1456,7 +1481,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
* Return the status of a device as a text string for each
* target.
*/
-static int table_status(struct dm_ioctl *param, size_t param_size)
+static int table_status(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
struct mapped_device *md;
struct dm_table *table;
@@ -1511,7 +1536,7 @@ static int message_for_md(struct mapped_device *md, unsigned argc, char **argv,
/*
* Pass a message to the target that's at the supplied device offset.
*/
-static int target_message(struct dm_ioctl *param, size_t param_size)
+static int target_message(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
int r, argc;
char **argv;
@@ -1628,7 +1653,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
{DM_LIST_VERSIONS_CMD, 0, list_versions},
{DM_TARGET_MSG_CMD, 0, target_message},
- {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}
+ {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
+ {DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
};
if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
@@ -1783,7 +1809,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
return 0;
}
-static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
+static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *user)
{
int r = 0;
int ioctl_flags;
@@ -1837,7 +1863,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
goto out;
param->data_size = offsetof(struct dm_ioctl, data);
- r = fn(param, input_param_size);
+ r = fn(file, param, input_param_size);
if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) &&
unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
@@ -1856,7 +1882,7 @@ out:
static long dm_ctl_ioctl(struct file *file, uint command, ulong u)
{
- return (long)ctl_ioctl(command, (struct dm_ioctl __user *)u);
+ return (long)ctl_ioctl(file, command, (struct dm_ioctl __user *)u);
}
#ifdef CONFIG_COMPAT
@@ -1868,8 +1894,47 @@ static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u)
#define dm_compat_ctl_ioctl NULL
#endif
+static int dm_open(struct inode *inode, struct file *filp)
+{
+ int r;
+ struct dm_file *priv;
+
+ r = nonseekable_open(inode, filp);
+ if (unlikely(r))
+ return r;
+
+ priv = filp->private_data = kmalloc(sizeof(struct dm_file), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->global_event_nr = atomic_read(&dm_global_event_nr);
+
+ return 0;
+}
+
+static int dm_release(struct inode *inode, struct file *filp)
+{
+ kfree(filp->private_data);
+ return 0;
+}
+
+static unsigned dm_poll(struct file *filp, poll_table *wait)
+{
+ struct dm_file *priv = filp->private_data;
+ unsigned mask = 0;
+
+ poll_wait(filp, &dm_global_eventq, wait);
+
+ if ((int)(atomic_read(&dm_global_event_nr) - priv->global_event_nr) > 0)
+ mask |= POLLIN;
+
+ return mask;
+}
+
static const struct file_operations _ctl_fops = {
- .open = nonseekable_open,
+ .open = dm_open,
+ .release = dm_release,
+ .poll = dm_poll,
.unlocked_ioctl = dm_ctl_ioctl,
.compat_ioctl = dm_compat_ctl_ioctl,
.owner = THIS_MODULE,
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index f85846741d50..cf2c67e35eaf 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -356,6 +356,7 @@ struct kcopyd_job {
struct mutex lock;
atomic_t sub_jobs;
sector_t progress;
+ sector_t write_offset;
struct kcopyd_job *master_job;
};
@@ -386,6 +387,31 @@ void dm_kcopyd_exit(void)
* Functions to push and pop a job onto the head of a given job
* list.
*/
+static struct kcopyd_job *pop_io_job(struct list_head *jobs,
+ struct dm_kcopyd_client *kc)
+{
+ struct kcopyd_job *job;
+
+ /*
+ * For I/O jobs, pop any read, any write without sequential write
+ * constraint and sequential writes that are at the right position.
+ */
+ list_for_each_entry(job, jobs, list) {
+ if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
+ list_del(&job->list);
+ return job;
+ }
+
+ if (job->write_offset == job->master_job->write_offset) {
+ job->master_job->write_offset += job->source.count;
+ list_del(&job->list);
+ return job;
+ }
+ }
+
+ return NULL;
+}
+
static struct kcopyd_job *pop(struct list_head *jobs,
struct dm_kcopyd_client *kc)
{
@@ -395,8 +421,12 @@ static struct kcopyd_job *pop(struct list_head *jobs,
spin_lock_irqsave(&kc->job_lock, flags);
if (!list_empty(jobs)) {
- job = list_entry(jobs->next, struct kcopyd_job, list);
- list_del(&job->list);
+ if (jobs == &kc->io_jobs)
+ job = pop_io_job(jobs, kc);
+ else {
+ job = list_entry(jobs->next, struct kcopyd_job, list);
+ list_del(&job->list);
+ }
}
spin_unlock_irqrestore(&kc->job_lock, flags);
@@ -506,6 +536,14 @@ static int run_io_job(struct kcopyd_job *job)
.client = job->kc->io_client,
};
+ /*
+ * If we need to write sequentially and some reads or writes failed,
+ * no point in continuing.
+ */
+ if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
+ job->master_job->write_err)
+ return -EIO;
+
io_job_start(job->kc->throttle);
if (job->rw == READ)
@@ -655,6 +693,7 @@ static void segment_complete(int read_err, unsigned long write_err,
int i;
*sub_job = *job;
+ sub_job->write_offset = progress;
sub_job->source.sector += progress;
sub_job->source.count = count;
@@ -723,6 +762,27 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->num_dests = num_dests;
memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
+ /*
+ * If one of the destination is a host-managed zoned block device,
+ * we need to write sequentially. If one of the destination is a
+ * host-aware device, then leave it to the caller to choose what to do.
+ */
+ if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
+ for (i = 0; i < job->num_dests; i++) {
+ if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
+ set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags);
+ break;
+ }
+ }
+ }
+
+ /*
+ * If we need to write sequentially, errors cannot be ignored.
+ */
+ if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
+ test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags))
+ clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags);
+
if (from) {
job->source = *from;
job->pages = NULL;
@@ -746,6 +806,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->fn = fn;
job->context = context;
job->master_job = job;
+ job->write_offset = 0;
if (job->source.count <= SUB_JOB_SIZE)
dispatch_job(job);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 7d42a9d9f406..c03c203a90b4 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -89,7 +89,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
struct linear_c *lc = ti->private;
bio->bi_bdev = lc->dev->bdev;
- if (bio_sectors(bio))
+ if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
bio->bi_iter.bi_sector =
linear_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -101,6 +101,17 @@ static int linear_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
+static int linear_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
+{
+ struct linear_c *lc = ti->private;
+
+ if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT)
+ dm_remap_zone_report(ti, bio, lc->start);
+
+ return DM_ENDIO_DONE;
+}
+
static void linear_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
@@ -161,12 +172,13 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
static struct target_type linear_target = {
.name = "linear",
- .version = {1, 3, 0},
- .features = DM_TARGET_PASSES_INTEGRITY,
+ .version = {1, 4, 0},
+ .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = linear_ctr,
.dtr = linear_dtr,
.map = linear_map,
+ .end_io = linear_end_io,
.status = linear_status,
.prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b4b75dad816a..2e10c2f13a34 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1571,7 +1571,7 @@ static sector_t __rdev_sectors(struct raid_set *rs)
return rdev->sectors;
}
- BUG(); /* Constructor ensures we got some. */
+ return 0;
}
/* Calculate the sectors per device and per array used for @rs */
@@ -2941,7 +2941,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bool resize;
struct raid_type *rt;
unsigned int num_raid_params, num_raid_devs;
- sector_t calculated_dev_sectors;
+ sector_t calculated_dev_sectors, rdev_sectors;
struct raid_set *rs = NULL;
const char *arg;
struct rs_layout rs_layout;
@@ -3017,7 +3017,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto bad;
- resize = calculated_dev_sectors != __rdev_sectors(rs);
+ rdev_sectors = __rdev_sectors(rs);
+ if (!rdev_sectors) {
+ ti->error = "Invalid rdev size";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ resize = calculated_dev_sectors != rdev_sectors;
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f5eae41f804..a39bcd9b982a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -319,6 +319,39 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
return 1;
}
+ /*
+ * If the target is mapped to zoned block device(s), check
+ * that the zones are not partially mapped.
+ */
+ if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) {
+ unsigned int zone_sectors = bdev_zone_sectors(bdev);
+
+ if (start & (zone_sectors - 1)) {
+ DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s",
+ dm_device_name(ti->table->md),
+ (unsigned long long)start,
+ zone_sectors, bdevname(bdev, b));
+ return 1;
+ }
+
+ /*
+ * Note: The last zone of a zoned block device may be smaller
+ * than other zones. So for a target mapping the end of a
+ * zoned block device with such a zone, len would not be zone
+ * aligned. We do not allow such last smaller zone to be part
+ * of the mapping here to ensure that mappings with multiple
+ * devices do not end up with a smaller zone in the middle of
+ * the sector range.
+ */
+ if (len & (zone_sectors - 1)) {
+ DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s",
+ dm_device_name(ti->table->md),
+ (unsigned long long)len,
+ zone_sectors, bdevname(bdev, b));
+ return 1;
+ }
+ }
+
if (logical_block_size_sectors <= 1)
return 0;
@@ -456,6 +489,8 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
q->limits.alignment_offset,
(unsigned long long) start << SECTOR_SHIFT);
+ limits->zoned = blk_queue_zoned_model(q);
+
return 0;
}
@@ -1346,6 +1381,88 @@ bool dm_table_has_no_data_devices(struct dm_table *table)
return true;
}
+static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+ enum blk_zoned_model *zoned_model = data;
+
+ return q && blk_queue_zoned_model(q) == *zoned_model;
+}
+
+static bool dm_table_supports_zoned_model(struct dm_table *t,
+ enum blk_zoned_model zoned_model)
+{
+ struct dm_target *ti;
+ unsigned i;
+
+ for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ ti = dm_table_get_target(t, i);
+
+ if (zoned_model == BLK_ZONED_HM &&
+ !dm_target_supports_zoned_hm(ti->type))
+ return false;
+
+ if (!ti->type->iterate_devices ||
+ !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model))
+ return false;
+ }
+
+ return true;
+}
+
+static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+ unsigned int *zone_sectors = data;
+
+ return q && blk_queue_zone_sectors(q) == *zone_sectors;
+}
+
+static bool dm_table_matches_zone_sectors(struct dm_table *t,
+ unsigned int zone_sectors)
+{
+ struct dm_target *ti;
+ unsigned i;
+
+ for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ ti = dm_table_get_target(t, i);
+
+ if (!ti->type->iterate_devices ||
+ !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors))
+ return false;
+ }
+
+ return true;
+}
+
+static int validate_hardware_zoned_model(struct dm_table *table,
+ enum blk_zoned_model zoned_model,
+ unsigned int zone_sectors)
+{
+ if (zoned_model == BLK_ZONED_NONE)
+ return 0;
+
+ if (!dm_table_supports_zoned_model(table, zoned_model)) {
+ DMERR("%s: zoned model is not consistent across all devices",
+ dm_device_name(table->md));
+ return -EINVAL;
+ }
+
+ /* Check zone size validity and compatibility */
+ if (!zone_sectors || !is_power_of_2(zone_sectors))
+ return -EINVAL;
+
+ if (!dm_table_matches_zone_sectors(table, zone_sectors)) {
+ DMERR("%s: zone sectors is not consistent across all devices",
+ dm_device_name(table->md));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
* Establish the new table's queue_limits and validate them.
*/
@@ -1355,6 +1472,8 @@ int dm_calculate_queue_limits(struct dm_table *table,
struct dm_target *ti;
struct queue_limits ti_limits;
unsigned i;
+ enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
+ unsigned int zone_sectors = 0;
blk_set_stacking_limits(limits);
@@ -1372,6 +1491,15 @@ int dm_calculate_queue_limits(struct dm_table *table,
ti-&