diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-06 11:54:56 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-06 11:54:56 -0700 |
| commit | 3a564bb3a8a6950e18b1f5d209bda39fc3831074 (patch) | |
| tree | b19bb80be84d7452b45332aa83d558151e9d7279 /drivers | |
| parent | 9871ab22f2784b2823b01522772a72ee4fc9d1fa (diff) | |
| parent | 3908c9839b1077e677ef9e92d2bce7f224519c59 (diff) | |
| download | linux-3a564bb3a8a6950e18b1f5d209bda39fc3831074.tar.gz linux-3a564bb3a8a6950e18b1f5d209bda39fc3831074.tar.bz2 linux-3a564bb3a8a6950e18b1f5d209bda39fc3831074.zip | |
Merge tag 'for-4.13/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Add the ability to use select or poll /dev/mapper/control to wait for
events from multiple DM devices.
- Convert DM's printk macros over to using pr_<level> macros.
- Add a big-endian variant of plain64 IV to dm-crypt.
- Add support for zoned (aka SMR) devices to DM core. DM kcopyd was
also improved to provide a sequential write feature needed by zoned
devices.
- Introduce DM zoned target that provides support for host-managed
zoned devices, the result dm-zoned device acts as a drive-managed
interface to the underlying host-managed device.
- A DM raid fix to avoid using BUG() for error handling.
* tag 'for-4.13/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm zoned: fix overflow when converting zone ID to sectors
dm raid: stop using BUG() in __rdev_sectors()
dm zoned: drive-managed zoned block device target
dm kcopyd: add sequential write feature
dm linear: add support for zoned block devices
dm flakey: add support for zoned block devices
dm: introduce dm_remap_zone_report()
dm: fix REQ_OP_ZONE_REPORT bio handling
dm: fix REQ_OP_ZONE_RESET bio handling
dm table: add zoned block devices validation
dm: convert DM printk macros to pr_<level> macros
dm crypt: add big-endian variant of plain64 IV
dm bio prison: use rb_entry() rather than container_of()
dm ioctl: report event number in DM_LIST_DEVICES
dm ioctl: add a new DM_DEV_ARM_POLL ioctl
dm: add basic support for using the select or poll function
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/md/Kconfig | 17 | ||||
| -rw-r--r-- | drivers/md/Makefile | 2 | ||||
| -rw-r--r-- | drivers/md/dm-bio-prison-v1.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-bio-prison-v2.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-core.h | 3 | ||||
| -rw-r--r-- | drivers/md/dm-crypt.c | 21 | ||||
| -rw-r--r-- | drivers/md/dm-flakey.c | 23 | ||||
| -rw-r--r-- | drivers/md/dm-ioctl.c | 109 | ||||
| -rw-r--r-- | drivers/md/dm-kcopyd.c | 65 | ||||
| -rw-r--r-- | drivers/md/dm-linear.c | 18 | ||||
| -rw-r--r-- | drivers/md/dm-raid.c | 13 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 162 | ||||
| -rw-r--r-- | drivers/md/dm-zoned-metadata.c | 2509 | ||||
| -rw-r--r-- | drivers/md/dm-zoned-reclaim.c | 570 | ||||
| -rw-r--r-- | drivers/md/dm-zoned-target.c | 967 | ||||
| -rw-r--r-- | drivers/md/dm-zoned.h | 228 | ||||
| -rw-r--r-- | drivers/md/dm.c | 97 |
17 files changed, 4770 insertions, 38 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 906103c168ea..4a249ee86364 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -521,6 +521,23 @@ config DM_INTEGRITY To compile this code as a module, choose M here: the module will be called dm-integrity. +config DM_ZONED + tristate "Drive-managed zoned block device target support" + depends on BLK_DEV_DM + depends on BLK_DEV_ZONED + ---help--- + This device-mapper target takes a host-managed or host-aware zoned + block device and exposes most of its capacity as a regular block + device (drive-managed zoned block device) without any write + constraints. This is mainly intended for use with file systems that + do not natively support zoned block devices but still want to + benefit from the increased capacity offered by SMR disks. Other uses + by applications using raw block devices (for example object stores) + are also possible. + + To compile this code as a module, choose M here: the module will + be called dm-zoned. + If unsure, say N. endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 913720bd81c1..786ec9e86d65 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -20,6 +20,7 @@ dm-era-y += dm-era-target.o dm-verity-y += dm-verity-target.o md-mod-y += md.o bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o +dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o # Note: link order is important. All raid personalities # and must come before md.o, as they each initialise @@ -60,6 +61,7 @@ obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o obj-$(CONFIG_DM_ERA) += dm-era.o obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o +obj-$(CONFIG_DM_ZONED) += dm-zoned.o ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c index 82d27384d31f..874841f0fc83 100644 --- a/drivers/md/dm-bio-prison-v1.c +++ b/drivers/md/dm-bio-prison-v1.c @@ -116,7 +116,7 @@ static int __bio_detain(struct dm_bio_prison *prison, while (*new) { struct dm_bio_prison_cell *cell = - container_of(*new, struct dm_bio_prison_cell, node); + rb_entry(*new, struct dm_bio_prison_cell, node); r = cmp_keys(key, &cell->key); diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c index c9b11f799cd8..8ce3a1a588cf 100644 --- a/drivers/md/dm-bio-prison-v2.c +++ b/drivers/md/dm-bio-prison-v2.c @@ -120,7 +120,7 @@ static bool __find_or_insert(struct dm_bio_prison_v2 *prison, while (*new) { struct dm_bio_prison_cell_v2 *cell = - container_of(*new, struct dm_bio_prison_cell_v2, node); + rb_entry(*new, struct dm_bio_prison_cell_v2, node); r = cmp_keys(key, &cell->key); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 52ca8d059e82..24eddbdf2ab4 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -147,4 +147,7 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen return !maxlen || strlen(result) + 1 >= maxlen; } +extern atomic_t dm_global_event_nr; +extern wait_queue_head_t dm_global_eventq; + #endif diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9e1b72e8f7ef..cdf6b1e12460 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -246,6 +246,9 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc) * plain64: the initial vector is the 64-bit little-endian version of the sector * number, padded with zeros if necessary. * + * plain64be: the initial vector is the 64-bit big-endian version of the sector + * number, padded with zeros if necessary. + * * essiv: "encrypted sector|salt initial vector", the sector number is * encrypted with the bulk cipher using a salt as key. The salt * should be derived from the bulk cipher's key via hashing. @@ -302,6 +305,16 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, return 0; } +static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + memset(iv, 0, cc->iv_size); + /* iv_size is at least of size u64; usually it is 16 bytes */ + *(__be64 *)&iv[cc->iv_size - sizeof(u64)] = cpu_to_be64(dmreq->iv_sector); + + return 0; +} + /* Initialise ESSIV - compute salt but no local memory allocations */ static int crypt_iv_essiv_init(struct crypt_config *cc) { @@ -835,6 +848,10 @@ static const struct crypt_iv_operations crypt_iv_plain64_ops = { .generator = crypt_iv_plain64_gen }; +static const struct crypt_iv_operations crypt_iv_plain64be_ops = { + .generator = crypt_iv_plain64be_gen +}; + static const struct crypt_iv_operations crypt_iv_essiv_ops = { .ctr = crypt_iv_essiv_ctr, .dtr = crypt_iv_essiv_dtr, @@ -2208,6 +2225,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) cc->iv_gen_ops = &crypt_iv_plain_ops; else if (strcmp(ivmode, "plain64") == 0) cc->iv_gen_ops = &crypt_iv_plain64_ops; + else if (strcmp(ivmode, "plain64be") == 0) + cc->iv_gen_ops = &crypt_iv_plain64be_ops; else if (strcmp(ivmode, "essiv") == 0) cc->iv_gen_ops = &crypt_iv_essiv_ops; else if (strcmp(ivmode, "benbi") == 0) @@ -2987,7 +3006,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 17, 0}, + .version = {1, 18, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 3d04d5ce19d9..e2c7234931bc 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -275,7 +275,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) struct flakey_c *fc = ti->private; bio->bi_bdev = fc->dev->bdev; - if (bio_sectors(bio)) + if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); } @@ -306,6 +306,14 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); pb->bio_submitted = false; + /* Do not fail reset zone */ + if (bio_op(bio) == REQ_OP_ZONE_RESET) + goto map_bio; + + /* We need to remap reported zones, so remember the BIO iter */ + if (bio_op(bio) == REQ_OP_ZONE_REPORT) + goto map_bio; + /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { @@ -359,11 +367,19 @@ map_bio: } static int flakey_end_io(struct dm_target *ti, struct bio *bio, - blk_status_t *error) + blk_status_t *error) { struct flakey_c *fc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + if (bio_op(bio) == REQ_OP_ZONE_RESET) + return DM_ENDIO_DONE; + + if (bio_op(bio) == REQ_OP_ZONE_REPORT) { + dm_remap_zone_report(ti, bio, fc->start); + return DM_ENDIO_DONE; + } + if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) { @@ -446,7 +462,8 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 4, 0}, + .version = {1, 5, 0}, + .features = DM_TARGET_ZONED_HM, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 41852ae287a5..e06f0ef7d2ec 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -23,6 +23,14 @@ #define DM_MSG_PREFIX "ioctl" #define DM_DRIVER_EMAIL "dm-devel@redhat.com" +struct dm_file { + /* + * poll will wait until the global event number is greater than + * this value. + */ + volatile unsigned global_event_nr; +}; + /*----------------------------------------------------------------- * The ioctl interface needs to be able to look up devices by * name or uuid. @@ -456,9 +464,9 @@ void dm_deferred_remove(void) * All the ioctl commands get dispatched to functions with this * prototype. */ -typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size); +typedef int (*ioctl_fn)(struct file *filp, struct dm_ioctl *param, size_t param_size); -static int remove_all(struct dm_ioctl *param, size_t param_size) +static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_size) { dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false); param->data_size = 0; @@ -491,13 +499,14 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size, return ((void *) param) + param->data_start; } -static int list_devices(struct dm_ioctl *param, size_t param_size) +static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size) { unsigned int i; struct hash_cell *hc; size_t len, needed = 0; struct gendisk *disk; struct dm_name_list *nl, *old_nl = NULL; + uint32_t *event_nr; down_write(&_hash_lock); @@ -510,6 +519,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) needed += sizeof(struct dm_name_list); needed += strlen(hc->name) + 1; needed += ALIGN_MASK; + needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK; } } @@ -539,7 +549,9 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) strcpy(nl->name, hc->name); old_nl = nl; - nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1); + event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1); + *event_nr = dm_get_event_nr(hc->md); + nl = align_ptr(event_nr + 1); } } @@ -582,7 +594,7 @@ static void list_version_get_info(struct target_type *tt, void *param) info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1); } -static int list_versions(struct dm_ioctl *param, size_t param_size) +static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size) { size_t len, needed = 0; struct dm_target_versions *vers; @@ -724,7 +736,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) } } -static int dev_create(struct dm_ioctl *param, size_t param_size) +static int dev_create(struct file *filp, struct dm_ioctl *param, size_t param_size) { int r, m = DM_ANY_MINOR; struct mapped_device *md; @@ -816,7 +828,7 @@ static struct mapped_device *find_device(struct dm_ioctl *param) return md; } -static int dev_remove(struct dm_ioctl *param, size_t param_size) +static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_size) { struct hash_cell *hc; struct mapped_device *md; @@ -881,7 +893,7 @@ static int invalid_str(char *str, void *end) return -EINVAL; } -static int dev_rename(struct dm_ioctl *param, size_t param_size) +static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_size) { int r; char *new_data = (char *) param + param->data_start; @@ -911,7 +923,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return 0; } -static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) +static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t param_size) { int r = -EINVAL, x; struct mapped_device *md; @@ -1060,7 +1072,7 @@ static int do_resume(struct dm_ioctl *param) * Set or unset the suspension state of a device. * If the device already is in the requested state we just return its status. */ -static int dev_suspend(struct dm_ioctl *param, size_t param_size) +static int dev_suspend(struct file *filp, struct dm_ioctl *param, size_t param_size) { if (param->flags & DM_SUSPEND_FLAG) return do_suspend(param); @@ -1072,7 +1084,7 @@ static int dev_suspend(struct dm_ioctl *param, size_t param_size) * Copies device info back to user space, used by * the create and info ioctls. */ -static int dev_status(struct dm_ioctl *param, size_t param_size) +static int dev_status(struct file *filp, struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; @@ -1163,7 +1175,7 @@ static void retrieve_status(struct dm_table *table, /* * Wait for a device to report an event */ -static int dev_wait(struct dm_ioctl *param, size_t param_size) +static int dev_wait(struct file *filp, struct dm_ioctl *param, size_t param_size) { int r = 0; struct mapped_device *md; @@ -1200,6 +1212,19 @@ out: return r; } +/* + * Remember the global event number and make it possible to poll + * for further events. + */ +static int dev_arm_poll(struct file *filp, struct dm_ioctl *param, size_t param_size) +{ + struct dm_file *priv = filp->private_data; + + priv->global_event_nr = atomic_read(&dm_global_event_nr); + + return 0; +} + static inline fmode_t get_mode(struct dm_ioctl *param) { fmode_t mode = FMODE_READ | FMODE_WRITE; @@ -1269,7 +1294,7 @@ static bool is_valid_type(enum dm_queue_mode cur, enum dm_queue_mode new) return false; } -static int table_load(struct dm_ioctl *param, size_t param_size) +static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_size) { int r; struct hash_cell *hc; @@ -1356,7 +1381,7 @@ err: return r; } -static int table_clear(struct dm_ioctl *param, size_t param_size) +static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_size) { struct hash_cell *hc; struct mapped_device *md; @@ -1430,7 +1455,7 @@ static void retrieve_deps(struct dm_table *table, param->data_size = param->data_start + needed; } -static int table_deps(struct dm_ioctl *param, size_t param_size) +static int table_deps(struct file *filp, struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; struct dm_table *table; @@ -1456,7 +1481,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) * Return the status of a device as a text string for each * target. */ -static int table_status(struct dm_ioctl *param, size_t param_size) +static int table_status(struct file *filp, struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; struct dm_table *table; @@ -1511,7 +1536,7 @@ static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, /* * Pass a message to the target that's at the supplied device offset. */ -static int target_message(struct dm_ioctl *param, size_t param_size) +static int target_message(struct file *filp, struct dm_ioctl *param, size_t param_size) { int r, argc; char **argv; @@ -1628,7 +1653,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) {DM_LIST_VERSIONS_CMD, 0, list_versions}, {DM_TARGET_MSG_CMD, 0, target_message}, - {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry} + {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}, + {DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll}, }; if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) @@ -1783,7 +1809,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) return 0; } -static int ctl_ioctl(uint command, struct dm_ioctl __user *user) +static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *user) { int r = 0; int ioctl_flags; @@ -1837,7 +1863,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) goto out; param->data_size = offsetof(struct dm_ioctl, data); - r = fn(param, input_param_size); + r = fn(file, param, input_param_size); if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) && unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS)) @@ -1856,7 +1882,7 @@ out: static long dm_ctl_ioctl(struct file *file, uint command, ulong u) { - return (long)ctl_ioctl(command, (struct dm_ioctl __user *)u); + return (long)ctl_ioctl(file, command, (struct dm_ioctl __user *)u); } #ifdef CONFIG_COMPAT @@ -1868,8 +1894,47 @@ static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u) #define dm_compat_ctl_ioctl NULL #endif +static int dm_open(struct inode *inode, struct file *filp) +{ + int r; + struct dm_file *priv; + + r = nonseekable_open(inode, filp); + if (unlikely(r)) + return r; + + priv = filp->private_data = kmalloc(sizeof(struct dm_file), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->global_event_nr = atomic_read(&dm_global_event_nr); + + return 0; +} + +static int dm_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static unsigned dm_poll(struct file *filp, poll_table *wait) +{ + struct dm_file *priv = filp->private_data; + unsigned mask = 0; + + poll_wait(filp, &dm_global_eventq, wait); + + if ((int)(atomic_read(&dm_global_event_nr) - priv->global_event_nr) > 0) + mask |= POLLIN; + + return mask; +} + static const struct file_operations _ctl_fops = { - .open = nonseekable_open, + .open = dm_open, + .release = dm_release, + .poll = dm_poll, .unlocked_ioctl = dm_ctl_ioctl, .compat_ioctl = dm_compat_ctl_ioctl, .owner = THIS_MODULE, diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index f85846741d50..cf2c67e35eaf 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -356,6 +356,7 @@ struct kcopyd_job { struct mutex lock; atomic_t sub_jobs; sector_t progress; + sector_t write_offset; struct kcopyd_job *master_job; }; @@ -386,6 +387,31 @@ void dm_kcopyd_exit(void) * Functions to push and pop a job onto the head of a given job * list. */ +static struct kcopyd_job *pop_io_job(struct list_head *jobs, + struct dm_kcopyd_client *kc) +{ + struct kcopyd_job *job; + + /* + * For I/O jobs, pop any read, any write without sequential write + * constraint and sequential writes that are at the right position. + */ + list_for_each_entry(job, jobs, list) { + if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) { + list_del(&job->list); + return job; + } + + if (job->write_offset == job->master_job->write_offset) { + job->master_job->write_offset += job->source.count; + list_del(&job->list); + return job; + } + } + + return NULL; +} + static struct kcopyd_job *pop(struct list_head *jobs, struct dm_kcopyd_client *kc) { @@ -395,8 +421,12 @@ static struct kcopyd_job *pop(struct list_head *jobs, spin_lock_irqsave(&kc->job_lock, flags); if (!list_empty(jobs)) { - job = list_entry(jobs->next, struct kcopyd_job, list); - list_del(&job->list); + if (jobs == &kc->io_jobs) + job = pop_io_job(jobs, kc); + else { + job = list_entry(jobs->next, struct kcopyd_job, list); + list_del(&job->list); + } } spin_unlock_irqrestore(&kc->job_lock, flags); @@ -506,6 +536,14 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + /* + * If we need to write sequentially and some reads or writes failed, + * no point in continuing. + */ + if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && + job->master_job->write_err) + return -EIO; + io_job_start(job->kc->throttle); if (job->rw == READ) @@ -655,6 +693,7 @@ static void segment_complete(int read_err, unsigned long write_err, int i; *sub_job = *job; + sub_job->write_offset = progress; sub_job->source.sector += progress; sub_job->source.count = count; @@ -723,6 +762,27 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); + /* + * If one of the destination is a host-managed zoned block device, + * we need to write sequentially. If one of the destination is a + * host-aware device, then leave it to the caller to choose what to do. + */ + if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) { + for (i = 0; i < job->num_dests; i++) { + if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { + set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags); + break; + } + } + } + + /* + * If we need to write sequentially, errors cannot be ignored. + */ + if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && + test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) + clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags); + if (from) { job->source = *from; job->pages = NULL; @@ -746,6 +806,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->fn = fn; job->context = context; job->master_job = job; + job->write_offset = 0; if (job->source.count <= SUB_JOB_SIZE) dispatch_job(job); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 7d42a9d9f406..c03c203a90b4 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -89,7 +89,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) struct linear_c *lc = ti->private; bio->bi_bdev = lc->dev->bdev; - if (bio_sectors(bio)) + if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) bio->bi_iter.bi_sector = linear_map_sector(ti, bio->bi_iter.bi_sector); } @@ -101,6 +101,17 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +static int linear_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) +{ + struct linear_c *lc = ti->private; + + if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT) + dm_remap_zone_report(ti, bio, lc->start); + + return DM_ENDIO_DONE; +} + static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -161,12 +172,13 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, static struct target_type linear_target = { .name = "linear", - .version = {1, 3, 0}, - .features = DM_TARGET_PASSES_INTEGRITY, + .version = {1, 4, 0}, + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, + .end_io = linear_end_io, .status = linear_status, .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index b4b75dad816a..2e10c2f13a34 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1571,7 +1571,7 @@ static sector_t __rdev_sectors(struct raid_set *rs) return rdev->sectors; } - BUG(); /* Constructor ensures we got some. */ + return 0; } /* Calculate the sectors per device and per array used for @rs */ @@ -2941,7 +2941,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) bool resize; struct raid_type *rt; unsigned int num_raid_params, num_raid_devs; - sector_t calculated_dev_sectors; + sector_t calculated_dev_sectors, rdev_sectors; struct raid_set *rs = NULL; const char *arg; struct rs_layout rs_layout; @@ -3017,7 +3017,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; - resize = calculated_dev_sectors != __rdev_sectors(rs); + rdev_sectors = __rdev_sectors(rs); + if (!rdev_sectors) { + ti->error = "Invalid rdev size"; + r = -EINVAL; + goto bad; + } + + resize = calculated_dev_sectors != rdev_sectors; INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5f5eae41f804..a39bcd9b982a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -319,6 +319,39 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 1; } + /* + * If the target is mapped to zoned block device(s), check + * that the zones are not partially mapped. + */ + if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) { + unsigned int zone_sectors = bdev_zone_sectors(bdev); + + if (start & (zone_sectors - 1)) { + DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s", + dm_device_name(ti->table->md), + (unsigned long long)start, + zone_sectors, bdevname(bdev, b)); + return 1; + } + + /* + * Note: The last zone of a zoned block device may be smaller + * than other zones. So for a target mapping the end of a + * zoned block device with such a zone, len would not be zone + * aligned. We do not allow such last smaller zone to be part + * of the mapping here to ensure that mappings with multiple + * devices do not end up with a smaller zone in the middle of + * the sector range. + */ + if (len & (zone_sectors - 1)) { + DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s", + dm_device_name(ti->table->md), + (unsigned long long)len, + zone_sectors, bdevname(bdev, b)); + return 1; + } + } + if (logical_block_size_sectors <= 1) return 0; @@ -456,6 +489,8 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, q->limits.alignment_offset, (unsigned long long) start << SECTOR_SHIFT); + limits->zoned = blk_queue_zoned_model(q); + return 0; } @@ -1346,6 +1381,88 @@ bool dm_table_has_no_data_devices(struct dm_table *table) return true; } +static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + enum blk_zoned_model *zoned_model = data; + + return q && blk_queue_zoned_model(q) == *zoned_model; +} + +static bool dm_table_supports_zoned_model(struct dm_table *t, + enum blk_zoned_model zoned_model) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (zoned_model == BLK_ZONED_HM && + !dm_target_supports_zoned_hm(ti->type)) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model)) + return false; + } + + return true; +} + +static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + unsigned int *zone_sectors = data; + + return q && blk_queue_zone_sectors(q) == *zone_sectors; +} + +static bool dm_table_matches_zone_sectors(struct dm_table *t, + unsigned int zone_sectors) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors)) + return false; + } + + return true; +} + +static int validate_hardware_zoned_model(struct dm_table *table, + enum blk_zoned_model zoned_model, + unsigned int zone_sectors) +{ + if (zoned_model == BLK_ZONED_NONE) + return 0; + + if (!dm_table_supports_zoned_model(table, zoned_model)) { + DMERR("%s: zoned model is not consistent across all devices", + dm_device_name(table->md)); + return -EINVAL; + } + + /* Check zone size validity and compatibility */ + if (!zone_sectors || !is_power_of_2(zone_sectors)) + return -EINVAL; + + if (!dm_table_matches_zone_sectors(table, zone_sectors)) { + DMERR("%s: zone sectors is not consistent across all devices", + dm_device_name(table->md)); + return -EINVAL; + } + + return 0; +} + /* * Establish the new table's queue_limits and validate them. */ @@ -1355,6 +1472,8 @@ int dm_calculate_queue_limits(struct dm_table *table, struct dm_target *ti; struct queue_limits ti_limits; unsigned i; + enum blk_zoned_model zoned_model = BLK_ZONED_NONE; + unsigned int zone_sectors = 0; blk_set_stacking_limits(limits); @@ -1372,6 +1491,15 @@ int dm_calculate_queue_limits(struct dm_table *table, ti-& |
