97 files changed, 2255 insertions, 1768 deletions
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 282de3680367..c57e5b7cb532 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -336,18 +336,11 @@ What:		/sys/block/<disk>/queue/io_poll_delay
 Date:		November 2016
 Contact:	linux-block@vger.kernel.org
 Description:
-		[RW] If polling is enabled, this controls what kind of polling
-		will be performed. It defaults to -1, which is classic polling.
+		[RW] This was used to control what kind of polling will be
+		performed. It is now fixed to -1, which is classic polling.
 		In this mode, the CPU will repeatedly ask for completions
-		without giving up any time. If set to 0, a hybrid polling mode
-		is used, where the kernel will attempt to make an educated guess
-		at when the IO will complete. Based on this guess, the kernel
-		will put the process issuing IO to sleep for an amount of time,
-		before entering a classic poll loop. This mode might be a little
-		slower than pure classic polling, but it will be more efficient.
-		If set to a value larger than 0, the kernel will put the process
-		issuing IO to sleep for this amount of microseconds before
-		entering classic polling.
+		without giving up any time.
+		<deprecated>
 
 What:		/sys/block/<disk>/queue/io_timeout
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
index f9bf18ea6509..90b733422ed4 100644
--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@@ -270,8 +270,7 @@ Request queue based layered devices like dm-rq that wish to support inline
 encryption need to create their own blk_crypto_profile for their request_queue,
 and expose whatever functionality they choose. When a layered device wants to
 pass a clone of that request to another request_queue, blk-crypto will
-initialize and prepare the clone as necessary; see
-``blk_crypto_insert_cloned_request()``.
+initialize and prepare the clone as necessary.
 
 Interaction between inline encryption and blk integrity
 =======================================================
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index 08e420e10973..b64809514b0f 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -52,6 +52,14 @@ Available fault injection capabilities
   status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
   retry flag can be set via the debugfs.
 
+- Null test block driver fault injection
+
+  inject IO timeouts by setting config items under
+  /sys/kernel/config/nullb/<disk>/timeout_inject,
+  inject requeue requests by setting config items under
+  /sys/kernel/config/nullb/<disk>/requeue_inject, and
+  inject init_hctx() errors by setting config items under
+  /sys/kernel/config/nullb/<disk>/init_hctx_fault_inject.
 
 Configure fault-injection capabilities behavior
 -----------------------------------------------
 
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 93d1ccd3304c..9c49c3d67cd5 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -78,6 +78,7 @@ typedef struct dasd_information2_t {
  * 0x040: give access to raw eckd data
  * 0x080: enable discard support
  * 0x100: enable autodisable for IFCC errors (default)
+ * 0x200: enable requeue of all requests on autoquiesce
  */
 #define DASD_FEATURE_READONLY		0x001
 #define DASD_FEATURE_USEDIAG		0x002
@@ -88,6 +89,7 @@ typedef struct dasd_information2_t {
 #define DASD_FEATURE_USERAW		0x040
 #define DASD_FEATURE_DISCARD		0x080
 #define DASD_FEATURE_PATH_AUTODISABLE	0x100
+#define DASD_FEATURE_REQUEUEQUIESCE	0x200
 #define DASD_FEATURE_DEFAULT		DASD_FEATURE_PATH_AUTODISABLE
 
 #define DASD_PARTN_BITS			2
diff --git a/block/Kconfig b/block/Kconfig
index 69ccf7457ae1..86122e459fe0 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -215,11 +215,6 @@ config BLK_MQ_VIRTIO
 	depends on VIRTIO
 	default y
 
-config BLK_MQ_RDMA
-	bool
-	depends on INFINIBAND
-	default y
-
 config BLK_PM
 	def_bool PM
diff --git a/block/Makefile b/block/Makefile
index 4e01bb71ad6e..b31b05390749 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY_T10)	+= t10-pi.o
 obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
 obj-$(CONFIG_BLK_MQ_VIRTIO)	+= blk-mq-virtio.o
-obj-$(CONFIG_BLK_MQ_RDMA)	+= blk-mq-rdma.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
diff --git a/block/bdev.c b/block/bdev.c
index 1795c7d4b99e..850852fe4b78 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -419,6 +419,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	bdev->bd_inode = inode;
 	bdev->bd_queue = disk->queue;
 	bdev->bd_stats = alloc_percpu(struct disk_stats);
+	bdev->bd_has_submit_bio = false;
 	if (!bdev->bd_stats) {
 		iput(inode);
 		return NULL;
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 89ffb3aa992c..2c90e5de0acd 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -497,15 +497,9 @@ static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
 	bgd = kzalloc(sizeof(*bgd), gfp);
 	if (!bgd)
 		return NULL;
-	return &bgd->pd;
-}
-
-static void bfq_cpd_init(struct blkcg_policy_data *cpd)
-{
-	struct bfq_group_data *d = cpd_to_bfqgd(cpd);
-	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
-		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
+	bgd->weight = CGROUP_WEIGHT_DFL;
+	return &bgd->pd;
 }
 
 static void bfq_cpd_free(struct blkcg_policy_data *cpd)
@@ -1111,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
 	struct bfq_group *bfqg;
 	u64 v;
 
-	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
+	blkg_conf_init(&ctx, buf);
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (sscanf(ctx.body, "%llu", &v) == 1) {
 		/* require "default" on dfl */
@@ -1135,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
 		ret = 0;
 	}
 out:
-	blkg_conf_finish(&ctx);
+	blkg_conf_exit(&ctx);
 
 	return ret ?: nbytes;
 }
@@ -1301,8 +1297,6 @@ struct blkcg_policy blkcg_policy_bfq = {
 	.legacy_cftypes		= bfq_blkcg_legacy_files,
 
 	.cpd_alloc_fn		= bfq_cpd_alloc,
-	.cpd_init_fn		= bfq_cpd_init,
-	.cpd_bind_fn		= bfq_cpd_init,
 	.cpd_free_fn		= bfq_cpd_free,
 
 	.pd_alloc_fn		= bfq_pd_alloc,
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d9ed3108c17a..3164e3177965 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -129,7 +129,6 @@
 #include "elevator.h"
 #include "blk.h"
 #include "blk-mq.h"
-#include "blk-mq-tag.h"
 #include "blk-mq-sched.h"
 #include "bfq-iosched.h"
 #include "blk-wbt.h"
@@ -649,6 +648,8 @@ retry:
 				sched_data->service_tree[i].wsum;
 		}
 	}
+	if (!wsum)
+		continue;
 	limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum);
 	if (entity->allocated >= limit) {
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
@@ -6232,7 +6233,7 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 static struct bfq_queue *bfq_init_rq(struct request *rq);
 
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
-			       bool at_head)
+			       blk_insert_t flags)
 {
 	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
@@ -6255,11 +6256,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	if (!bfqq || at_head) {
-		if (at_head)
-			list_add(&rq->queuelist, &bfqd->dispatch);
-		else
-			list_add_tail(&rq->queuelist, &bfqd->dispatch);
+	if (flags & BLK_MQ_INSERT_AT_HEAD) {
+		list_add(&rq->queuelist, &bfqd->dispatch);
+	} else if (!bfqq) {
+		list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
@@ -6289,14 +6289,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 }
 
 static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
-				struct list_head *list, bool at_head)
+				struct list_head *list,
+				blk_insert_t flags)
 {
 	while (!list_empty(list)) {
 		struct request *rq;
 
 		rq = list_first_entry(list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		bfq_insert_request(hctx, rq, at_head);
+		bfq_insert_request(hctx, rq, flags);
 	}
 }
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 69aaee52285a..467e8cfc41a2 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -20,7 +20,6 @@
 #define BFQ_DEFAULT_QUEUE_IOPRIO	4
 
-#define BFQ_WEIGHT_LEGACY_DFL	100
 #define BFQ_DEFAULT_GRP_IOPRIO	0
 #define BFQ_DEFAULT_GRP_CLASS	IOPRIO_CLASS_BE
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 18c922579719..ff45649361e7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -33,7 +33,6 @@
 #include "blk-cgroup.h"
 #include "blk-ioprio.h"
 #include "blk-throttle.h"
-#include "blk-rq-qos.h"
 
 /*
  * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
@@ -693,69 +692,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
 EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
 
 /**
- * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
- * @inputp: input string pointer
+ * blkg_conf_init - initialize a blkg_conf_ctx
+ * @ctx: blkg_conf_ctx to initialize
+ * @input: input string
  *
- * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
- * from @input and get and return the matching bdev. *@inputp is
- * updated to point past the device node prefix. Returns an ERR_PTR()
- * value on error.
+ * Initialize @ctx which can be used to parse blkg config input string @input.
+ * Once initialized, @ctx can be used with blkg_conf_open_bdev() and
+ * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
+ */
+void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
+{
+	*ctx = (struct blkg_conf_ctx){ .input = input };
+}
+EXPORT_SYMBOL_GPL(blkg_conf_init);
+
+/**
+ * blkg_conf_open_bdev - parse and open bdev for per-blkg config update
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
  *
- * Use this function iff blkg_conf_prep() can't be used for some reason.
+ * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
+ * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
+ * set to point past the device node prefix.
+ *
+ * This function may be called multiple times on @ctx and the extra calls become
+ * NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
+ * explicitly if bdev access is needed without resolving the blkcg / policy part
+ * of @ctx->input. Returns -errno on error.
  */
-struct block_device *blkcg_conf_open_bdev(char **inputp)
+int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
 {
-	char *input = *inputp;
+	char *input = ctx->input;
 	unsigned int major, minor;
 	struct block_device *bdev;
 	int key_len;
 
+	if (ctx->bdev)
+		return 0;
+
 	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	input += key_len;
 	if (!isspace(*input))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	input = skip_spaces(input);
 
 	bdev = blkdev_get_no_ope
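
Note on the blk-cgroup hunks above: the old blkg_conf_prep()/blkg_conf_finish() pairing is replaced by an explicit blkg_conf_ctx that is set up with blkg_conf_init() and torn down with blkg_conf_exit(). Below is a minimal sketch of the new calling convention, modeled on the bfq_io_set_device_weight() conversion shown earlier in this diff; example_blkcg_policy, example_set_device_limit() and the parsed value are illustrative assumptions, not part of the patch.

/*
 * Sketch only: example_blkcg_policy and example_set_device_limit() are
 * hypothetical. The blkg_conf_*() calls and the ctx fields mirror the
 * hunks above; the handler boilerplate follows the usual blkcg cftype
 * write-handler pattern.
 */
static ssize_t example_set_device_limit(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	u64 v;
	int ret;

	/* The ctx now carries the input string from the start. */
	blkg_conf_init(&ctx, buf);

	/* Parses the MAJ:MIN prefix and looks up the blkg for the policy. */
	ret = blkg_conf_prep(blkcg, &example_blkcg_policy, &ctx);
	if (ret)
		goto out;

	/* ctx.body points past the device prefix, as before. */
	ret = -EINVAL;
	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* ... apply @v to the per-device configuration ... */
		ret = 0;
	}
out:
	/* Replaces blkg_conf_finish(); must pair with blkg_conf_init(). */
	blkg_conf_exit(&ctx);
	return ret ?: nbytes;
}

As in the bfq conversion, blkg_conf_exit() is reached on both the success path and the prep-failure path, which is what allows the early "goto out" instead of a direct return.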
