summary | refs | log | tree | commit | diff
path: root/block
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-06-02 15:29:19 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-06-02 15:29:19 -0700
commit: 750a02ab8d3c49ca7d23102be90d3d1db19e2827 (patch)
tree: 3c829af238b6598178c9ed859edb00bc8a280c05 /block
parent: 1966391fa576e1fb2701be8bcca197d8f72737b7 (diff)
parent: abb30460bda232f304f642510adc8c6576ea51ea (diff)
download: linux-750a02ab8d3c49ca7d23102be90d3d1db19e2827.tar.gz
download: linux-750a02ab8d3c49ca7d23102be90d3d1db19e2827.tar.bz2
download: linux-750a02ab8d3c49ca7d23102be90d3d1db19e2827.zip
Merge tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "Core block changes that have been queued up for this release:

   - Remove dead blk-throttle and blk-wbt code (Guoqing)
   - Include pid in blktrace note traces (Jan)
   - Don't spew I/O errors on wouldblock termination (me)
   - Zone append addition (Johannes, Keith, Damien)
   - IO accounting improvements (Konstantin, Christoph)
   - blk-mq hardware map update improvements (Ming)
   - Scheduler dispatch improvement (Salman)
   - Inline block encryption support (Satya)
   - Request map fixes and improvements (Weiping)
   - blk-iocost tweaks (Tejun)
   - Fix for timeout failing with error injection (Keith)
   - Queue re-run fixes (Douglas)
   - CPU hotplug improvements (Christoph)
   - Queue entry/exit improvements (Christoph)
   - Move DMA drain handling to the few drivers that use it (Christoph)
   - Partition handling cleanups (Christoph)"

* tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block: (127 commits)
  block: mark bio_wouldblock_error() bio with BIO_QUIET
  blk-wbt: rename __wbt_update_limits to wbt_update_limits
  blk-wbt: remove wbt_update_limits
  blk-throttle: remove tg_drain_bios
  blk-throttle: remove blk_throtl_drain
  null_blk: force complete for timeout request
  blk-mq: drain I/O when all CPUs in a hctx are offline
  blk-mq: add blk_mq_all_tag_iter
  blk-mq: open code __blk_mq_alloc_request in blk_mq_alloc_request_hctx
  blk-mq: use BLK_MQ_NO_TAG in more places
  blk-mq: rename BLK_MQ_TAG_FAIL to BLK_MQ_NO_TAG
  blk-mq: move more request initialization to blk_mq_rq_ctx_init
  blk-mq: simplify the blk_mq_get_request calling convention
  blk-mq: remove the bio argument to ->prepare_request
  nvme: force complete cancelled requests
  blk-mq: blk-mq: provide forced completion method
  block: fix a warning when blkdev.h is included for !CONFIG_BLOCK builds
  block: blk-crypto-fallback: remove redundant initialization of variable err
  block: reduce part_stat_lock() scope
  block: use __this_cpu_add() instead of access by smp_processor_id()
  ...
Diffstat (limited to 'block')
-rw-r--r-- block/Kconfig 18
-rw-r--r-- block/Makefile 2
-rw-r--r-- block/bfq-iosched.c 2
-rw-r--r-- block/bio-integrity.c 3
-rw-r--r-- block/bio.c 184
-rw-r--r-- block/blk-cgroup.c 6
-rw-r--r-- block/blk-core.c 325
-rw-r--r-- block/blk-crypto-fallback.c 657
-rw-r--r-- block/blk-crypto-internal.h 201
-rw-r--r-- block/blk-crypto.c 404
-rw-r--r-- block/blk-exec.c 2
-rw-r--r-- block/blk-flush.c 26
-rw-r--r-- block/blk-integrity.c 7
-rw-r--r-- block/blk-iocost.c 86
-rw-r--r-- block/blk-map.c 15
-rw-r--r-- block/blk-merge.c 76
-rw-r--r-- block/blk-mq-debugfs.c 3
-rw-r--r-- block/blk-mq-sched.c 82
-rw-r--r-- block/blk-mq-tag.c 70
-rw-r--r-- block/blk-mq-tag.h 6
-rw-r--r-- block/blk-mq.c 407
-rw-r--r-- block/blk-mq.h 4
-rw-r--r-- block/blk-settings.c 68
-rw-r--r-- block/blk-sysfs.c 13
-rw-r--r-- block/blk-throttle.c 63
-rw-r--r-- block/blk-wbt.c 16
-rw-r--r-- block/blk-wbt.h 4
-rw-r--r-- block/blk-zoned.c 23
-rw-r--r-- block/blk.h 88
-rw-r--r-- block/bounce.c 2
-rw-r--r-- block/genhd.c 133
-rw-r--r-- block/ioctl.c 154
-rw-r--r-- block/keyslot-manager.c 397
-rw-r--r-- block/kyber-iosched.c 2
-rw-r--r-- block/mq-deadline.c 2
-rw-r--r-- block/partitions/core.c 187
36 files changed, 2879 insertions, 859 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 3bc76bb113a0..9382a4acefc3 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -146,6 +146,7 @@ config BLK_CGROUP_IOLATENCY
config BLK_CGROUP_IOCOST
bool "Enable support for cost model based cgroup IO controller"
depends on BLK_CGROUP=y
+ select BLK_RQ_IO_DATA_LEN
select BLK_RQ_ALLOC_TIME
---help---
Enabling this option enables the .weight interface for cost
@@ -185,6 +186,23 @@ config BLK_SED_OPAL
Enabling this option enables users to setup/unlock/lock
Locking ranges for SED devices using the Opal protocol.
+config BLK_INLINE_ENCRYPTION
+ bool "Enable inline encryption support in block layer"
+ help
+ Build the blk-crypto subsystem. Enabling this lets the
+ block layer handle encryption, so users can take
+ advantage of inline encryption hardware if present.
+
+config BLK_INLINE_ENCRYPTION_FALLBACK
+ bool "Enable crypto API fallback for blk-crypto"
+ depends on BLK_INLINE_ENCRYPTION
+ select CRYPTO
+ select CRYPTO_SKCIPHER
+ help
+ Enabling this lets the block layer handle inline encryption
+ by falling back to the kernel crypto API when inline
+ encryption hardware is not present.
+
menu "Partition Types"
source "block/partitions/Kconfig"
diff --git a/block/Makefile b/block/Makefile
index 206b96e9387f..78719169fb2a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -36,3 +36,5 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o blk-crypto.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3d411716d7ee..50c8f034c01c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6073,7 +6073,7 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
* comments on bfq_init_rq for the reason behind this delayed
* preparation.
*/
-static void bfq_prepare_request(struct request *rq, struct bio *bio)
+static void bfq_prepare_request(struct request *rq)
{
/*
* Regardless of whether we have an icq attached, we have to
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bf62c25cde8f..3579ac0f6ec1 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -42,6 +42,9 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
struct bio_set *bs = bio->bi_pool;
unsigned inline_vecs;
+ if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
inline_vecs = nr_vecs;
diff --git a/block/bio.c b/block/bio.c
index 21cbaa6a1c20..5235da6434aa 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -18,6 +18,7 @@
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
+#include <linux/blk-crypto.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -237,6 +238,8 @@ void bio_uninit(struct bio *bio)
if (bio_integrity(bio))
bio_integrity_free(bio);
+
+ bio_crypt_free_ctx(bio);
}
EXPORT_SYMBOL(bio_uninit);
@@ -708,6 +711,8 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
__bio_clone_fast(b, bio);
+ bio_crypt_clone(b, bio, gfp_mask);
+
if (bio_integrity(bio)) {
int ret;
@@ -748,9 +753,14 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
return true;
}
-static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned len, unsigned offset,
- bool *same_page)
+/*
+ * Try to merge a page into a segment, while obeying the hardware segment
+ * size limit. This is not for normal read/write bios, but for passthrough
+ * or Zone Append operations that we can't split.
+ */
+static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned len,
+ unsigned offset, bool *same_page)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
@@ -765,38 +775,32 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
}
/**
- * __bio_add_pc_page - attempt to add page to passthrough bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- * @same_page: return if the merge happen inside the same page
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
+ * bio_add_hw_page - attempt to add a page to a bio with hw constraints
+ * @q: the target queue
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ * @max_sectors: maximum number of sectors that can be added
+ * @same_page: return if the segment has been merged inside the same page
*
- * This should only be used by passthrough bios.
+ * Add a page to a bio while respecting the hardware max_sectors, max_segment
+ * and gap limitations.
*/
-int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
- bool *same_page)
+ unsigned int max_sectors, bool *same_page)
{
struct bio_vec *bvec;
- /*
- * cloned bio must not modify vec list
- */
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
- if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
+ if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
return 0;
if (bio->bi_vcnt > 0) {
- if (bio_try_merge_pc_page(q, bio, page, len, offset, same_page))
+ if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
return len;
/*
@@ -823,11 +827,27 @@ int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
return len;
}
+/**
+ * bio_add_pc_page - attempt to add page to passthrough bio
+ * @q: the target queue
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
+ * number of reasons, such as the bio being full or target block device
+ * limitations. The target block device must allow bio's up to PAGE_SIZE,
+ * so it is always possible to add a single page to an empty bio.
+ *
+ * This should only be used by passthrough bios.
+ */
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset)
{
bool same_page = false;
- return __bio_add_pc_page(q, bio, page, len, offset, &same_page);
+ return bio_add_hw_page(q, bio, page, len, offset,
+ queue_max_hw_sectors(q), &same_page);
}
EXPORT_SYMBOL(bio_add_pc_page);
@@ -936,6 +956,7 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
put_page(bvec->bv_page);
}
}
+EXPORT_SYMBOL_GPL(bio_release_pages);
static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
{
@@ -1010,6 +1031,50 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+ struct request_queue *q = bio->bi_disk->queue;
+ unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
+ struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+ struct page **pages = (struct page **)bv;
+ ssize_t size, left;
+ unsigned len, i;
+ size_t offset;
+
+ if (WARN_ON_ONCE(!max_append_sectors))
+ return 0;
+
+ /*
+ * Move page array up in the allocated memory for the bio vecs as far as
+ * possible so that we can start filling biovecs from the beginning
+ * without overwriting the temporary page array.
+ */
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
+
+ size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ if (unlikely(size <= 0))
+ return size ? size : -EFAULT;
+
+ for (left = size, i = 0; left > 0; left -= len, i++) {
+ struct page *page = pages[i];
+ bool same_page = false;
+
+ len = min_t(size_t, PAGE_SIZE - offset, left);
+ if (bio_add_hw_page(q, bio, page, len, offset,
+ max_append_sectors, &same_page) != len)
+ return -EINVAL;
+ if (same_page)
+ put_page(page);
+ offset = 0;
+ }
+
+ iov_iter_advance(iter, size);
+ return 0;
+}
+
/**
* bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
@@ -1039,16 +1104,23 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return -EINVAL;
do {
- if (is_bvec)
- ret = __bio_iov_bvec_add_pages(bio, iter);
- else
- ret = __bio_iov_iter_get_pages(bio, iter);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ if (WARN_ON_ONCE(is_bvec))
+ return -EINVAL;
+ ret = __bio_iov_append_get_pages(bio, iter);
+ } else {
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
+ }
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
if (is_bvec)
bio_set_flag(bio, BIO_NO_PAGE_REF);
return bio->bi_vcnt ? 0 : ret;
}
+EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
static void submit_bio_wait_endio(struct bio *bio)
{
@@ -1105,6 +1177,7 @@ void bio_advance(struct bio *bio, unsigned bytes)
if (bio_integrity(bio))
bio_integrity_advance(bio, bytes);
+ bio_crypt_advance(bio, bytes);
bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);
@@ -1303,55 +1376,6 @@ defer:
schedule_work(&bio_dirty_work);
}
-void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
-{
- unsigned long stamp;
-again:
- stamp = READ_ONCE(part->stamp);
- if (unlikely(stamp != now)) {
- if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, end ? now - stamp : 1);
- }
- }
- if (part->partno) {
- part = &part_to_disk(part)->part0;
- goto again;
- }
-}
-
-void generic_start_io_acct(struct request_queue *q, int op,
- unsigned long sectors, struct hd_struct *part)
-{
- const int sgrp = op_stat_group(op);
-
- part_stat_lock();
-
- update_io_ticks(part, jiffies, false);
- part_stat_inc(part, ios[sgrp]);
- part_stat_add(part, sectors[sgrp], sectors);
- part_inc_in_flight(q, part, op_is_write(op));
-
- part_stat_unlock();
-}
-EXPORT_SYMBOL(generic_start_io_acct);
-
-void generic_end_io_acct(struct request_queue *q, int req_op,
- struct hd_struct *part, unsigned long start_time)
-{
- unsigned long now = jiffies;
- unsigned long duration = now - start_time;
- const int sgrp = op_stat_group(req_op);
-
- part_stat_lock();
-
- update_io_ticks(part, now, true);
- part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_dec_in_flight(q, part, op_is_write(req_op));
-
- part_stat_unlock();
-}
-EXPORT_SYMBOL(generic_end_io_acct);
-
static inline bool bio_remaining_done(struct bio *bio)
{
/*
@@ -1445,6 +1469,10 @@ struct bio *bio_split(struct bio *bio, int sectors,
BUG_ON(sectors <= 0);
BUG_ON(sectors >= bio_sectors(bio));
+ /* Zone append commands cannot be split */
+ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
+ return NULL;
+
split = bio_clone_fast(bio, gfp, bs);
if (!split)
return NULL;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 930212c1a512..0ecc897b225c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1530,6 +1530,10 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
{
u64 old = atomic64_read(&blkg->delay_start);
+ /* negative use_delay means no scaling, see blkcg_set_delay() */
+ if (atomic_read(&blkg->use_delay) < 0)
+ return;
+
/*
* We only want to scale down every second. The idea here is that we
* want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
@@ -1717,6 +1721,8 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
*/
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
{
+ if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+ return;
blkcg_scale_delay(blkg, now);
atomic64_add(delta, &blkg->delay_nsec);
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 38d7b1f16067..03252af8c82c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
+#include <linux/sched/sysctl.h>
+#include <linux/blk-crypto.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -121,6 +123,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
refcount_set(&rq->ref, 1);
+ blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -136,6 +139,7 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(ZONE_OPEN),
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
+ REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@ -241,6 +245,17 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);
+ if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+ /*
+ * Partial zone append completions cannot be supported as the
+ * BIO fragments may end up not being written sequentially.
+ */
+ if (bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+ else
+ bio->bi_iter.bi_sector = rq->__sector;
+ }
+
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
@@ -441,6 +456,23 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
}
}
+static inline int bio_queue_enter(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
+ bool nowait = bio->bi_opf & REQ_NOWAIT;
+ int ret;
+
+ ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
+ if (unlikely(ret)) {
+ if (nowait && !blk_queue_dying(q))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
+ }
+
+ return ret;
+}
+
void blk_queue_exit(struct request_queue *q)
{
percpu_ref_put(&q->q_usage_counter);
@@ -485,7 +517,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
+ q->backing_dev_info = bdi_alloc(node_id);
if (!q->backing_dev_info)
goto fail_split;
@@ -495,7 +527,6 @@ struct request_queue *__blk_alloc_queue(int node_id)
q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
- q->backing_dev_info->name = "block";
q->node = node_id;
timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
@@ -606,6 +637,16 @@ void blk_put_request(struct request *req)
}
EXPORT_SYMBOL(blk_put_request);
+static void blk_account_io_merge_bio(struct request *req)
+{
+ if (!blk_do_io_stat(req))
+ return;
+
+ part_stat_lock();
+ part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+ part_stat_unlock();
+}
+
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
unsigned int nr_segs)
{
@@ -624,7 +665,9 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_free_ctx(bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
@@ -648,7 +691,9 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
req->__sector = bio->bi_iter.bi_sector;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_do_front_merge(req, bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
@@ -670,7 +715,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
req->__data_len += bio->bi_iter.bi_size;
req->nr_phys_segments = segments + 1;
- blk_account_io_start(req, false);
+ blk_account_io_merge_bio(req);
return true;
no_merge:
req_set_nomerge(q, req);
@@ -872,6 +917,41 @@ out:
return ret;
}
+/*
+ * Check write append to a zoned block device.
+ */
+static inline blk_status_t blk_check_zone_append(struct request_queue *q,
+ struct bio *bio)
+{
+ sector_t pos = bio->bi_iter.bi_sector;
+ int nr_sectors = bio_sectors(bio);
+
+ /* Only applicable to zoned block devices */
+ if (!blk_queue_is_zoned(q))
+ return BLK_STS_NOTSUPP;
+
+ /* The bio sector must point to the start of a sequential zone */
+ if (pos & (blk_queue_zone_sectors(q) - 1) ||
+ !blk_queue_zone_is_seq(q, pos))
+ return BLK_STS_IOERR;
+
+ /*
+ * Not allowed to cross zone boundaries. Otherwise, the BIO will be
+ * split and could result in non-contiguous sectors being written in
+ * different zones.
+ */
+ if (nr_sectors > q->limits.chunk_sectors)
+ return BLK_STS_IOERR;
+
+ /* Make sure the BIO is small enough and will not get split */
+ if (nr_sectors > q->limits.max_zone_append_sectors)
+ return BLK_STS_IOERR;
+
+ bio->bi_opf |= REQ_NOMERGE;
+
+ return BLK_STS_OK;
+}
+
static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
@@ -941,6 +1021,11 @@ generic_make_request_checks(struct bio *bio)
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
+ case REQ_OP_ZONE_APPEND:
+ status = blk_check_zone_append(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
@@ -961,12 +1046,13 @@ generic_make_request_checks(struct bio *bio)
}
/*
- * Various block parts want %current->io_context and lazy ioc
- * allocation ends up trading a lot of pain for a small amount of
- * memory. Just allocate it upfront. This may fail and block
- * layer knows how to live with it.
+ * Various block parts want %current->io_context, so allocate it up
+ * front rather than dealing with lots of pain to allocate it only
+ * where needed. This may fail and the block layer knows how to live
+ * with it.
*/
- create_io_context(GFP_ATOMIC, q->node);
+ if (unlikely(!current->io_context))
+ create_task_io_context(current, GFP_ATOMIC, q->node);
if (!blkcg_bio_issue_check(q, bio))
return false;
@@ -988,29 +1074,28 @@ end_io:
return false;
}
+static blk_qc_t do_make_request(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
+ blk_qc_t ret = BLK_QC_T_NONE;
+
+ if (blk_crypto_bio_prep(&bio)) {
+ if (!q->make_request_fn)
+ return blk_mq_make_request(q, bio);
+ ret = q->make_request_fn(q, bio);
+ }
+ blk_queue_exit(q);
+ return ret;
+}
+
/**
- * generic_make_request - hand a buffer to its device driver for I/O
+ * generic_make_request - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status. The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
- *
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
- *
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may resubmit the bio to
- * a lower device by calling into generic_make_request recursively, which
- * means the bio should NOT be touched after the call to ->make_request_fn.
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
*/
blk_qc_t generic_make_request(struct bio *bio)
{
@@ -1061,18 +1146,14 @@ blk_qc_t generic_make_request(struct bio *bio)
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bio->bi_disk->queue;
- blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
- BLK_MQ_REQ_NOWAIT : 0;
- if (likely(blk_queue_enter(q, flags) == 0)) {
+ if (likely(bio_queue_enter(bio) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
- ret = q->make_request_fn(q, bio);
-
- blk_queue_exit(q);
+ ret = do_make_request(bio);
/* sort new bios into those for a lower level
* and those for the same level
@@ -1088,12 +1169,6 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(&bio_list_on_stack[0], &lower);
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
- } else {
- if (unlikely(!blk_queue_dying(q) &&
- (bio->bi_opf & REQ_NOWAIT)))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
}
bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
@@ -1110,30 +1185,25 @@ EXPORT_SYMBOL(generic_make_request);
*
* This function behaves like generic_make_request(), but does not protect
* against recursion. Must only be used if the called driver is known
- * to not call generic_make_request (or direct_make_request) again from
- * its make_request function. (Calling direct_make_request again from
- * a workqueue is perfectly fine as that doesn't recurse).
+ * to be blk-mq based.
*/
blk_qc_t direct_make_request(struct bio *bio)
{
struct request_queue *q = bio->bi_disk->queue;
- bool nowait = bio->bi_opf & REQ_NOWAIT;
- blk_qc_t ret;
+ if (WARN_ON_ONCE(q->make_request_fn)) {
+ bio_io_error(bio);
+ return BLK_QC_T_NONE;
+ }
if (!generic_make_request_checks(bio))
return BLK_QC_T_NONE;
-
- if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
- if (nowait && !blk_queue_dying(q))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
+ if (unlikely(bio_queue_enter(bio)))
+ return BLK_QC_T_NONE;
+ if (!blk_crypto_bio_prep(&bio)) {
+ blk_queue_exit(q);
return BLK_QC_T_NONE;
}
-
- ret = q->make_request_fn(q, bio);