summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-08-29 20:21:42 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-08-29 20:21:42 -0700
commit: 3d3dfeb3aec7b612d266d500c82054f1fded4980 (patch)
tree: 11649eab5c74deb74e6e0879613e8053ae3b9970
parent: c1b7fcf3f6d94c2c3528bf77054bf174a5ef63d7 (diff)
parent: 146afeb235ccec10c17ad8ea26327c0c79dbd968 (diff)
downloadlinux-3d3dfeb3aec7b612d266d500c82054f1fded4980.tar.gz
linux-3d3dfeb3aec7b612d266d500c82054f1fded4980.tar.bz2
linux-3d3dfeb3aec7b612d266d500c82054f1fded4980.zip
Merge tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
 "Pretty quiet round for this release. This contains:

   - Add support for zoned storage to ublk (Andreas, Ming)

   - Series improving performance for drivers that mark themselves as
     needing a blocking context for issue (Bart)

   - Cleanup the flush logic (Chengming)

   - sed opal keyring support (Greg)

   - Fixes and improvements to the integrity support (Jinyoung)

   - Add some exports for bcachefs that we can hopefully delete again in
     the future (Kent)

   - deadline throttling fix (Zhiguo)

   - Series allowing building the kernel without buffer_head support
     (Christoph)

   - Sanitize the bio page adding flow (Christoph)

   - Write back cache fixes (Christoph)

   - MD updates via Song:
       - Fix perf regression for raid0 large sequential writes (Jan)
       - Fix split bio iostat for raid0 (David)
       - Various raid1 fixes (Heinz, Xueshi)
       - raid6test build fixes (WANG)
       - Deprecate bitmap file support (Christoph)
       - Fix deadlock with md sync thread (Yu)
       - Refactor md io accounting (Yu)
       - Various non-urgent fixes (Li, Yu, Jack)

   - Various fixes and cleanups (Arnd, Azeem, Chengming, Damien, Li,
     Ming, Nitesh, Ruan, Tejun, Thomas, Xu)"

* tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux: (113 commits)
  block: use strscpy() to instead of strncpy()
  block: sed-opal: keyring support for SED keys
  block: sed-opal: Implement IOC_OPAL_REVERT_LSP
  block: sed-opal: Implement IOC_OPAL_DISCOVERY
  blk-mq: prealloc tags when increase tagset nr_hw_queues
  blk-mq: delete redundant tagset map update when fallback
  blk-mq: fix tags leak when shrink nr_hw_queues
  ublk: zoned: support REQ_OP_ZONE_RESET_ALL
  md: raid0: account for split bio in iostat accounting
  md/raid0: Fix performance regression for large sequential writes
  md/raid0: Factor out helper for mapping and submitting a bio
  md raid1: allow writebehind to work on any leg device set WriteMostly
  md/raid1: hold the barrier until handle_read_error() finishes
  md/raid1: free the r1bio before waiting for blocked rdev
  md/raid1: call free_r1bio() before allow_barrier() in raid_end_bio_io()
  blk-cgroup: Fix NULL deref caused by blkg_policy_data being installed before init
  drivers/rnbd: restore sysfs interface to rnbd-client
  md/raid5-cache: fix null-ptr-deref for r5l_flush_stripe_to_raid()
  raid6: test: only check for Altivec if building on powerpc hosts
  raid6: test: make sure all intermediate and artifact files are .gitignored
  ...
-rw-r--r--block/Kconfig3
-rw-r--r--block/bio-integrity.c59
-rw-r--r--block/bio.c142
-rw-r--r--block/blk-cgroup.c32
-rw-r--r--block/blk-core.c1
-rw-r--r--block/blk-flush.c26
-rw-r--r--block/blk-iolatency.c35
-rw-r--r--block/blk-mq.c45
-rw-r--r--block/blk-settings.c7
-rw-r--r--block/blk-sysfs.c21
-rw-r--r--block/blk.h10
-rw-r--r--block/fops.c143
-rw-r--r--block/mq-deadline.c3
-rw-r--r--block/opal_proto.h4
-rw-r--r--block/partitions/cmdline.c12
-rw-r--r--block/sed-opal.c252
-rw-r--r--drivers/block/nbd.c1
-rw-r--r--drivers/block/swim3.c2
-rw-r--r--drivers/block/ublk_drv.c366
-rw-r--r--drivers/md/Kconfig11
-rw-r--r--drivers/md/dm-crypt.c1
-rw-r--r--drivers/md/dm-raid.c1
-rw-r--r--drivers/md/md-bitmap.c347
-rw-r--r--drivers/md/md-bitmap.h1
-rw-r--r--drivers/md/md-cluster.c8
-rw-r--r--drivers/md/md-faulty.c2
-rw-r--r--drivers/md/md-linear.c1
-rw-r--r--drivers/md/md-multipath.c1
-rw-r--r--drivers/md/md.c228
-rw-r--r--drivers/md/md.h13
-rw-r--r--drivers/md/raid0.c98
-rw-r--r--drivers/md/raid1.c86
-rw-r--r--drivers/md/raid1.h1
-rw-r--r--drivers/md/raid10.c85
-rw-r--r--drivers/md/raid10.h1
-rw-r--r--drivers/md/raid5-cache.c14
-rw-r--r--drivers/md/raid5.c72
-rw-r--r--drivers/nvme/host/ioctl.c1
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c3
-rw-r--r--drivers/scsi/scsi_lib.c12
-rw-r--r--drivers/target/target_core_iblock.c3
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/Kconfig1
-rw-r--r--fs/affs/Kconfig1
-rw-r--r--fs/befs/Kconfig1
-rw-r--r--fs/bfs/Kconfig1
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/efs/Kconfig1
-rw-r--r--fs/exfat/Kconfig1
-rw-r--r--fs/ext2/Kconfig1
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/f2fs/Kconfig1
-rw-r--r--fs/f2fs/file.c2
-rw-r--r--fs/fat/Kconfig1
-rw-r--r--fs/freevxfs/Kconfig1
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/file.c16
-rw-r--r--fs/hfs/Kconfig1
-rw-r--r--fs/hfsplus/Kconfig1
-rw-r--r--fs/hpfs/Kconfig1
-rw-r--r--fs/internal.h6
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/isofs/Kconfig1
-rw-r--r--fs/jfs/Kconfig1
-rw-r--r--fs/minix/Kconfig1
-rw-r--r--fs/nilfs2/Kconfig1
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/ntfs/Kconfig1
-rw-r--r--fs/ntfs3/Kconfig1
-rw-r--r--fs/ocfs2/Kconfig1
-rw-r--r--fs/omfs/Kconfig1
-rw-r--r--fs/qnx4/Kconfig1
-rw-r--r--fs/qnx6/Kconfig1
-rw-r--r--fs/reiserfs/Kconfig1
-rw-r--r--fs/romfs/Kconfig1
-rw-r--r--fs/super.c4
-rw-r--r--fs/sysv/Kconfig1
-rw-r--r--fs/udf/Kconfig1
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/ufs/Kconfig1
-rw-r--r--include/linux/bio.h7
-rw-r--r--include/linux/blk-mq.h6
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/buffer_head.h44
-rw-r--r--include/linux/iomap.h4
-rw-r--r--include/linux/mm.h18
-rw-r--r--include/linux/sed-opal.h5
-rw-r--r--include/trace/events/block.h2
-rw-r--r--include/trace/events/kyber.h8
-rw-r--r--include/trace/events/wbt.h8
-rw-r--r--include/uapi/linux/ioprio.h21
-rw-r--r--include/uapi/linux/sed-opal.h25
-rw-r--r--include/uapi/linux/ublk_cmd.h64
-rw-r--r--lib/raid6/mktables.c2
-rw-r--r--lib/raid6/recov.c1
-rw-r--r--lib/raid6/test/.gitignore3
-rw-r--r--lib/raid6/test/Makefile50
-rw-r--r--mm/migrate.c4
-rw-r--r--tools/cgroup/iocost_monitor.py21
101 files changed, 1683 insertions, 844 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 86122e459fe0..f1364d1c0d93 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,6 +5,7 @@
menuconfig BLOCK
bool "Enable the block layer" if EXPERT
default y
+ select FS_IOMAP
select SBITMAP
help
Provide block layer support for the kernel.
@@ -183,6 +184,8 @@ config BLK_DEBUG_FS_ZONED
config BLK_SED_OPAL
bool "Logic for interfacing with Opal enabled SEDs"
+ depends on KEYS
+ select PSERIES_PLPKS if PPC_PSERIES
help
Builds Logic for interfacing with Opal enabled controllers.
Enabling this option enables users to setup/unlock/lock
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4533eb491661..ec8ac8cf6e1b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -123,20 +123,38 @@ void bio_integrity_free(struct bio *bio)
int bio_integrity_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_integrity_payload *bip = bio_integrity(bio);
- if (bip->bip_vcnt >= bip->bip_max_vcnt) {
- printk(KERN_ERR "%s: bip_vec full\n", __func__);
+ if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
+ queue_max_hw_sectors(q))
return 0;
- }
- if (bip->bip_vcnt &&
- bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
- &bip->bip_vec[bip->bip_vcnt - 1], offset))
- return 0;
+ if (bip->bip_vcnt > 0) {
+ struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
+ bool same_page = false;
+
+ if (bvec_try_merge_hw_page(q, bv, page, len, offset,
+ &same_page)) {
+ bip->bip_iter.bi_size += len;
+ return len;
+ }
+
+ if (bip->bip_vcnt >=
+ min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
+ return 0;
+
+ /*
+ * If the queue doesn't support SG gaps and adding this segment
+ * would create a gap, disallow it.
+ */
+ if (bvec_gap_to_prev(&q->limits, bv, offset))
+ return 0;
+ }
bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
bip->bip_vcnt++;
+ bip->bip_iter.bi_size += len;
return len;
}
@@ -199,8 +217,6 @@ bool bio_integrity_prep(struct bio *bio)
unsigned long start, end;
unsigned int len, nr_pages;
unsigned int bytes, offset, i;
- unsigned int intervals;
- blk_status_t status;
if (!bi)
return true;
@@ -224,12 +240,10 @@ bool bio_integrity_prep(struct bio *bio)
!(bi->flags & BLK_INTEGRITY_GENERATE))
return true;
}
- intervals = bio_integrity_intervals(bi, bio_sectors(bio));
/* Allocate kernel buffer for protection data */
- len = intervals * bi->tuple_size;
+ len = bio_integrity_bytes(bi, bio_sectors(bio));
buf = kmalloc(len, GFP_NOIO);
- status = BLK_STS_RESOURCE;
if (unlikely(buf == NULL)) {
printk(KERN_ERR "could not allocate integrity buffer\n");
goto err_end_io;
@@ -244,12 +258,10 @@ bool bio_integrity_prep(struct bio *bio)
if (IS_ERR(bip)) {
printk(KERN_ERR "could not allocate data integrity bioset\n");
kfree(buf);
- status = BLK_STS_RESOURCE;
goto err_end_io;
}
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
- bip->bip_iter.bi_size = len;
bip_set_seed(bip, bio->bi_iter.bi_sector);
if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
@@ -257,28 +269,18 @@ bool bio_integrity_prep(struct bio *bio)
/* Map it */
offset = offset_in_page(buf);
- for (i = 0 ; i < nr_pages ; i++) {
- int ret;
+ for (i = 0; i < nr_pages && len > 0; i++) {
bytes = PAGE_SIZE - offset;
- if (len <= 0)
- break;
-
if (bytes > len)
bytes = len;
- ret = bio_integrity_add_page(bio, virt_to_page(buf),
- bytes, offset);
-
- if (ret == 0) {
+ if (bio_integrity_add_page(bio, virt_to_page(buf),
+ bytes, offset) < bytes) {
printk(KERN_ERR "could not attach integrity payload\n");
- status = BLK_STS_RESOURCE;
goto err_end_io;
}
- if (ret < bytes)
- break;
-
buf += bytes;
len -= bytes;
offset = 0;
@@ -294,10 +296,9 @@ bool bio_integrity_prep(struct bio *bio)
return true;
err_end_io:
- bio->bi_status = status;
+ bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return false;
-
}
EXPORT_SYMBOL(bio_integrity_prep);
diff --git a/block/bio.c b/block/bio.c
index 8672179213b9..816d412c06e9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -606,15 +606,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
}
EXPORT_SYMBOL(bio_kmalloc);
-void zero_fill_bio(struct bio *bio)
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter)
+ __bio_for_each_segment(bv, bio, iter, start)
memzero_bvec(&bv);
}
-EXPORT_SYMBOL(zero_fill_bio);
+EXPORT_SYMBOL(zero_fill_bio_iter);
/**
* bio_truncate - truncate the bio to small size of @new_size
@@ -903,9 +903,8 @@ static inline bool bio_full(struct bio *bio, unsigned len)
return false;
}
-static inline bool page_is_mergeable(const struct bio_vec *bv,
- struct page *page, unsigned int len, unsigned int off,
- bool *same_page)
+static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
+ unsigned int len, unsigned int off, bool *same_page)
{
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
@@ -919,49 +918,15 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
return false;
*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
- if (*same_page)
- return true;
- else if (IS_ENABLED(CONFIG_KMSAN))
- return false;
- return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
-}
-
-/**
- * __bio_try_merge_page - try appending data to an existing bvec.
- * @bio: destination bio
- * @page: start page to add
- * @len: length of the data to add
- * @off: offset of the data relative to @page
- * @same_page: return if the segment has been merged inside the same page
- *
- * Try to add the data at @page + @off to the last bvec of @bio. This is a
- * useful optimisation for file systems with a block size smaller than the
- * page size.
- *
- * Warn if (@len, @off) crosses pages in case that @same_page is true.
- *
- * Return %true on success or %false on failure.
- */
-static bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off, bool *same_page)
-{
- if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return false;
-
- if (bio->bi_vcnt > 0) {
- struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- if (page_is_mergeable(bv, page, len, off, same_page)) {
- if (bio->bi_iter.bi_size > UINT_MAX - len) {
- *same_page = false;
- return false;
- }
- bv->bv_len += len;
- bio->bi_iter.bi_size += len;
- return true;
- }
+ if (!*same_page) {
+ if (IS_ENABLED(CONFIG_KMSAN))
+ return false;
+ if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
+ return false;
}
- return false;
+
+ bv->bv_len += len;
+ return true;
}
/*
@@ -969,11 +934,10 @@ static bool __bio_try_merge_page(struct bio *bio, struct page *page,
* size limit. This is not for normal read/write bios, but for passthrough
* or Zone Append operations that we can't split.
*/
-static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned len,
- unsigned offset, bool *same_page)
+bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
+ struct page *page, unsigned len, unsigned offset,
+ bool *same_page)
{
- struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queu