Merge tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - NVMe updates via Keith:
     - Device initialization memory leak fixes (Keith)
     - More constants defined (Weiwen)
     - Target debugfs support (Hannes)
     - PCIe subsystem reset enhancements (Keith)
     - Queue-depth multipath policy (Redhat and PureStorage)
     - Implement get_unique_id (Christoph)
     - Authentication error fixes (Gaosheng)

 - MD updates via Song
     - sync_action fix and refactoring (Yu Kuai)
     - Various small fixes (Christoph Hellwig, Li Nan, and Ofir Gal, Yu Kuai, Benjamin Marzinski, Christophe JAILLET, Yang Li)

 - Fix loop detach/open race (Gulam)

 - Fix lower control limit for blk-throttle (Yu)

 - Add module descriptions to various drivers (Jeff)

 - Add support for atomic writes for block devices, and statx reporting for same. Includes SCSI and NVMe (John, Prasad, Alan)

 - Add IO priority information to block trace points (Dongliang)

 - Various zone improvements and tweaks (Damien)

 - mq-deadline tag reservation improvements (Bart)

 - Ignore direct reclaim swap writes in writeback throttling (Baokun)

 - Block integrity improvements and fixes (Anuj)

 - Add basic support for rust based block drivers. Has a dummy null_blk variant for now (Andreas)

 - Series converting driver settings to queue limits, and cleanups and fixes related to that (Christoph)

 - Cleanup for poking too deeply into the bvec internals, in preparation for DMA mapping API changes (Christoph)

 - Various minor tweaks and fixes (Jiapeng, John, Kanchan, Mikulas, Ming, Zhu, Damien, Christophe, Chaitanya)

* tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux: (206 commits)
  floppy: add missing MODULE_DESCRIPTION() macro
  loop: add missing MODULE_DESCRIPTION() macro
  ublk_drv: add missing MODULE_DESCRIPTION() macro
  xen/blkback: add missing MODULE_DESCRIPTION() macro
  block/rnbd: Constify struct kobj_type
  block: take offset into account in blk_bvec_map_sg again
  block: fix get_max_segment_size() warning
  loop: Don't bother validating blocksize
  virtio_blk: Don't bother validating blocksize
  null_blk: Don't bother validating blocksize
  block: Validate logical block size in blk_validate_limits()
  virtio_blk: Fix default logical block size fallback
  nvmet-auth: fix nvmet_auth hash error handling
  nvme: implement ->get_unique_id
  block: pass a phys_addr_t to get_max_segment_size
  block: add a bvec_phys helper
  blk-lib: check for kill signal in ioctl BLKZEROOUT
  block: limit the Write Zeroes to manually writing zeroes fallback
  block: refacto blkdev_issue_zeroout
  block: move read-only and supported checks into (__)blkdev_issue_zeroout
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-15 14:20:22 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-15 14:20:22 -0700
commit: 3e7819886281e077e82006fe4804b0d6b0f5643b (patch)
tree: 40766af623d8a1dde0edaee8b6abc496efbcc615 /block
parent: 3a56e241732975c2c1247047ddbfc0ac6f6a4905 (diff)
parent: 3c1743a685b19bc17cf65af4a2eb149fd3b15c50 (diff)
download: linux-3e7819886281e077e82006fe4804b0d6b0f5643b.tar.gz
linux-3e7819886281e077e82006fe4804b0d6b0f5643b.tar.bz2
linux-3e7819886281e077e82006fe4804b0d6b0f5643b.zip
30 files changed, 1113 insertions, 1416 deletions
diff --git a/block/Kconfig b/block/Kconfig
index dc12af58dbae..5b623b876d3b 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -62,6 +62,8 @@ config BLK_DEV_BSGLIB
 
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
+	select CRC_T10DIF
+	select CRC64_ROCKSOFT
 	help
 	Some storage devices allow extra information to be
 	stored/retrieved to help protect the data.  The block layer
@@ -72,12 +74,6 @@ config BLK_DEV_INTEGRITY
 	T10/SCSI Data Integrity Field or the T13/ATA External Path
 	Protection.  If in doubt, say N.
 
-config BLK_DEV_INTEGRITY_T10
-	tristate
-	depends on BLK_DEV_INTEGRITY
-	select CRC_T10DIF
-	select CRC64_ROCKSOFT
-
 config BLK_DEV_WRITE_MOUNTED
 	bool "Allow writing to mounted block devices"
 	default y
diff --git a/block/Makefile b/block/Makefile
index 168150b9c510..ddfd21c1a9ff 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -26,8 +26,7 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
 obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o
 
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY_T10)	+= t10-pi.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
 obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
 obj-$(CONFIG_BLK_MQ_VIRTIO)	+= blk-mq-virtio.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
diff --git a/block/bdev.c b/block/bdev.c
index 353677ac49b3..c5507b6f63b8 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -385,7 +385,7 @@ static struct file_system_type bd_type = {
 };
 
 struct super_block *blockdev_superblock __ro_after_init;
-struct vfsmount *blockdev_mnt __ro_after_init;
+static struct vfsmount *blockdev_mnt __ro_after_init;
 EXPORT_SYMBOL_GPL(blockdev_superblock);
 
 void __init bdev_cache_init(void)
@@ -1260,23 +1260,42 @@ void sync_bdevs(bool wait)
 }
 
 /*
- * Handle STATX_DIOALIGN for block devices.
- *
- * Note that the inode passed to this is the inode of a block device node file,
- * not the block device's internal inode.  Therefore it is *not* valid to use
- * I_BDEV() here; the block device has to be looked up by i_rdev instead.
+ * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
  */
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+void bdev_statx(struct path *path, struct kstat *stat,
+		u32 request_mask)
 {
+	struct inode *backing_inode;
 	struct block_device *bdev;
 
-	bdev = blkdev_get_no_open(inode->i_rdev);
+	if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
+		return;
+
+	backing_inode = d_backing_inode(path->dentry);
+
+	/*
+	 * Note that backing_inode is the inode of a block device node file,
+	 * not the block device's internal inode.  Therefore it is *not* valid
+	 * to use I_BDEV() here; the block device has to be looked up by i_rdev
+	 * instead.
+	 */
+	bdev = blkdev_get_no_open(backing_inode->i_rdev);
 	if (!bdev)
 		return;
 
-	stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
-	stat->dio_offset_align = bdev_logical_block_size(bdev);
-	stat->result_mask |= STATX_DIOALIGN;
+	if (request_mask & STATX_DIOALIGN) {
+		stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+		stat->dio_offset_align = bdev_logical_block_size(bdev);
+		stat->result_mask |= STATX_DIOALIGN;
+	}
+
+	if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
+		struct request_queue *bd_queue = bdev->bd_queue;
+
+		generic_fill_statx_atomic_writes(stat,
+			queue_atomic_write_unit_min_bytes(bd_queue),
+			queue_atomic_write_unit_max_bytes(bd_queue));
+	}
 
 	blkdev_put_no_open(bdev);
 }
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d442ee358fc2..b758693697c0 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -797,57 +797,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	 */
 	bfq_link_bfqg(bfqd, bfqg);
 	__bfq_bic_change_cgroup(bfqd, bic, bfqg);
-	/*
-	 * Update blkg_path for bfq_log_* functions. We cache this
-	 * path, and update it here, for the following
-	 * reasons. Operations on blkg objects in blk-cgroup are
-	 * protected with the request_queue lock, and not with the
-	 * lock that protects the instances of this scheduler
-	 * (bfqd->lock). This exposes BFQ to the following sort of
-	 * race.
-	 *
-	 * The blkg_lookup performed in bfq_get_queue, protected
-	 * through rcu, may happen to return the address of a copy of
-	 * the original blkg. If this is the case, then the
-	 * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
-	 * the blkg, is useless: it does not prevent blk-cgroup code
-	 * from destroying both the original blkg and all objects
-	 * directly or indirectly referred by the copy of the
-	 * blkg.
-	 *
-	 * On the bright side, destroy operations on a blkg invoke, as
-	 * a first step, hooks of the scheduler associated with the
-	 * blkg. And these hooks are executed with bfqd->lock held for
-	 * BFQ. As a consequence, for any blkg associated with the
-	 * request queue this instance of the scheduler is attached
-	 * to, we are guaranteed that such a blkg is not destroyed, and
-	 * that all the pointers it contains are consistent, while we
-	 * are holding bfqd->lock. A blkg_lookup performed with
-	 * bfqd->lock held then returns a fully consistent blkg, which
-	 * remains consistent until this lock is held.
-	 *
-	 * Thanks to the last fact, and to the fact that: (1) bfqg has
-	 * been obtained through a blkg_lookup in the above
-	 * assignment, and (2) bfqd->lock is being held, here we can
-	 * safely use the policy data for the involved blkg (i.e., the
-	 * field bfqg->pd) to get to the blkg associated with bfqg,
-	 * and then we can safely use any field of blkg. After we
-	 * release bfqd->lock, even just getting blkg through this
-	 * bfqg may cause dangling references to be traversed, as
-	 * bfqg->pd may not exist any more.
-	 *
-	 * In view of the above facts, here we cache, in the bfqg, any
-	 * blkg data we may need for this bic, and for its associated
-	 * bfq_queue. As of now, we need to cache only the path of the
-	 * blkg, which is used in the bfq_log_* functions.
-	 *
-	 * Finally, note that bfqg itself needs to be protected from
-	 * destruction on the blkg_free of the original blkg (which
-	 * invokes bfq_pd_free). We use an additional private
-	 * refcounter for bfqg, to let it disappear only after no
-	 * bfq_queue refers to it any longer.
-	 */
-	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
 	bic->blkcg_serial_nr = serial_nr;
 }
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 4b88a54a9b76..36a4998c4b37 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5463,40 +5463,42 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
 	}
 }
 
+static void _bfq_exit_icq(struct bfq_io_cq *bic, unsigned int num_actuators)
+{
+	struct bfq_iocq_bfqq_data *bfqq_data = bic->bfqq_data;
+	unsigned int act_idx;
+
+	for (act_idx = 0; act_idx < num_actuators; act_idx++) {
+		if (bfqq_data[act_idx].stable_merge_bfqq)
+			bfq_put_stable_ref(bfqq_data[act_idx].stable_merge_bfqq);
+
+		bfq_exit_icq_bfqq(bic, true, act_idx);
+		bfq_exit_icq_bfqq(bic, false, act_idx);
+	}
+}
+
 static void bfq_exit_icq(struct io_cq *icq)
 {
 	struct bfq_io_cq *bic = icq_to_bic(icq);
 	struct bfq_data *bfqd = bic_to_bfqd(bic);
 	unsigned long flags;
-	unsigned int act_idx;
+
 	/*
 	 * If bfqd and thus bfqd->num_actuators is not available any
 	 * longer, then cycle over all possible per-actuator bfqqs in
 	 * next loop. We rely on bic being zeroed on creation, and
 	 * therefore on its unused per-actuator fields being NULL.
-	 */
-	unsigned int num_actuators = BFQ_MAX_ACTUATORS;
-	struct bfq_iocq_bfqq_data *bfqq_data = bic->bfqq_data;
-
-	/*
+	 *
 	 * bfqd is NULL if scheduler already exited, and in that case
 	 * this is the last time these queues are accessed.
 	 */
 	if (bfqd) {
 		spin_lock_irqsave(&bfqd->lock, flags);
-		num_actuators = bfqd->num_actuators;
-	}
-
-	for (act_idx = 0; act_idx < num_actuators; act_idx++) {
-		if (bfqq_data[act_idx].stable_merge_bfqq)
-			bfq_put_stable_ref(bfqq_data[act_idx].stable_merge_bfqq);
-
-		bfq_exit_icq_bfqq(bic, true, act_idx);
-		bfq_exit_icq_bfqq(bic, false, act_idx);
-	}
-
-	if (bfqd)
+		_bfq_exit_icq(bic, bfqd->num_actuators);
 		spin_unlock_irqrestore(&bfqd->lock, flags);
+	} else {
+		_bfq_exit_icq(bic, BFQ_MAX_ACTUATORS);
+	}
 }
 
 /*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 467e8cfc41a2..08ddf2cfae5b 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -1003,9 +1003,6 @@ struct bfq_group {
 	/* must be the first member */
 	struct blkg_policy_data pd;
 
-	/* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
-	char blkg_path[128];
-
 	/* reference counter (see comments in bfq_bic_update_cgroup) */
 	refcount_t ref;
 
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8b528e12136f..b78c145eb026 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -76,7 +76,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 					  &bip->bip_max_vcnt, gfp_mask);
 		if (!bip->bip_vec)
 			goto err;
-	} else {
+	} else if (nr_vecs) {
 		bip->bip_vec = bip->bip_inline_vecs;
 	}
 
@@ -276,6 +276,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
 
 	bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
 	bip->bip_iter.bi_sector = seed;
+	bip->bip_vcnt = nr_vecs;
 	return 0;
 free_bip:
 	bio_integrity_free(bio);
@@ -297,6 +298,7 @@ static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
 	bip->bip_flags |= BIP_INTEGRITY_USER;
 	bip->bip_iter.bi_sector = seed;
 	bip->bip_iter.bi_size = len;
+	bip->bip_vcnt = nr_vecs;
 	return 0;
 }
 
@@ -334,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
 			   u32 seed)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
+	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
 	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
 	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
 	unsigned int direction, nr_bvecs;
@@ -397,44 +399,6 @@ free_bvec:
 EXPORT_SYMBOL_GPL(bio_integrity_map_user);
 
 /**
- * bio_integrity_process - Process integrity metadata for a bio
- * @bio:	bio to generate/verify integrity metadata for
- * @proc_iter:  iterator to process
- * @proc_fn:	Pointer to the relevant processing function
- */
-static blk_status_t bio_integrity_process(struct bio *bio,
-		struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
-{
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
-	struct blk_integrity_iter iter;
-	struct bvec_iter bviter;
-	struct bio_vec bv;
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-	blk_status_t ret = BLK_STS_OK;
-
-	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
-	iter.interval = 1 << bi->interval_exp;
-	iter.tuple_size = bi->tuple_size;
-	iter.seed = proc_iter->bi_sector;
-	iter.prot_buf = bvec_virt(bip->bip_vec);
-	iter.pi_offset = bi->pi_offset;
-
-	__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
-		void *kaddr = bvec_kmap_local(&bv);
-
-		iter.data_buf = kaddr;
-		iter.data_size = bv.bv_len;
-		ret = proc_fn(&iter);
-		kunmap_local(kaddr);
-
-		if (ret)
-			break;
-
-	}
-	return ret;
-}
-
-/**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
  *
@@ -450,17 +414,13 @@ bool bio_integrity_prep(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+	unsigned int len;
 	void *buf;
-	unsigned long start, end;
-	unsigned int len, nr_pages;
-	unsigned int bytes, offset, i;
+	gfp_t gfp = GFP_NOIO;
 
 	if (!bi)
 		return true;
 
-	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-		return true;
-
 	if (!bio_sectors(bio))
 		return true;
 
@@ -468,32 +428,36 @@ bool bio_integrity_prep(struct bio *bio)
 	if (bio_integrity(bio))
 		return true;
 
-	if (bio_data_dir(bio) == READ) {
-		if (!bi->profile->verify_fn ||
-		    !(bi->flags & BLK_INTEGRITY_VERIFY))
+	switch (bio_op(bio)) {
+	case REQ_OP_READ:
+		if (bi->flags & BLK_INTEGRITY_NOVERIFY)
 			return true;
-	} else {
-		if (!bi->profile->generate_fn ||
-		    !(bi->flags & BLK_INTEGRITY_GENERATE))
+		break;
+	case REQ_OP_WRITE:
+		if (bi->flags & BLK_INTEGRITY_NOGENERATE)
 			return true;
+
+		/*
+		 * Zero the memory allocated to not leak uninitialized kernel
+		 * memory to disk for non-integrity metadata where nothing else
+		 * initializes the memory.
+		 */
+		if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+			gfp |= __GFP_ZERO;
+		break;
+	default:
+		return true;
 	}
 
 	/* Allocate kernel buffer for protection data */
 	len = bio_integrity_bytes(bi, bio_sectors(bio));
-	buf = kmalloc(len, GFP_NOIO);
+	buf = kmalloc(len, gfp);
 	if (unlikely(buf == NULL)) {
-		printk(KERN_ERR "could not allocate integrity buffer\n");
 		goto err_end_io;
 	}
 
-	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	start = ((unsigned long) buf) >> PAGE_SHIFT;
-	nr_pages = end - start;
-
-	/* Allocate bio integrity payload and integrity vectors */
-	bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
+	bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
 	if (IS_ERR(bip)) {
-		printk(KERN_ERR "could not allocate data integrity bioset\n");
 		kfree(buf);
 		goto err_end_io;
 	}
@@ -501,35 +465,20 @@ bool bio_integrity_prep(struct bio *bio)
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
 	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
-	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
+	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
 		bip->bip_flags |= BIP_IP_CHECKSUM;
 
-	/* Map it */
-	offset = offset_in_page(buf);
-	for (i = 0; i < nr_pages && len > 0; i++) {
-		bytes = PAGE_SIZE - offset;
-
-		if (bytes > len)
-			bytes = len;
-
-		if (bio_integrity_add_page(bio, virt_to_page(buf),
-					   bytes, offset) < bytes) {
-			printk(KERN_ERR "could not attach integrity payload\n");
-			goto err_end_io;
-		}
-
-		buf += bytes;
-		len -= bytes;
-		offset = 0;
+	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
+			offset_in_page(buf)) < len) {
+		printk(KERN_ERR "could not attach integrity payload\n");
+		goto err_end_io;
 	}
 
 	/* Auto-generate integrity metadata if this is a write */
-	if (bio_data_dir(bio) == WRITE) {
-		bio_integrity_process(bio, &bio->bi_iter,
-				      bi->profile->generate_fn);
-	} else {
+	if (bio_data_dir(bio) == WRITE)
+		blk_integrity_generate(bio);
+	else
 		bip->bio_iter = bio->bi_iter;
-	}
 	return true;
 
 err_end_io:
@@ -552,15 +501,8 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio_integrity_payload *bip =
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 
-	/*
-	 * At the moment verify is called bio's iterator was advanced
-	 * during split and completion, we need to rewind iterator to
-	 * it's original position.
-	 */
-	bio->bi_status = bio_integrity_process(bio, &bip->bio_iter,
-						bi->profile->verify_fn);
+	blk_integrity_verify(bio);
 	bio_integrity_free(bio);
 	bio_endio(bio);
 }
@@ -582,7 +524,7 @@ bool __bio_integrity_endio(struct bio *bio)
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 
 	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
-	    (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
+	    (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->csum_type) {
 		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
 		queue_work(kintegrityd_wq, &bip->bip_work);
 		return false;
@@ -642,14 +584,11 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 
 	BUG_ON(bip_src == NULL);
 
-	bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
+	bip = bio_integrity_alloc(bio, gfp_mask, 0);
 	if (IS_ERR(bip))
 		return PTR_ERR(bip);
 
-	memcpy(bip->bip_vec, bip_src->bip_vec,
-	       bip_src->bip_vcnt * sizeof(struct bio_vec));
-
-	bip->bip_vcnt = bip_src->bip_vcnt;
+	bip->bip_vec = bip_src->bip_vec;
 	bip->bip_iter = bip_src->bip_iter;
 	bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
 
diff --git a/block/bio.c b/block/bio.c
index e9e809a63c59..a3b1b2266c50 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -953,7 +953,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
 		bool *same_page)
 {
 	unsigned long mask = queue_segment_boundary(q);
-	phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
+	phys_addr_t addr1 = bvec_phys(bv);
 	phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
 
 	if ((addr1 | mask) != (addr2 | mask))
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 90b3959d88cf..bd472a30bc61 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -301,19 +301,6 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 }
 
 /**
- * blkg_path - format cgroup path of blkg
- * @blkg: blkg of interest
- * @buf: target buffer
- * @buflen: target buffer length
- *
- * Format the path of the cgroup of @blkg into @buf.
- */
-static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
-{
-	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-}
-
-/**
  * blkg_get - get a blkg reference
  * @blkg: blkg to get
  *
diff --git a/block/blk-core.c b/block/blk-core.c
index 82c3ae22d76d..02bceeb36f2c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -94,20 +94,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_queue_flag_clear);
 
-/**
- * blk_queue_flag_test_and_set - atomically test and set a queue flag
- * @flag: flag to be set
- * @q: request queue
- *
- * Returns the previous value of @flag - 0 if the flag was not set and 1 if
- * the flag was already set.
- */
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
-{
-	return test_and_set_bit(flag, &q->queue_flags);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
-
 #define REQ_OP_NAME(name) [REQ_OP_##name] = #name
 static const char *const blk_op_name[] = {
 	REQ_OP_NAME(READ),
@@ -174,6 +160,8 @@ static const struct {
 	/* Command duration limit device-side timeout */
 	[BLK_STS_DURATION_LIMIT]	= { -ETIME, "duration limit exceeded" },
 
+	[BLK_STS_INVAL]		= { -EINVAL,	"invalid" },
+
 	/* everything else not covered above: */
 	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
 };
@@ -739,6 +727,18 @@ void submit_bio_noacct_nocheck(struct bio *bio)
 		__submit_bio_noacct(bio);
 }
 
+static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
+						 struct bio *bio)
+{
+	if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
+		return BLK_STS_INVAL;
+
+	if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
+		return BLK_STS_INVAL;
+
+	return BLK_STS_OK;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -782,7 +782,7 @@ void submit_bio_noacct(struct bio *bio)
 		if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE &&
 				 bio_op(bio) != REQ_OP_ZONE_APPEND))
 			goto end_io;
-		if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+		if (!bdev_write_cache(bdev)) {
 			bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
 			if (!bio_sectors(bio)) {
 				status = BLK_STS_OK;
@@ -791,12 +791,17 @@ void submit_bio_noacct(struct bio *bio)
 		}
 	}
 
-	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+	if (!(q->limits.features & BLK_FEAT_POLL))
 		bio_clear_polled(bio);
 
 	switch (bio_op(bio)) {
 	case REQ_OP_READ:
 	case REQ_OP_WRITE:
+		if (bio->bi_opf & REQ_ATOMIC) {
+			status = blk_validate_atomic_write_op_size(q, bio);
+			if (status != BLK_STS_OK)
+				goto end_io;
+		}
 		break;
 	case REQ_OP_FLUSH:
 		/*
@@ -825,11 +830,8 @@ void submit_bio_noacct(struct bio *bio)
 	case REQ_OP_ZONE_OPEN:
 	case REQ_OP_ZONE_CLOSE:
 	case REQ_OP_ZONE_FINISH:
-		if (!bdev_is_zoned(bio->bi_bdev))
-			goto not_supported;
-		break;
 	case REQ_OP_ZONE_RESET_ALL:
-		if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+		if (!bdev_is_zoned(bio->bi_bdev))
 			goto not_supported;
 		break;
 	case REQ_OP_DRV_IN:
@@ -915,8 +917,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 		return 0;
 
 	q = bdev_get_queue(bdev);
-	if (cookie == BLK_QC_T_NONE ||
-	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+	if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
 		return 0;
 
 	blk_flush_plug(current->plug, false);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index cca4f9131f79..a72e2a83d075 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -100,23 +100,6 @@ blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
 	return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
 }
 
-static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
-{
-	unsigned int policy = 0;
-
-	if (blk_rq_sectors(rq))
-		policy |= REQ_FSEQ_DATA;
-
-	if (fflags & (1UL << QUEUE_FLAG_WC)) {
-		if (rq->cmd_flags & REQ_PREFLUSH)
-			policy |= REQ_FSEQ_PREFLUSH;
-		if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
-		    (rq->cmd_flags & REQ_FUA))
-			policy |= REQ_FSEQ_POSTFLUSH;
-	}
-	return policy;
-}
-
 static unsigned int blk_flush_cur_seq(struct request *rq)
 {
 	return 1 << ffz(rq->flush.seq);
@@ -399,19 +382,32 @@ static void blk_rq_init_flush(struct request *rq)
 bool blk_insert_flush(struct request *rq)
 {
 	struct request_queue *q = rq->q;
-	unsigned long fflags = q->queue_flags;	/* may change, cache */
-	unsigned int policy = blk_flush_policy(fflags, rq);
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
+	bool supports_fua = q->limits.features & BLK_FEAT_FUA;
+	unsigned int policy = 0;
 
 	/* FLUSH/FUA request must never be merged */
 	WARN_ON_ONCE(rq->bio != rq->biotail);
 
+	if (blk_rq_sectors(rq))
+		policy |= REQ_FSEQ_DATA;
+
+	/*
+	 * Check which flushes we need to sequence for this operation.
+	 */
+	if (blk_queue_write_cache(q)) {
+		if (rq->cmd_flags & REQ_PREFLUSH)
+			policy |= REQ_FSEQ_PREFLUSH;
+		if ((rq->cmd_flags & REQ_FUA) && !supports_fua)
+			policy |= REQ_FSEQ_POSTFLUSH;
+	}
+
 	/*
 	 * @policy now records what operations need to be done.  Adjust
 	 * REQ_PREFLUSH and FUA for the driver.
 	 */
 	rq->cmd_flags &= ~REQ_PREFLUSH;
-	if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
+	if (!supports_fua)
 		rq->cmd_flags &= ~REQ_FUA;
 
 	/*
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index ccbeb6dfa87a..010decc892ea 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -107,60 +107,6 @@ new_segment:
 }
 EXPORT_SYMBOL(blk_rq_map_integrity_sg);
 
-/**
- * blk_integrity_compare - Compare integrity profile of two disks
- * @gd1:	Disk to compare
- * @gd2:	Disk to compare
- *
- * Description: Meta-devices like DM and MD need to verify that all
- * sub-devices use the same integrity format before advertising to
- * upper layers that they can send/receive integrity metadata.  This
- * function can be used to check whether two gendisk devices have
- * compatible integrity formats.
- */
-int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
-{
-	struct blk_integrity *b1 = &gd1->queue->integrity;
-	struct blk_integrity *b2 = &gd2->queue->integrity;
-
-	if (!b1->profile && !b2->profile)
-		return 0;
-
-	if (!b1->profile || !b2->profile)
-		return -1;
-
-	if (b1->interval_exp != b2->interval_exp) {
-		pr_err("%s: %s/%s protection interval %u != %u\n",
-		       __func__, gd1->disk_name, gd2->disk_name,
-		       1 << b1->interval_exp, 1 << b2->interval_exp);
-		return -1;
-	}
-
-	if (b1->tu
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-15 14:20:22 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-15 14:20:22 -0700
commit	3e7819886281e077e82006fe4804b0d6b0f5643b (patch)
tree	40766af623d8a1dde0edaee8b6abc496efbcc615 /block
parent	3a56e241732975c2c1247047ddbfc0ac6f6a4905 (diff)
parent	3c1743a685b19bc17cf65af4a2eb149fd3b15c50 (diff)
download	linux-3e7819886281e077e82006fe4804b0d6b0f5643b.tar.gz linux-3e7819886281e077e82006fe4804b0d6b0f5643b.tar.bz2 linux-3e7819886281e077e82006fe4804b0d6b0f5643b.zip