summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 12:47:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 12:47:20 -0700
commita0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch)
tree9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /block
parent0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff)
parentfcaa174a9c995cf0af3967e55644a1543ea07e36 (diff)
downloadlinux-a0433f8cae3ac51f59b4b1863032822aaa2d8164.tar.gz
linux-a0433f8cae3ac51f59b4b1863032822aaa2d8164.tar.bz2
linux-a0433f8cae3ac51f59b4b1863032822aaa2d8164.zip
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull request via Keith: - Various cleanups all around (Irvin, Chaitanya, Christophe) - Better struct packing (Christophe JAILLET) - Reduce controller error logs for optional commands (Keith) - Support for >=64KiB block sizes (Daniel Gomez) - Fabrics fixes and code organization (Max, Chaitanya, Daniel Wagner) - bcache updates via Coly: - Fix a race at init time (Mingzhe Zou) - Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye) - use page pinning in the block layer for dio (David) - convert old block dio code to page pinning (David, Christoph) - cleanups for pktcdvd (Andy) - cleanups for rnbd (Guoqing) - use the unchecked __bio_add_page() for the initial single page additions (Johannes) - fix overflows in the Amiga partition handling code (Michael) - improve mq-deadline zoned device support (Bart) - keep passthrough requests out of the IO schedulers (Christoph, Ming) - improve support for flush requests, making them less special to deal with (Christoph) - add bdev holder ops and shutdown methods (Christoph) - fix the name_to_dev_t() situation and use cases (Christoph) - decouple the block open flags from fmode_t (Christoph) - ublk updates and cleanups, including adding user copy support (Ming) - BFQ sanity checking (Bart) - convert brd from radix to xarray (Pankaj) - constify various structures (Thomas, Ivan) - more fine grained persistent reservation ioctl capability checks (Jingbo) - misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan, Jordy, Li, Min, Yu, Zhong, Waiman) * tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits) scsi/sg: don't grab scsi host module reference ext4: Fix warning in blkdev_put() block: don't return -EINVAL for not found names in devt_from_devname cdrom: Fix spectre-v1 gadget block: Improve kernel-doc headers blk-mq: don't insert passthrough request into sw queue bsg: make bsg_class a static const structure ublk: make ublk_chr_class a static const structure aoe: make aoe_class a static const structure block/rnbd: make all 'class' structures const block: fix the exclusive open mask in disk_scan_partitions block: add overflow checks for Amiga partition support block: change all __u32 annotations to __be32 in affs_hardblocks.h block: fix signed int overflow in Amiga partition support block: add capacity validation in bdev_add_partition() block: fine-granular CAP_SYS_ADMIN for Persistent Reservation block: disallow Persistent Reservation on partitions reiserfs: fix blkdev_put() warning from release_journal_dev() block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions() block: document the holder argument to blkdev_get_by_path ...
Diffstat (limited to 'block')
-rw-r--r--block/Makefile2
-rw-r--r--block/bdev.c252
-rw-r--r--block/bfq-iosched.c9
-rw-r--r--block/bio.c37
-rw-r--r--block/blk-cgroup-fc-appid.c2
-rw-r--r--block/blk-cgroup.c14
-rw-r--r--block/blk-core.c1
-rw-r--r--block/blk-flush.c110
-rw-r--r--block/blk-ioc.c36
-rw-r--r--block/blk-iocost.c7
-rw-r--r--block/blk-ioprio.c23
-rw-r--r--block/blk-map.c22
-rw-r--r--block/blk-mq-debugfs.c10
-rw-r--r--block/blk-mq-sched.h8
-rw-r--r--block/blk-mq-tag.c15
-rw-r--r--block/blk-mq.c141
-rw-r--r--block/blk-mq.h14
-rw-r--r--block/blk-rq-qos.c20
-rw-r--r--block/blk-wbt.c2
-rw-r--r--block/blk-zoned.c20
-rw-r--r--block/blk.h40
-rw-r--r--block/bsg-lib.c2
-rw-r--r--block/bsg.c26
-rw-r--r--block/disk-events.c19
-rw-r--r--block/early-lookup.c316
-rw-r--r--block/elevator.c2
-rw-r--r--block/fops.c63
-rw-r--r--block/genhd.c187
-rw-r--r--block/ioctl.c107
-rw-r--r--block/mq-deadline.c125
-rw-r--r--block/partitions/amiga.c102
-rw-r--r--block/partitions/core.c50
32 files changed, 1152 insertions, 632 deletions
diff --git a/block/Makefile b/block/Makefile
index b31b05390749..46ada9dc8bbf 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
- disk-events.o blk-ia-ranges.o
+ disk-events.o blk-ia-ranges.o early-lookup.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
diff --git a/block/bdev.c b/block/bdev.c
index 21c63bfef323..979e28a46b98 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(invalidate_bdev);
* Drop all buffers & page cache for given bdev range. This function bails
* with error if bdev has other exclusive owner (such as filesystem).
*/
-int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
+int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
loff_t lstart, loff_t lend)
{
/*
@@ -101,14 +101,14 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
* while we discard the buffer cache to avoid discarding buffers
* under live filesystem.
*/
- if (!(mode & FMODE_EXCL)) {
- int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
+ if (!(mode & BLK_OPEN_EXCL)) {
+ int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL);
if (err)
goto invalidate;
}
truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
- if (!(mode & FMODE_EXCL))
+ if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(bdev, truncate_bdev_range);
return 0;
@@ -308,7 +308,7 @@ EXPORT_SYMBOL(thaw_bdev);
* pseudo-fs
*/
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
+static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;
static struct inode *bdev_alloc_inode(struct super_block *sb)
@@ -415,6 +415,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
bdev = I_BDEV(inode);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
+ mutex_init(&bdev->bd_holder_lock);
bdev->bd_partno = partno;
bdev->bd_inode = inode;
bdev->bd_queue = disk->queue;
@@ -463,39 +464,48 @@ long nr_blockdev_pages(void)
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
- * @whole: whole block device containing @bdev, may equal @bdev
* @holder: holder trying to claim @bdev
+ * @hops: holder ops
*
* Test whether @bdev can be claimed by @holder.
*
- * CONTEXT:
- * spin_lock(&bdev_lock).
- *
* RETURNS:
* %true if @bdev can be claimed, %false otherwise.
*/
-static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
+static bool bd_may_claim(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops)
{
- if (bdev->bd_holder == holder)
- return true; /* already a holder */
- else if (bdev->bd_holder != NULL)
- return false; /* held by someone else */
- else if (whole == bdev)
- return true; /* is a whole device which isn't held */
-
- else if (whole->bd_holder == bd_may_claim)
- return true; /* is a partition of a device that is being partitioned */
- else if (whole->bd_holder != NULL)
- return false; /* is a partition of a held device */
- else
- return true; /* is a partition of an un-held device */
+ struct block_device *whole = bdev_whole(bdev);
+
+ lockdep_assert_held(&bdev_lock);
+
+ if (bdev->bd_holder) {
+ /*
+ * The same holder can always re-claim.
+ */
+ if (bdev->bd_holder == holder) {
+ if (WARN_ON_ONCE(bdev->bd_holder_ops != hops))
+ return false;
+ return true;
+ }
+ return false;
+ }
+
+ /*
+ * If the whole devices holder is set to bd_may_claim, a partition on
+ * the device is claimed, but not the whole device.
+ */
+ if (whole != bdev &&
+ whole->bd_holder && whole->bd_holder != bd_may_claim)
+ return false;
+ return true;
}
/**
* bd_prepare_to_claim - claim a block device
* @bdev: block device of interest
* @holder: holder trying to claim @bdev
+ * @hops: holder ops.
*
* Claim @bdev. This function fails if @bdev is already claimed by another
* holder and waits if another claiming is in progress. return, the caller
@@ -504,17 +514,18 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
* RETURNS:
* 0 if @bdev can be claimed, -EBUSY otherwise.
*/
-int bd_prepare_to_claim(struct block_device *bdev, void *holder)
+int bd_prepare_to_claim(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops)
{
struct block_device *whole = bdev_whole(bdev);
if (WARN_ON_ONCE(!holder))
return -EINVAL;
retry:
- spin_lock(&bdev_lock);
+ mutex_lock(&bdev_lock);
/* if someone else claimed, fail */
- if (!bd_may_claim(bdev, whole, holder)) {
- spin_unlock(&bdev_lock);
+ if (!bd_may_claim(bdev, holder, hops)) {
+ mutex_unlock(&bdev_lock);
return -EBUSY;
}
@@ -524,7 +535,7 @@ retry:
DEFINE_WAIT(wait);
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
schedule();
finish_wait(wq, &wait);
goto retry;
@@ -532,7 +543,7 @@ retry:
/* yay, all mine */
whole->bd_claiming = holder;
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
return 0;
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
@@ -550,16 +561,18 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
* bd_finish_claiming - finish claiming of a block device
* @bdev: block device of interest
* @holder: holder that has claimed @bdev
+ * @hops: block device holder operations
*
* Finish exclusive open of a block device. Mark the device as exlusively
* open by the holder and wake up all waiters for exclusive open to finish.
*/
-static void bd_finish_claiming(struct block_device *bdev, void *holder)
+static void bd_finish_claiming(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops)
{
struct block_device *whole = bdev_whole(bdev);
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
+ mutex_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, holder, hops));
/*
* Note that for a whole device bd_holders will be incremented twice,
* and bd_holder will be set to bd_may_claim before being set to holder
@@ -567,9 +580,12 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
whole->bd_holders++;
whole->bd_holder = bd_may_claim;
bdev->bd_holders++;
+ mutex_lock(&bdev->bd_holder_lock);
bdev->bd_holder = holder;
+ bdev->bd_holder_ops = hops;
+ mutex_unlock(&bdev->bd_holder_lock);
bd_clear_claiming(whole, holder);
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
}
/**
@@ -583,12 +599,47 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
*/
void bd_abort_claiming(struct block_device *bdev, void *holder)
{
- spin_lock(&bdev_lock);
+ mutex_lock(&bdev_lock);
bd_clear_claiming(bdev_whole(bdev), holder);
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);
+static void bd_end_claim(struct block_device *bdev, void *holder)
+{
+ struct block_device *whole = bdev_whole(bdev);
+ bool unblock = false;
+
+ /*
+ * Release a claim on the device. The holder fields are protected with
+ * bdev_lock. open_mutex is used to synchronize disk_holder unlinking.
+ */
+ mutex_lock(&bdev_lock);
+ WARN_ON_ONCE(bdev->bd_holder != holder);
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--whole->bd_holders < 0);
+ if (!bdev->bd_holders) {
+ mutex_lock(&bdev->bd_holder_lock);
+ bdev->bd_holder = NULL;
+ bdev->bd_holder_ops = NULL;
+ mutex_unlock(&bdev->bd_holder_lock);
+ if (bdev->bd_write_holder)
+ unblock = true;
+ }
+ if (!whole->bd_holders)
+ whole->bd_holder = NULL;
+ mutex_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, remove holder link and unblock evpoll if
+ * it was a write holder.
+ */
+ if (unblock) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
+ }
+}
+
static void blkdev_flush_mapping(struct block_device *bdev)
{
WARN_ON_ONCE(bdev->bd_holders);
@@ -597,13 +648,13 @@ static void blkdev_flush_mapping(struct block_device *bdev)
bdev_write_inode(bdev);
}
-static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
int ret;
if (disk->fops->open) {
- ret = disk->fops->open(bdev, mode);
+ ret = disk->fops->open(disk, mode);
if (ret) {
/* avoid ghost partitions on a removed medium */
if (ret == -ENOMEDIUM &&
@@ -621,22 +672,19 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
return 0;
}
-static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+static void blkdev_put_whole(struct block_device *bdev)
{
if (atomic_dec_and_test(&bdev->bd_openers))
blkdev_flush_mapping(bdev);
if (bdev->bd_disk->fops->release)
- bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+ bdev->bd_disk->fops->release(bdev->bd_disk);
}
-static int blkdev_get_part(struct block_device *part, fmode_t mode)
+static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
{
struct gendisk *disk = part->bd_disk;
int ret;
- if (atomic_read(&part->bd_openers))
- goto done;
-
ret = blkdev_get_whole(bdev_whole(part), mode);
if (ret)
return ret;
@@ -645,26 +693,27 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
if (!bdev_nr_sectors(part))
goto out_blkdev_put;
- disk->open_partitions++;
- set_init_blocksize(part);
-done:
+ if (!atomic_read(&part->bd_openers)) {
+ disk->open_partitions++;
+ set_init_blocksize(part);
+ }
atomic_inc(&part->bd_openers);
return 0;
out_blkdev_put:
- blkdev_put_whole(bdev_whole(part), mode);
+ blkdev_put_whole(bdev_whole(part));
return ret;
}
-static void blkdev_put_part(struct block_device *part, fmode_t mode)
+static void blkdev_put_part(struct block_device *part)
{
struct block_device *whole = bdev_whole(part);
- if (!atomic_dec_and_test(&part->bd_openers))
- return;
- blkdev_flush_mapping(part);
- whole->bd_disk->open_partitions--;
- blkdev_put_whole(whole, mode);
+ if (atomic_dec_and_test(&part->bd_openers)) {
+ blkdev_flush_mapping(part);
+ whole->bd_disk->open_partitions--;
+ }
+ blkdev_put_whole(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
@@ -695,17 +744,17 @@ void blkdev_put_no_open(struct block_device *bdev)
{
put_device(&bdev->bd_device);
}
-
+
/**
* blkdev_get_by_dev - open a block device by device number
* @dev: device number of block device to open
- * @mode: FMODE_* mask
+ * @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
+ * @hops: holder operations
*
- * Open the block device described by device number @dev. If @mode includes
- * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
- * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
- * the same @holder.
+ * Open the block device described by device number @dev. If @holder is not
+ * %NULL, the block device is opened with exclusive access. Exclusive opens may
+ * nest for the same @holder.
*
* Use this interface ONLY if you really do not have anything better - i.e. when
* you are behind a truly sucky interface and all you are given is a device
@@ -717,7 +766,8 @@ void blkdev_put_no_open(struct block_device *bdev)
* RETURNS:
* Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops)
{
bool unblock_events = true;
struct block_device *bdev;
@@ -726,8 +776,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
MAJOR(dev), MINOR(dev),
- ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
- ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
+ ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
return ERR_PTR(ret);
@@ -736,10 +786,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
return ERR_PTR(-ENXIO);
disk = bdev->bd_disk;
- if (mode & FMODE_EXCL) {
- ret = bd_prepare_to_claim(bdev, holder);
+ if (holder) {
+ mode |= BLK_OPEN_EXCL;
+ ret = bd_prepare_to_claim(bdev, holder, hops);
if (ret)
goto put_blkdev;
+ } else {
+ if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) {
+ ret = -EIO;
+ goto put_blkdev;
+ }
}
disk_block_events(disk);
@@ -756,8 +812,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
ret = blkdev_get_whole(bdev, mode);
if (ret)
goto put_module;
- if (mode & FMODE_EXCL) {
- bd_finish_claiming(bdev, holder);
+ if (holder) {
+ bd_finish_claiming(bdev, holder, hops);
/*
* Block event polling for write claims if requested. Any write
@@ -766,7 +822,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
* writeable reference is too fragile given the way @mode is
* used in blkdev_get/put().
*/
- if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+ if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
(disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
bdev->bd_write_holder = true;
unblock_events = false;
@@ -780,7 +836,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
put_module:
module_put(disk->fops->owner);
abort_claiming:
- if (mode & FMODE_EXCL)
+ if (holder)
bd_abort_claiming(bdev, holder);
mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
@@ -793,13 +849,13 @@ EXPORT_SYMBOL(blkdev_get_by_dev);
/**
* blkdev_get_by_path - open a block device by name
* @path: path to the block device to open
- * @mode: FMODE_* mask
+ * @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
+ * @hops: holder operations
*
- * Open the block device described by the device file at @path. If @mode
- * includes %FMODE_EXCL, the block device is opened with exclusive access.
- * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
- * nest for the same @holder.
+ * Open the block device described by the device file at @path. If @holder is
+ * not %NULL, the block device is opened with exclusive access. Exclusive opens
+ * may nest for the same @holder.
*
* CONTEXT:
* Might sleep.
@@ -807,8 +863,8 @@ EXPORT_SYMBOL(blkdev_get_by_dev);
* RETURNS:
* Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder)
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hops)
{
struct block_device *bdev;
dev_t dev;
@@ -818,9 +874,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
if (error)
return ERR_PTR(error);
- bdev = blkdev_get_by_dev(dev, mode, holder);
- if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
- blkdev_put(bdev, mode);
+ bdev = blkdev_get_by_dev(dev, mode, holder, hops);
+ if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+ blkdev_put(bdev, holder);
return ERR_PTR(-EACCES);
}
@@ -828,7 +884,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
}
EXPORT_SYMBOL(blkdev_get_by_path);
-void blkdev_put(struct block_device *bdev, fmode_t mode)
+void blkdev_put(struct block_device *bdev, void *holder)
{
struct gendisk *disk = bdev->bd_disk;
@@ -843,36 +899,8 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
sync_blockdev(bdev);
mutex_lock(&disk->open_mutex);
- if (mode & FMODE_EXCL) {
- struct block_device *whole = bdev_whole(bdev);
- bool bdev_free;
-
- /*
- * Release a claim on the device. The holder fields
- * are protected with bdev_lock. open_mutex is to
- * synchronize disk_holder unlinking.
- */
- spin_lock(&bdev_lock);
-
- WARN_ON_ONCE(--bdev->bd_holders < 0);
- WARN_ON_ONCE(--whole->bd_holders < 0);
-
- if ((bdev_free = !bdev->bd_holders))
- bdev->bd_holder = NULL;
- if (!whole->bd_holders)
- whole->bd_holder = NULL;
-
- spin_unlock(&bdev_lock);
-
- /*
- * If this was the last claim, remove holder link and
- * unblock evpoll if it was a write holder.
- */
- if (bdev_free && bdev->bd_write_holder) {
- disk_unblock_events(disk);
- bdev->bd_write_holder = false;
- }
- }
+ if (holder)
+ bd_end_claim(bdev, holder);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -882,9 +910,9 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
if (bdev_is_partition(bdev))
- blkdev_put_part(bdev, mode);
+ blkdev_put_part(bdev);
else
- blkdev_put_whole(bdev, mode);
+ blkdev_put_whole(bdev);
mutex_unlock(&disk->open_mutex);
module_put(disk->fops->owner);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3164e3177965..09bbbcf9e049 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5403,6 +5403,10 @@ void bfq_put_queue(struct bfq_queue *bfqq)
if (bfqq->bfqd->last_completed_rq_bfqq == bfqq)
bfqq->bfqd->last_completed_rq_bfqq = NULL;
+ WARN_ON_ONCE(!list_empty(&bfqq->fifo));
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&bfqq->sort_list));
+ WARN_ON_ONCE(bfqq->dispatched);
+
kmem_cache_free(bfq_pool, bfqq);
bfqg_and_blkg_put(bfqg);
}
@@ -7135,6 +7139,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
{
struct bfq_data *bfqd = e->elevator_data;
struct bfq_queue *bfqq, *n;
+ unsigned int actuator;
hrtimer_cancel(&bfqd->idle_slice_timer);
@@ -7143,6 +7148,10 @@ static void bfq_exit_queue(struct elevator_queue *e)
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
spin_unlock_irq(&bfqd->lock);
+ for (actuator = 0; actuator < bfqd->num_actuators; actuator++)
+ WARN_ON_ONCE(bfqd->rq_in_driver[actuator]);
+ WARN_ON_ONCE(bfqd->tot_rq_in_driver);
+
hrtimer_cancel(&bfqd->idle_slice_timer);
/* release oom-queue reference to root group */
diff --git a/block/bio.c b/block/bio.c
index 043944fd46eb..8672179213b9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1138,6 +1138,14 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
+void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
+ size_t off)
+{
+ WARN_ON_ONCE(len > UINT_MAX);
+ WARN_ON_ONCE(off > UINT_MAX);
+ __bio_add_page(bio, &folio->page, len, off);
+}
+
/**
* bio_add_folio - Attempt to add part of a folio to a bio.
* @bio: BIO to add to.
@@ -1169,7 +1177,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
+ bio_release_page(bio, bvec->bv_page);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1191,7 +1199,6 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
bio->bi_iter.bi_bvec_done = iter->iov_offset;
bio->bi_iter.bi_size = size;
- bio_set_flag(bio, BIO_NO_PAGE_REF);
bio_set_flag(bio, BIO_CLONED);
}
@@ -1206,7 +1213,7 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
}
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
return 0;
}
@@ -1220,7 +1227,7 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
queue_max_zone_append_sectors(q), &same_page) != len)
return -EINVAL;
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
return 0;
}
@@ -1231,10 +1238,10 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
- * For multi-segment *iter, this function only adds pages from the
- * next non-empty segment of the iov iterator.
+ * Extracts pages from *iter and appends them to @bio's bvec array. The pages
+ * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag.
+ * For a multi-segment *iter, this function only adds pages from the next
+ * non-empty segment of the iov iterator.
*/
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
@@ -1266,9 +1273,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
*/
- size = iov_iter_get_pages(iter, pages,
- UINT_MAX - bio->bi_iter.bi_size,
- nr_pages, &offset, extraction_flags);
+ size = iov_iter_extract_pages(iter, &pages,
+ UINT_MAX - bio->bi_iter.bi_size,
+ nr_pages, extraction_flags, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
@@ -1301,7 +1308,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
iov_iter_revert(iter, left);
out:
while (i < nr_pages)
- put_page(pages[i++]);
+ bio_release_page(bio, pages[i++]);
return ret;
}
@@ -1336,6 +1343,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+ if (iov_iter_extract_will_pin(iter))
+ bio_set_flag(bio, BIO_PAGE_PINNED);
do {
ret = __bio_iov_iter_get_pages(bio, iter);
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
@@ -1489,8 +1498,8 @@ void bio_set_pages_dirty(struct bio *bio)
* the BIO and re-dirty the pages in process context.
*
* It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on. It will run one put_page() against each page and will run one
- * bio_put() against the BIO.
+ * here on. It will unpin each page and will run one bio_put() against the
+ * BIO.
*/
static void bio_dirty_fn(struct work_struct *work);
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
index 842e5e1c0f3c..3ec21333f393 100644
--- a/block/blk-cgroup-fc-appid.c
+++ b/block/blk-cgroup-fc-appid.c
@@ -34,7 +34,7 @@ int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
* the vmid from the fabric.
* Adding the overhead of a lock is not necessary.
*/
- strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+ strscpy(blkcg->fc_app_id, app_id, app_id_len);
css_put(css);
out_cgrp_put:
cgroup_put(cgrp);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index dce1548a7a0c..aaf9903ad7b2 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -624,8 +624,13 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
struct blkg_iostat_set *bis =
per_cpu_ptr(blkg->iostat_cpu, cpu);
memset(bis, 0, sizeof(*bis));
+
+ /* Re-initialize the cleared blkg_iostat_set */
+ u64_stats_init(&bis->sync);
+ bis->blkg = blkg;
}
memset(&blkg->iostat, 0, sizeof(blkg->iostat));
+ u64_stats_init(&blkg->iostat.sync);
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -762,6 +767,13 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -ENODEV;
}
+ mutex_lock(&bdev->bd_queue->rq_qos_mutex);
+ if (!disk_live(bdev->bd_disk)) {
+ blkdev_put_no_open(bdev);
+ mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
+ return -ENODEV;
+ }
+
ctx->body = input;
ctx->bdev = bdev;
return 0;
@@ -906,6 +918,7 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
*/
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
+ __releases(&ctx->bdev->bd_queue->rq_qos_mutex)
{
if (ctx->blkg) {
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
@@ -913,6 +926,7 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
}
if (ctx->bdev) {
+ mutex_unlock(&ctx->bdev