| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 12:47:20 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 12:47:20 -0700 |
| commit | a0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch) | |
| tree | 9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /block | |
| parent | 0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff) | |
| parent | fcaa174a9c995cf0af3967e55644a1543ea07e36 (diff) | |
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull request via Keith:
    - Various cleanups all around (Irvin, Chaitanya, Christophe)
    - Better struct packing (Christophe JAILLET)
    - Reduce controller error logs for optional commands (Keith)
    - Support for >=64KiB block sizes (Daniel Gomez)
    - Fabrics fixes and code organization (Max, Chaitanya, Daniel Wagner)
- bcache updates via Coly:
    - Fix a race at init time (Mingzhe Zou)
    - Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye)
- use page pinning in the block layer for dio (David)
- convert old block dio code to page pinning (David, Christoph)
- cleanups for pktcdvd (Andy)
- cleanups for rnbd (Guoqing)
- use the unchecked __bio_add_page() for the initial single page
  additions (Johannes); see the second sketch after this list
- fix overflows in the Amiga partition handling code (Michael)
- improve mq-deadline zoned device support (Bart)
- keep passthrough requests out of the IO schedulers (Christoph, Ming)
- improve support for flush requests, making them less special to deal
with (Christoph)
- add bdev holder ops and shutdown methods (Christoph)
- fix the name_to_dev_t() situation and use cases (Christoph)
- decouple the block open flags from fmode_t (Christoph); see the first
  sketch after this list
- ublk updates and cleanups, including adding user copy support (Ming)
- BFQ sanity checking (Bart)
- convert brd from radix to xarray (Pankaj)
- constify various structures (Thomas, Ivan)
- more fine grained persistent reservation ioctl capability checks
(Jingbo)
- misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan,
Jordy, Li, Min, Yu, Zhong, Waiman)
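
The fmode_t decoupling and the holder-ops work change the in-kernel open API, as the block/bdev.c diff below shows: blkdev_get_by_dev() and blkdev_get_by_path() now take a blk_mode_t open mode plus a holder cookie and an optional const struct blk_holder_ops *, a non-NULL holder implies an exclusive open (BLK_OPEN_EXCL is set internally), and blkdev_put() takes the holder rather than an fmode_t. A minimal caller sketch; the example_open_bdev() wrapper is hypothetical, only the blkdev_* calls and BLK_OPEN_* flags come from this merge:

```c
/* Hypothetical caller sketch of the post-6.5 open/close API. */
#include <linux/blkdev.h>
#include <linux/err.h>

static int example_open_bdev(dev_t devt, void *holder)
{
	struct block_device *bdev;

	/* A non-NULL holder makes this an exclusive open. */
	bdev = blkdev_get_by_dev(devt, BLK_OPEN_READ | BLK_OPEN_WRITE,
				 holder, NULL /* no blk_holder_ops */);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* ... use the device ... */

	/* Release with the same holder cookie instead of an fmode_t. */
	blkdev_put(bdev, holder);
	return 0;
}
```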
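
The unchecked-__bio_add_page() work also adds a bio_add_folio_nofail() helper (see the block/bio.c hunk below). A hypothetical sketch of the intended pattern, adding the first folio to a freshly allocated bio, where the addition cannot fail because the bio was sized for it; the example_read_folio_bio() wrapper is an illustration, not code from the series:

```c
/* Hypothetical sketch; bio_alloc() and bio_add_folio_nofail() are the real APIs. */
#include <linux/bio.h>

static struct bio *example_read_folio_bio(struct block_device *bdev,
					  struct folio *folio, size_t len)
{
	/* One vector is reserved, so the single add below cannot fail. */
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);

	if (!bio)
		return NULL;
	bio_add_folio_nofail(bio, folio, len, 0);
	return bio;
}
```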
* tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits)
scsi/sg: don't grab scsi host module reference
ext4: Fix warning in blkdev_put()
block: don't return -EINVAL for not found names in devt_from_devname
cdrom: Fix spectre-v1 gadget
block: Improve kernel-doc headers
blk-mq: don't insert passthrough request into sw queue
bsg: make bsg_class a static const structure
ublk: make ublk_chr_class a static const structure
aoe: make aoe_class a static const structure
block/rnbd: make all 'class' structures const
block: fix the exclusive open mask in disk_scan_partitions
block: add overflow checks for Amiga partition support
block: change all __u32 annotations to __be32 in affs_hardblocks.h
block: fix signed int overflow in Amiga partition support
block: add capacity validation in bdev_add_partition()
block: fine-granular CAP_SYS_ADMIN for Persistent Reservation
block: disallow Persistent Reservation on partitions
reiserfs: fix blkdev_put() warning from release_journal_dev()
block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions()
block: document the holder argument to blkdev_get_by_path
...
Diffstat (limited to 'block')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | block/Makefile | 2 |
| -rw-r--r-- | block/bdev.c | 252 |
| -rw-r--r-- | block/bfq-iosched.c | 9 |
| -rw-r--r-- | block/bio.c | 37 |
| -rw-r--r-- | block/blk-cgroup-fc-appid.c | 2 |
| -rw-r--r-- | block/blk-cgroup.c | 14 |
| -rw-r--r-- | block/blk-core.c | 1 |
| -rw-r--r-- | block/blk-flush.c | 110 |
| -rw-r--r-- | block/blk-ioc.c | 36 |
| -rw-r--r-- | block/blk-iocost.c | 7 |
| -rw-r--r-- | block/blk-ioprio.c | 23 |
| -rw-r--r-- | block/blk-map.c | 22 |
| -rw-r--r-- | block/blk-mq-debugfs.c | 10 |
| -rw-r--r-- | block/blk-mq-sched.h | 8 |
| -rw-r--r-- | block/blk-mq-tag.c | 15 |
| -rw-r--r-- | block/blk-mq.c | 141 |
| -rw-r--r-- | block/blk-mq.h | 14 |
| -rw-r--r-- | block/blk-rq-qos.c | 20 |
| -rw-r--r-- | block/blk-wbt.c | 2 |
| -rw-r--r-- | block/blk-zoned.c | 20 |
| -rw-r--r-- | block/blk.h | 40 |
| -rw-r--r-- | block/bsg-lib.c | 2 |
| -rw-r--r-- | block/bsg.c | 26 |
| -rw-r--r-- | block/disk-events.c | 19 |
| -rw-r--r-- | block/early-lookup.c | 316 |
| -rw-r--r-- | block/elevator.c | 2 |
| -rw-r--r-- | block/fops.c | 63 |
| -rw-r--r-- | block/genhd.c | 187 |
| -rw-r--r-- | block/ioctl.c | 107 |
| -rw-r--r-- | block/mq-deadline.c | 125 |
| -rw-r--r-- | block/partitions/amiga.c | 102 |
| -rw-r--r-- | block/partitions/core.c | 50 |
32 files changed, 1152 insertions, 632 deletions
diff --git a/block/Makefile b/block/Makefile
index b31b05390749..46ada9dc8bbf 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
-			disk-events.o blk-ia-ranges.o
+			disk-events.o blk-ia-ranges.o early-lookup.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
diff --git a/block/bdev.c b/block/bdev.c
index 21c63bfef323..979e28a46b98 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(invalidate_bdev);
  * Drop all buffers & page cache for given bdev range. This function bails
  * with error if bdev has other exclusive owner (such as filesystem).
  */
-int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
+int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
 			loff_t lstart, loff_t lend)
 {
 	/*
@@ -101,14 +101,14 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
 	 * while we discard the buffer cache to avoid discarding buffers
 	 * under live filesystem.
 	 */
-	if (!(mode & FMODE_EXCL)) {
-		int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
+	if (!(mode & BLK_OPEN_EXCL)) {
+		int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL);
 		if (err)
 			goto invalidate;
 	}
 
 	truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
-	if (!(mode & FMODE_EXCL))
+	if (!(mode & BLK_OPEN_EXCL))
 		bd_abort_claiming(bdev, truncate_bdev_range);
 	return 0;
 
@@ -308,7 +308,7 @@ EXPORT_SYMBOL(thaw_bdev);
  * pseudo-fs
  */
 
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
+static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock);
 static struct kmem_cache * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
@@ -415,6 +415,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	bdev = I_BDEV(inode);
 	mutex_init(&bdev->bd_fsfreeze_mutex);
 	spin_lock_init(&bdev->bd_size_lock);
+	mutex_init(&bdev->bd_holder_lock);
 	bdev->bd_partno = partno;
 	bdev->bd_inode = inode;
 	bdev->bd_queue = disk->queue;
@@ -463,39 +464,48 @@ long nr_blockdev_pages(void)
 /**
  * bd_may_claim - test whether a block device can be claimed
  * @bdev: block device of interest
- * @whole: whole block device containing @bdev, may equal @bdev
  * @holder: holder trying to claim @bdev
+ * @hops: holder ops
  *
  * Test whether @bdev can be claimed by @holder.
  *
- * CONTEXT:
- * spin_lock(&bdev_lock).
- *
 * RETURNS:
  * %true if @bdev can be claimed, %false otherwise.
  */
-static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
-			 void *holder)
+static bool bd_may_claim(struct block_device *bdev, void *holder,
+		const struct blk_holder_ops *hops)
 {
-	if (bdev->bd_holder == holder)
-		return true;	 /* already a holder */
-	else if (bdev->bd_holder != NULL)
-		return false;	 /* held by someone else */
-	else if (whole == bdev)
-		return true;	 /* is a whole device which isn't held */
-
-	else if (whole->bd_holder == bd_may_claim)
-		return true;	 /* is a partition of a device that is being partitioned */
-	else if (whole->bd_holder != NULL)
-		return false;	 /* is a partition of a held device */
-	else
-		return true;	 /* is a partition of an un-held device */
+	struct block_device *whole = bdev_whole(bdev);
+
+	lockdep_assert_held(&bdev_lock);
+
+	if (bdev->bd_holder) {
+		/*
+		 * The same holder can always re-claim.
+		 */
+		if (bdev->bd_holder == holder) {
+			if (WARN_ON_ONCE(bdev->bd_holder_ops != hops))
+				return false;
+			return true;
+		}
+		return false;
+	}
+
+	/*
+	 * If the whole devices holder is set to bd_may_claim, a partition on
+	 * the device is claimed, but not the whole device.
+	 */
+	if (whole != bdev &&
+	    whole->bd_holder && whole->bd_holder != bd_may_claim)
+		return false;
+	return true;
 }
 
 /**
  * bd_prepare_to_claim - claim a block device
  * @bdev: block device of interest
  * @holder: holder trying to claim @bdev
+ * @hops: holder ops.
  *
  * Claim @bdev.  This function fails if @bdev is already claimed by another
  * holder and waits if another claiming is in progress. return, the caller
@@ -504,17 +514,18 @@ static bool bd_may_claim(struct block_device *bdev, void *holder,
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
-int bd_prepare_to_claim(struct block_device *bdev, void *holder)
+int bd_prepare_to_claim(struct block_device *bdev, void *holder,
+		const struct blk_holder_ops *hops)
 {
 	struct block_device *whole = bdev_whole(bdev);
 
 	if (WARN_ON_ONCE(!holder))
 		return -EINVAL;
 retry:
-	spin_lock(&bdev_lock);
+	mutex_lock(&bdev_lock);
 	/* if someone else claimed, fail */
-	if (!bd_may_claim(bdev, whole, holder)) {
-		spin_unlock(&bdev_lock);
+	if (!bd_may_claim(bdev, holder, hops)) {
+		mutex_unlock(&bdev_lock);
 		return -EBUSY;
 	}
 
@@ -524,7 +535,7 @@ retry:
 		DEFINE_WAIT(wait);
 
 		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock(&bdev_lock);
+		mutex_unlock(&bdev_lock);
 		schedule();
 		finish_wait(wq, &wait);
 		goto retry;
@@ -532,7 +543,7 @@ retry:
 
 	/* yay, all mine */
 	whole->bd_claiming = holder;
-	spin_unlock(&bdev_lock);
+	mutex_unlock(&bdev_lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
@@ -550,16 +561,18 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
 * bd_finish_claiming - finish claiming of a block device
 * @bdev: block device of interest
 * @holder: holder that has claimed @bdev
+ * @hops: block device holder operations
 *
 * Finish exclusive open of a block device. Mark the device as exlusively
 * open by the holder and wake up all waiters for exclusive open to finish.
 */
-static void bd_finish_claiming(struct block_device *bdev, void *holder)
+static void bd_finish_claiming(struct block_device *bdev, void *holder,
+		const struct blk_holder_ops *hops)
 {
 	struct block_device *whole = bdev_whole(bdev);
 
-	spin_lock(&bdev_lock);
-	BUG_ON(!bd_may_claim(bdev, whole, holder));
+	mutex_lock(&bdev_lock);
+	BUG_ON(!bd_may_claim(bdev, holder, hops));
 	/*
 	 * Note that for a whole device bd_holders will be incremented twice,
 	 * and bd_holder will be set to bd_may_claim before being set to holder
@@ -567,9 +580,12 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
 	whole->bd_holders++;
 	whole->bd_holder = bd_may_claim;
 	bdev->bd_holders++;
+	mutex_lock(&bdev->bd_holder_lock);
 	bdev->bd_holder = holder;
+	bdev->bd_holder_ops = hops;
+	mutex_unlock(&bdev->bd_holder_lock);
 	bd_clear_claiming(whole, holder);
-	spin_unlock(&bdev_lock);
+	mutex_unlock(&bdev_lock);
 }
 
 /**
@@ -583,12 +599,47 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
 */
 void bd_abort_claiming(struct block_device *bdev, void *holder)
 {
-	spin_lock(&bdev_lock);
+	mutex_lock(&bdev_lock);
 	bd_clear_claiming(bdev_whole(bdev), holder);
-	spin_unlock(&bdev_lock);
+	mutex_unlock(&bdev_lock);
 }
 EXPORT_SYMBOL(bd_abort_claiming);
 
+static void bd_end_claim(struct block_device *bdev, void *holder)
+{
+	struct block_device *whole = bdev_whole(bdev);
+	bool unblock = false;
+
+	/*
+	 * Release a claim on the device. The holder fields are protected with
+	 * bdev_lock. open_mutex is used to synchronize disk_holder unlinking.
+	 */
+	mutex_lock(&bdev_lock);
+	WARN_ON_ONCE(bdev->bd_holder != holder);
+	WARN_ON_ONCE(--bdev->bd_holders < 0);
+	WARN_ON_ONCE(--whole->bd_holders < 0);
+	if (!bdev->bd_holders) {
+		mutex_lock(&bdev->bd_holder_lock);
+		bdev->bd_holder = NULL;
+		bdev->bd_holder_ops = NULL;
+		mutex_unlock(&bdev->bd_holder_lock);
+		if (bdev->bd_write_holder)
+			unblock = true;
+	}
+	if (!whole->bd_holders)
+		whole->bd_holder = NULL;
+	mutex_unlock(&bdev_lock);
+
+	/*
+	 * If this was the last claim, remove holder link and unblock evpoll if
+	 * it was a write holder.
+	 */
+	if (unblock) {
+		disk_unblock_events(bdev->bd_disk);
+		bdev->bd_write_holder = false;
+	}
+}
+
 static void blkdev_flush_mapping(struct block_device *bdev)
 {
 	WARN_ON_ONCE(bdev->bd_holders);
@@ -597,13 +648,13 @@ static void blkdev_flush_mapping(struct block_device *bdev)
 	sync_blockdev(bdev);
 	kill_bdev(bdev);
 	bdev_write_inode(bdev);
 }
 
-static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	int ret;
 
 	if (disk->fops->open) {
-		ret = disk->fops->open(bdev, mode);
+		ret = disk->fops->open(disk, mode);
 		if (ret) {
 			/* avoid ghost partitions on a removed medium */
 			if (ret == -ENOMEDIUM &&
@@ -621,22 +672,19 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 	return 0;
 }
 
-static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+static void blkdev_put_whole(struct block_device *bdev)
 {
 	if (atomic_dec_and_test(&bdev->bd_openers))
 		blkdev_flush_mapping(bdev);
 	if (bdev->bd_disk->fops->release)
-		bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+		bdev->bd_disk->fops->release(bdev->bd_disk);
 }
 
-static int blkdev_get_part(struct block_device *part, fmode_t mode)
+static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
 {
 	struct gendisk *disk = part->bd_disk;
 	int ret;
 
-	if (atomic_read(&part->bd_openers))
-		goto done;
-
 	ret = blkdev_get_whole(bdev_whole(part), mode);
 	if (ret)
 		return ret;
@@ -645,26 +693,27 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	if (!bdev_nr_sectors(part))
 		goto out_blkdev_put;
 
-	disk->open_partitions++;
-	set_init_blocksize(part);
-done:
+	if (!atomic_read(&part->bd_openers)) {
+		disk->open_partitions++;
+		set_init_blocksize(part);
+	}
 	atomic_inc(&part->bd_openers);
 	return 0;
 
 out_blkdev_put:
-	blkdev_put_whole(bdev_whole(part), mode);
+	blkdev_put_whole(bdev_whole(part));
 	return ret;
 }
 
-static void blkdev_put_part(struct block_device *part, fmode_t mode)
+static void blkdev_put_part(struct block_device *part)
 {
 	struct block_device *whole = bdev_whole(part);
 
-	if (!atomic_dec_and_test(&part->bd_openers))
-		return;
-	blkdev_flush_mapping(part);
-	whole->bd_disk->open_partitions--;
-	blkdev_put_whole(whole, mode);
+	if (atomic_dec_and_test(&part->bd_openers)) {
+		blkdev_flush_mapping(part);
+		whole->bd_disk->open_partitions--;
+	}
+	blkdev_put_whole(whole);
 }
 
 struct block_device *blkdev_get_no_open(dev_t dev)
@@ -695,17 +744,17 @@ void blkdev_put_no_open(struct block_device *bdev)
 {
 	put_device(&bdev->bd_device);
 }
-	
+
 /**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
- * @mode: FMODE_* mask
+ * @mode: open mode (BLK_OPEN_*)
 * @holder: exclusive holder identifier
+ * @hops: holder operations
 *
- * Open the block device described by device number @dev. If @mode includes
- * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
- * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
- * the same @holder.
+ * Open the block device described by device number @dev. If @holder is not
+ * %NULL, the block device is opened with exclusive access. Exclusive opens may
+ * nest for the same @holder.
 *
 * Use this interface ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a device
@@ -717,7 +766,8 @@ void blkdev_put_no_open(struct block_device *bdev)
 * RETURNS:
 * Reference to the block_device on success, ERR_PTR(-errno) on failure.
 */
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+		const struct blk_holder_ops *hops)
 {
 	bool unblock_events = true;
 	struct block_device *bdev;
@@ -726,8 +776,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 
 	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
 			MAJOR(dev), MINOR(dev),
-			((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
-			((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
+			((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
+			((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -736,10 +786,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 		return ERR_PTR(-ENXIO);
 	disk = bdev->bd_disk;
 
-	if (mode & FMODE_EXCL) {
-		ret = bd_prepare_to_claim(bdev, holder);
+	if (holder) {
+		mode |= BLK_OPEN_EXCL;
+		ret = bd_prepare_to_claim(bdev, holder, hops);
 		if (ret)
 			goto put_blkdev;
+	} else {
+		if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) {
+			ret = -EIO;
+			goto put_blkdev;
+		}
 	}
 
 	disk_block_events(disk);
@@ -756,8 +812,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 		ret = blkdev_get_whole(bdev, mode);
 	if (ret)
 		goto put_module;
-	if (mode & FMODE_EXCL) {
-		bd_finish_claiming(bdev, holder);
+	if (holder) {
+		bd_finish_claiming(bdev, holder, hops);
 
 		/*
 		 * Block event polling for write claims if requested. Any write
@@ -766,7 +822,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 		 * writeable reference is too fragile given the way @mode is
 		 * used in blkdev_get/put().
 		 */
-		if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
 		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
 			bdev->bd_write_holder = true;
 			unblock_events = false;
@@ -780,7 +836,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 put_module:
 	module_put(disk->fops->owner);
 abort_claiming:
-	if (mode & FMODE_EXCL)
+	if (holder)
 		bd_abort_claiming(bdev, holder);
 	mutex_unlock(&disk->open_mutex);
 	disk_unblock_events(disk);
@@ -793,13 +849,13 @@ EXPORT_SYMBOL(blkdev_get_by_dev);
 /**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
- * @mode: FMODE_* mask
+ * @mode: open mode (BLK_OPEN_*)
 * @holder: exclusive holder identifier
+ * @hops: holder operations
 *
- * Open the block device described by the device file at @path. If @mode
- * includes %FMODE_EXCL, the block device is opened with exclusive access.
- * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
- * nest for the same @holder.
+ * Open the block device described by the device file at @path. If @holder is
+ * not %NULL, the block device is opened with exclusive access. Exclusive opens
+ * may nest for the same @holder.
 *
 * CONTEXT:
 * Might sleep.
@@ -807,8 +863,8 @@ EXPORT_SYMBOL(blkdev_get_by_dev);
 * RETURNS:
 * Reference to the block_device on success, ERR_PTR(-errno) on failure.
 */
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
-		void *holder)
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+		void *holder, const struct blk_holder_ops *hops)
 {
 	struct block_device *bdev;
 	dev_t dev;
@@ -818,9 +874,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 	if (error)
 		return ERR_PTR(error);
 
-	bdev = blkdev_get_by_dev(dev, mode, holder);
-	if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
-		blkdev_put(bdev, mode);
+	bdev = blkdev_get_by_dev(dev, mode, holder, hops);
+	if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+		blkdev_put(bdev, holder);
 		return ERR_PTR(-EACCES);
 	}
 
@@ -828,7 +884,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 }
 EXPORT_SYMBOL(blkdev_get_by_path);
 
-void blkdev_put(struct block_device *bdev, fmode_t mode)
+void blkdev_put(struct block_device *bdev, void *holder)
 {
 	struct gendisk *disk = bdev->bd_disk;
 
@@ -843,36 +899,8 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	sync_blockdev(bdev);
 
 	mutex_lock(&disk->open_mutex);
-	if (mode & FMODE_EXCL) {
-		struct block_device *whole = bdev_whole(bdev);
-		bool bdev_free;
-
-		/*
-		 * Release a claim on the device. The holder fields
-		 * are protected with bdev_lock. open_mutex is to
-		 * synchronize disk_holder unlinking.
-		 */
-		spin_lock(&bdev_lock);
-
-		WARN_ON_ONCE(--bdev->bd_holders < 0);
-		WARN_ON_ONCE(--whole->bd_holders < 0);
-
-		if ((bdev_free = !bdev->bd_holders))
-			bdev->bd_holder = NULL;
-		if (!whole->bd_holders)
-			whole->bd_holder = NULL;
-
-		spin_unlock(&bdev_lock);
-
-		/*
-		 * If this was the last claim, remove holder link and
-		 * unblock evpoll if it was a write holder.
-		 */
-		if (bdev_free && bdev->bd_write_holder) {
-			disk_unblock_events(disk);
-			bdev->bd_write_holder = false;
-		}
-	}
+	if (holder)
+		bd_end_claim(bdev, holder);
 
 	/*
 	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -882,9 +910,9 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
 
 	if (bdev_is_partition(bdev))
-		blkdev_put_part(bdev, mode);
+		blkdev_put_part(bdev);
 	else
-		blkdev_put_whole(bdev, mode);
+		blkdev_put_whole(bdev);
 	mutex_unlock(&disk->open_mutex);
 
 	module_put(disk->fops->owner);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3164e3177965..09bbbcf9e049 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5403,6 +5403,10 @@ void bfq_put_queue(struct bfq_queue *bfqq)
 	if (bfqq->bfqd->last_completed_rq_bfqq == bfqq)
 		bfqq->bfqd->last_completed_rq_bfqq = NULL;
 
+	WARN_ON_ONCE(!list_empty(&bfqq->fifo));
+	WARN_ON_ONCE(!RB_EMPTY_ROOT(&bfqq->sort_list));
+	WARN_ON_ONCE(bfqq->dispatched);
+
 	kmem_cache_free(bfq_pool, bfqq);
 	bfqg_and_blkg_put(bfqg);
 }
@@ -7135,6 +7139,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 {
 	struct bfq_data *bfqd = e->elevator_data;
 	struct bfq_queue *bfqq, *n;
+	unsigned int actuator;
 
 	hrtimer_cancel(&bfqd->idle_slice_timer);
 
@@ -7143,6 +7148,10 @@ static void bfq_exit_queue(struct elevator_queue *e)
 		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
 	spin_unlock_irq(&bfqd->lock);
 
+	for (actuator = 0; actuator < bfqd->num_actuators; actuator++)
+		WARN_ON_ONCE(bfqd->rq_in_driver[actuator]);
+	WARN_ON_ONCE(bfqd->tot_rq_in_driver);
+
 	hrtimer_cancel(&bfqd->idle_slice_timer);
 
 	/* release oom-queue reference to root group */
diff --git a/block/bio.c b/block/bio.c
index 043944fd46eb..8672179213b9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1138,6 +1138,14 @@ int bio_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_add_page);
 
+void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
+			  size_t off)
+{
+	WARN_ON_ONCE(len > UINT_MAX);
+	WARN_ON_ONCE(off > UINT_MAX);
+	__bio_add_page(bio, &folio->page, len, off);
+}
+
 /**
 * bio_add_folio - Attempt to add part of a folio to a bio.
 * @bio: BIO to add to.
@@ -1169,7 +1177,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		if (mark_dirty && !PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
-		put_page(bvec->bv_page);
+		bio_release_page(bio, bvec->bv_page);
 	}
 }
 EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1191,7 +1199,6 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 	bio->bi_io_vec = (struct bio_vec *)iter->bvec;
 	bio->bi_iter.bi_bvec_done = iter->iov_offset;
 	bio->bi_iter.bi_size = size;
-	bio_set_flag(bio, BIO_NO_PAGE_REF);
 	bio_set_flag(bio, BIO_CLONED);
 }
 
@@ -1206,7 +1213,7 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
 	}
 
 	if (same_page)
-		put_page(page);
+		bio_release_page(bio, page);
 	return 0;
 }
 
@@ -1220,7 +1227,7 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
 			queue_max_zone_append_sectors(q), &same_page) != len)
 		return -EINVAL;
 	if (same_page)
-		put_page(page);
+		bio_release_page(bio, page);
 	return 0;
 }
 
@@ -1231,10 +1238,10 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be mapped
 *
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
- * For multi-segment *iter, this function only adds pages from the
- * next non-empty segment of the iov iterator.
+ * Extracts pages from *iter and appends them to @bio's bvec array. The pages
+ * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag.
+ * For a multi-segment *iter, this function only adds pages from the next
+ * non-empty segment of the iov iterator.
 */
 static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
@@ -1266,9 +1273,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	 * result to ensure the bio's total size is correct. The remainder of
 	 * the iov data will be picked up in the next bio iteration.
 	 */
-	size = iov_iter_get_pages(iter, pages,
-				  UINT_MAX - bio->bi_iter.bi_size,
-				  nr_pages, &offset, extraction_flags);
+	size = iov_iter_extract_pages(iter, &pages,
+				      UINT_MAX - bio->bi_iter.bi_size,
+				      nr_pages, extraction_flags, &offset);
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
@@ -1301,7 +1308,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 		iov_iter_revert(iter, left);
 out:
 	while (i < nr_pages)
-		put_page(pages[i++]);
+		bio_release_page(bio, pages[i++]);
 	return ret;
 }
 
@@ -1336,6 +1343,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 		return 0;
 	}
 
+	if (iov_iter_extract_will_pin(iter))
+		bio_set_flag(bio, BIO_PAGE_PINNED);
 	do {
 		ret = __bio_iov_iter_get_pages(bio, iter);
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
@@ -1489,8 +1498,8 @@ void bio_set_pages_dirty(struct bio *bio)
 * the BIO and re-dirty the pages in process context.
 *
 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on.  It will run one put_page() against each page and will run one
- * bio_put() against the BIO.
+ * here on.  It will unpin each page and will run one bio_put() against the
+ * BIO.
 */
 
 static void bio_dirty_fn(struct work_struct *work);
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
index 842e5e1c0f3c..3ec21333f393 100644
--- a/block/blk-cgroup-fc-appid.c
+++ b/block/blk-cgroup-fc-appid.c
@@ -34,7 +34,7 @@ int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
 	 * the vmid from the fabric.
 	 * Adding the overhead of a lock is not necessary.
 	 */
-	strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+	strscpy(blkcg->fc_app_id, app_id, app_id_len);
 	css_put(css);
 out_cgrp_put:
 	cgroup_put(cgrp);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index dce1548a7a0c..aaf9903ad7b2 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -624,8 +624,13 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 			struct blkg_iostat_set *bis =
 				per_cpu_ptr(blkg->iostat_cpu, cpu);
 			memset(bis, 0, sizeof(*bis));
+
+			/* Re-initialize the cleared blkg_iostat_set */
+			u64_stats_init(&bis->sync);
+			bis->blkg = blkg;
 		}
 		memset(&blkg->iostat, 0, sizeof(blkg->iostat));
+		u64_stats_init(&blkg->iostat.sync);
 
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
@@ -762,6 +767,13 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
 		return -ENODEV;
 	}
 
+	mutex_lock(&bdev->bd_queue->rq_qos_mutex);
+	if (!disk_live(bdev->bd_disk)) {
+		blkdev_put_no_open(bdev);
+		mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
+		return -ENODEV;
+	}
+
 	ctx->body = input;
 	ctx->bdev = bdev;
 	return 0;
@@ -906,6 +918,7 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 */
 void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 	__releases(&ctx->bdev->bd_queue->queue_lock)
+	__releases(&ctx->bdev->bd_queue->rq_qos_mutex)
 {
 	if (ctx->blkg) {
 		spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
@@ -913,6 +926,7 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 	}
 
 	if (ctx->bdev) {
+		mutex_unlock(&ctx->bdev
