From 926597ffce0e3e2f785475df18e1636194209910 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:11 +0100 Subject: block: move disk_{block,unblock,flush}_events to blk.h No need to have these declarations in a public header. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220124093913.742411-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6906a45bc761..504f9a6674ac 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -185,9 +185,6 @@ static inline int bdev_read_only(struct block_device *bdev) return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); } -extern void disk_block_events(struct gendisk *disk); -extern void disk_unblock_events(struct gendisk *disk); -extern void disk_flush_events(struct gendisk *disk, unsigned int mask); bool set_capacity_and_notify(struct gendisk *disk, sector_t size); bool disk_force_media_change(struct gendisk *disk, unsigned int events); -- cgit v1.2.3 From e7243285c0fc87054990fcde630583586ff8ed5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:12 +0100 Subject: block: move blk_drop_partitions to blk.h No need to have this declaration in a public header. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220124093913.742411-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 504f9a6674ac..aa4bd985dbe5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -219,7 +219,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) } int bdev_disk_changed(struct gendisk *disk, bool invalidate); -void blk_drop_partitions(struct gendisk *disk); struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); -- cgit v1.2.3 From 322cbb50de711814c42fb088f6d31901502c711a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:13 +0100 Subject: block: remove genhd.h There is no good reason to keep genhd.h separate from the main blkdev.h header that includes it. So fold the contents of genhd.h into blkdev.h and remove genhd.h entirely. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220124093913.742411-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 273 ++++++++++++++++++++++++++++++++++++++++++- include/linux/genhd.h | 287 ---------------------------------------------- include/linux/part_stat.h | 2 +- 3 files changed, 271 insertions(+), 291 deletions(-) delete mode 100644 include/linux/genhd.h (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..99a4384bb8a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions Copyright (C) 1992 Drew Eckhardt + */ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H -#include -#include +#include +#include +#include #include #include #include @@ -12,11 +16,15 @@ #include #include #include +#include #include #include #include +#include #include #include +#include +#include struct module; struct request_queue; @@ -33,6 +41,10 @@ struct blk_queue_stats; struct blk_stat_callback; struct blk_crypto_profile; +extern const struct device_type disk_type; +extern struct device_type part_type; +extern struct class block_class; + /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -45,6 +57,144 @@ struct blk_crypto_profile; */ #define BLKCG_MAX_POLS 6 +#define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 + +#define PARTITION_META_INFO_VOLNAMELTH 64 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) + +struct partition_meta_info { + char uuid[PARTITION_META_INFO_UUIDLTH]; + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. Used for the underlying components of multipath devices. + * + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. + * + */ +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; + +enum { + DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ + DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ +}; + +enum { + /* Poll even if events_poll_msecs is unset */ + DISK_EVENT_FLAG_POLL = 1 << 0, + /* Forward events to udev */ + DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, +}; + +struct disk_events; +struct badblocks; + +struct blk_integrity { + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +struct gendisk { + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. + */ + int major; + int first_minor; + int minors; + + char disk_name[DISK_NAME_LEN]; /* name of major driver */ + + unsigned short events; /* supported events */ + unsigned short event_flags; /* flags related to event processing */ + + struct xarray part_tbl; + struct block_device *part0; + + const struct block_device_operations *fops; + struct request_queue *queue; + void *private_data; + + int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 +#define GD_READ_ONLY 1 +#define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 + + struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ + + struct backing_dev_info *bdi; + struct kobject *slave_dir; +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED + struct list_head slave_bdevs; +#endif + struct timer_rand_state *random; + atomic_t sync_io; /* RAID */ + struct disk_events *ev; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct kobject integrity_kobj; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif + int node_id; + struct badblocks *bb; + struct lockdep_map lockdep_map; + u64 diskseq; +}; + +static inline bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} + +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return MKDEV(disk->major, disk->first_minor); +} + static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) @@ -596,6 +746,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int __must_check add_disk(struct gendisk *disk) +{ + return device_add_disk(NULL, disk, NULL); +} +void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); +void set_disk_ro(struct gendisk *disk, bool read_only); +void disk_uevent(struct gendisk *disk, enum kobject_action action); + +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0->bd_read_only || + test_bit(GD_READ_ONLY, &disk->state); +} + +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); +bool disk_force_media_change(struct gendisk *disk, unsigned int events); + +void add_disk_randomness(struct gendisk *disk) __latent_entropy; +void rand_initialize_disk(struct gendisk *disk); + +static inline sector_t get_start_sect(struct block_device *bdev) +{ + return bdev->bd_start_sect; +} + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; +} + +static inline sector_t get_capacity(struct gendisk *disk) +{ + return bdev_nr_sectors(disk->part0); +} + +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + +int bdev_disk_changed(struct gendisk *disk, bool invalidate); + +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); +void put_disk(struct gendisk *disk); +struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + +/** + * blk_alloc_disk - allocate a gendisk structure + * @node_id: numa node to allocate on + * + * Allocate and pre-initialize a gendisk structure for use with BIO based + * drivers. + * + * Context: can sleep + */ +#define blk_alloc_disk(node_id) \ +({ \ + static struct lock_class_key __key; \ + \ + __blk_alloc_disk(node_id, &__key); \ +}) +void blk_cleanup_disk(struct gendisk *disk); + +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) +void unregister_blkdev(unsigned int major, const char *name); + +bool bdev_check_media_change(struct block_device *bdev); +int __invalidate_device(struct block_device *bdev, bool kill_dirty); +void set_capacity(struct gendisk *disk, sector_t size); + +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +int bd_register_pending_holders(struct gendisk *disk); +#else +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +static inline int bd_register_pending_holders(struct gendisk *disk) +{ + return 0; +} +#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ + +dev_t part_devt(struct gendisk *disk, u8 partno); +void inc_diskseq(struct gendisk *disk); +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); @@ -1311,6 +1573,7 @@ void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1326,7 +1589,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -#endif +static inline void printk_all_partitions(void) +{ +} +#endif /* CONFIG_BLOCK */ + int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h deleted file mode 100644 index aa4bd985dbe5..000000000000 --- a/include/linux/genhd.h +++ /dev/null @@ -1,287 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_GENHD_H -#define _LINUX_GENHD_H - -/* - * genhd.h Copyright (C) 1992 Drew Eckhardt - * Generic hard disk header file by - * Drew Eckhardt - * - * - */ - -#include -#include -#include -#include -#include -#include - -extern const struct device_type disk_type; -extern struct device_type part_type; -extern struct class block_class; - -#define DISK_MAX_PARTS 256 -#define DISK_NAME_LEN 32 - -#define PARTITION_META_INFO_VOLNAMELTH 64 -/* - * Enough for the string representation of any kind of UUID plus NULL. - * EFI UUID is 36 characters. MSDOS UUID is 11 characters. - */ -#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) - -struct partition_meta_info { - char uuid[PARTITION_META_INFO_UUIDLTH]; - u8 volname[PARTITION_META_INFO_VOLNAMELTH]; -}; - -/** - * DOC: genhd capability flags - * - * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to - * removable media. When set, the device remains present even when media is not - * inserted. Shall not be set for devices which are removed entirely when the - * media is removed. - * - * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, - * doesn't appear in sysfs, and can't be opened from userspace or using - * blkdev_get*. Used for the underlying components of multipath devices. - * - * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not - * scan for partitions from add_disk, and users can't add partitions manually. - * - */ -enum { - GENHD_FL_REMOVABLE = 1 << 0, - GENHD_FL_HIDDEN = 1 << 1, - GENHD_FL_NO_PART = 1 << 2, -}; - -enum { - DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ - DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ -}; - -enum { - /* Poll even if events_poll_msecs is unset */ - DISK_EVENT_FLAG_POLL = 1 << 0, - /* Forward events to udev */ - DISK_EVENT_FLAG_UEVENT = 1 << 1, - /* Block event polling when open for exclusive write */ - DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, -}; - -struct disk_events; -struct badblocks; - -struct blk_integrity { - const struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; -}; - -struct gendisk { - /* - * major/first_minor/minors should not be set by any new driver, the - * block core will take care of allocating them automatically. - */ - int major; - int first_minor; - int minors; - - char disk_name[DISK_NAME_LEN]; /* name of major driver */ - - unsigned short events; /* supported events */ - unsigned short event_flags; /* flags related to event processing */ - - struct xarray part_tbl; - struct block_device *part0; - - const struct block_device_operations *fops; - struct request_queue *queue; - void *private_data; - - int flags; - unsigned long state; -#define GD_NEED_PART_SCAN 0 -#define GD_READ_ONLY 1 -#define GD_DEAD 2 -#define GD_NATIVE_CAPACITY 3 - - struct mutex open_mutex; /* open/close mutex */ - unsigned open_partitions; /* number of open partitions */ - - struct backing_dev_info *bdi; - struct kobject *slave_dir; -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED - struct list_head slave_bdevs; -#endif - struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ - struct disk_events *ev; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct kobject integrity_kobj; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ -#if IS_ENABLED(CONFIG_CDROM) - struct cdrom_device_info *cdi; -#endif - int node_id; - struct badblocks *bb; - struct lockdep_map lockdep_map; - u64 diskseq; -}; - -static inline bool disk_live(struct gendisk *disk) -{ - return !inode_unhashed(disk->part0->bd_inode); -} - -/* - * The gendisk is refcounted by the part0 block_device, and the bd_device - * therein is also used for device model presentation in sysfs. - */ -#define dev_to_disk(device) \ - (dev_to_bdev(device)->bd_disk) -#define disk_to_dev(disk) \ - (&((disk)->part0->bd_device)) - -#if IS_REACHABLE(CONFIG_CDROM) -#define disk_to_cdi(disk) ((disk)->cdi) -#else -#define disk_to_cdi(disk) NULL -#endif - -static inline dev_t disk_devt(struct gendisk *disk) -{ - return MKDEV(disk->major, disk->first_minor); -} - -void disk_uevent(struct gendisk *disk, enum kobject_action action); - -/* block/genhd.c */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups); -static inline int __must_check add_disk(struct gendisk *disk) -{ - return device_add_disk(NULL, disk, NULL); -} -extern void del_gendisk(struct gendisk *gp); - -void invalidate_disk(struct gendisk *disk); - -void set_disk_ro(struct gendisk *disk, bool read_only); - -static inline int get_disk_ro(struct gendisk *disk) -{ - return disk->part0->bd_read_only || - test_bit(GD_READ_ONLY, &disk->state); -} - -static inline int bdev_read_only(struct block_device *bdev) -{ - return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); -} - -bool set_capacity_and_notify(struct gendisk *disk, sector_t size); -bool disk_force_media_change(struct gendisk *disk, unsigned int events); - -/* drivers/char/random.c */ -extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -extern void rand_initialize_disk(struct gendisk *disk); - -static inline sector_t get_start_sect(struct block_device *bdev) -{ - return bdev->bd_start_sect; -} - -static inline sector_t bdev_nr_sectors(struct block_device *bdev) -{ - return bdev->bd_nr_sectors; -} - -static inline loff_t bdev_nr_bytes(struct block_device *bdev) -{ - return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; -} - -static inline sector_t get_capacity(struct gendisk *disk) -{ - return bdev_nr_sectors(disk->part0); -} - -static inline u64 sb_bdev_nr_blocks(struct super_block *sb) -{ - return bdev_nr_sectors(sb->s_bdev) >> - (sb->s_blocksize_bits - SECTOR_SHIFT); -} - -int bdev_disk_changed(struct gendisk *disk, bool invalidate); - -struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, - struct lock_class_key *lkclass); -extern void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); - -/** - * blk_alloc_disk - allocate a gendisk structure - * @node_id: numa node to allocate on - * - * Allocate and pre-initialize a gendisk structure for use with BIO based - * drivers. - * - * Context: can sleep - */ -#define blk_alloc_disk(node_id) \ -({ \ - static struct lock_class_key __key; \ - \ - __blk_alloc_disk(node_id, &__key); \ -}) -void blk_cleanup_disk(struct gendisk *disk); - -int __register_blkdev(unsigned int major, const char *name, - void (*probe)(dev_t devt)); -#define register_blkdev(major, name) \ - __register_blkdev(major, name, NULL) -void unregister_blkdev(unsigned int major, const char *name); - -bool bdev_check_media_change(struct block_device *bdev); -int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void set_capacity(struct gendisk *disk, sector_t size); - -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED -int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} -#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ - -dev_t part_devt(struct gendisk *disk, u8 partno); -void inc_diskseq(struct gendisk *disk); -dev_t blk_lookup_devt(const char *name, int partno); -void blk_request_module(dev_t devt); -#ifdef CONFIG_BLOCK -void printk_all_partitions(void); -#else /* CONFIG_BLOCK */ -static inline void printk_all_partitions(void) -{ -} -#endif /* CONFIG_BLOCK */ - -#endif /* _LINUX_GENHD_H */ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 6f7949b2fd8d..abeba356bc3f 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -2,7 +2,7 @@ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H -#include +#include #include struct disk_stats { -- cgit v1.2.3 From 0a3140ea0fae377c9eaa031b7db1670ae422ed47 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 24 Jan 2022 10:11:02 +0100 Subject: block: pass a block_device and opf to blk_next_bio All callers need to set the block_device and operation, so lift that into the common code. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-15-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 117d7f248ac9..edeae54074ed 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -790,6 +790,7 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) bio->bi_opf |= REQ_NOWAIT; } -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); +struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, + unsigned int nr_pages, unsigned int opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ -- cgit v1.2.3 From 609be1066731fea86436f5f91022f82e592ab456 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:03 +0100 Subject: block: pass a block_device and opf to bio_alloc_bioset Pass the block_device and operation that we plan to use this bio for to bio_alloc_bioset to optimize the assigment. NULL/0 can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Also move the gfp_mask argument after the nr_vecs argument for a much more logical calling convention matching what most of the kernel does. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index edeae54074ed..2f63ae9a71e1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,8 +405,9 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); -struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs, - struct bio_set *bs); +struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, + unsigned int opf, gfp_t gfp_mask, + struct bio_set *bs); struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); @@ -419,7 +420,7 @@ extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); + return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); -- cgit v1.2.3 From b77c88c2100ce6a5ec8126c13599b5a7f6663e32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:04 +0100 Subject: block: pass a block_device and opf to bio_alloc_kiocb Pass the block_device and operation that we plan to use this bio for to bio_alloc_kiocb to optimize the assigment. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2f63ae9a71e1..5c5ada2ebb27 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,8 +408,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask, struct bio_set *bs); -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, - struct bio_set *bs); +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -- cgit v1.2.3 From 07888c665b405b1cd3577ddebfeb74f4717a84c4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:05 +0100 Subject: block: pass a block_device and opf to bio_alloc Pass the block_device and operation that we plan to use this bio for to bio_alloc to optimize the assignment. NULL/0 can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Also move the gfp_mask argument after the nr_vecs argument for a much more logical calling convention matching what most of the kernel does. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-18-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5c5ada2ebb27..be6ac92913d4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -418,9 +418,10 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; -static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) +static inline struct bio *bio_alloc(struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) { - return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set); + return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); -- cgit v1.2.3 From 49add4966d79244013fce35f95c6833fae82b8b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:06 +0100 Subject: block: pass a block_device and opf to bio_init Pass the block_device that we plan to use this bio for and the operation to bio_init to optimize the assignment. A NULL block_device can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-19-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index be6ac92913d4..41bedf727f59 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -456,8 +456,8 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; extern int submit_bio_wait(struct bio *bio); -extern void bio_init(struct bio *bio, struct bio_vec *table, - unsigned short max_vecs); +void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, + unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -- cgit v1.2.3 From a7c50c940477bae89fb2b4f51bd969a2d95d7512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:07 +0100 Subject: block: pass a block_device and opf to bio_reset Pass the block_device that we plan to use this bio for and the operation to bio_reset to optimize the assigment. A NULL block_device can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-20-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 41bedf727f59..18cfe5bb41ea 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -459,7 +459,7 @@ extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); -extern void bio_reset(struct bio *); +void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); @@ -517,13 +517,6 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) bio_associate_blkg(bio); } -static inline void bio_copy_dev(struct bio *dst, struct bio *src) -{ - bio_clear_flag(dst, BIO_REMAPPED); - dst->bi_bdev = src->bi_bdev; - bio_clone_blkg_association(dst, src); -} - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * -- cgit v1.2.3 From b1f866b013e6e5583f2f0bf4a61d13eddb9a1799 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:48 +0100 Subject: block: remove blk_needs_flush_plug blk_needs_flush_plug fails to account for the cb_list, which needs flushing as well. Remove it and just check if there is a plug instead of poking into the internals of the plug structure. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127070549.1377856-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99a4384bb8a5..f902a1c2fac0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1055,14 +1055,6 @@ extern void blk_finish_plug(struct blk_plug *); void blk_flush_plug(struct blk_plug *plug, bool from_schedule); -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - return plug && - (plug->mq_list || !list_empty(&plug->cb_list)); -} - int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -1086,11 +1078,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; -- cgit v1.2.3 From aa8dcccaf32bfdc09f2aff089d5d60c37da5b7b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:49 +0100 Subject: block: check that there is a plug in blk_flush_plug Rename blk_flush_plug to __blk_flush_plug and add a wrapper that includes the NULL check instead of open coding that check everywhere. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220127070549.1377856-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f902a1c2fac0..654163d3b903 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1053,7 +1053,12 @@ extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -void blk_flush_plug(struct blk_plug *plug, bool from_schedule); +void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); +static inline void blk_flush_plug(struct blk_plug *plug, bool async) +{ + if (plug) + __blk_flush_plug(plug, async); +} int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); -- cgit v1.2.3 From b42c1fc3d55e077d36718ad9800d89100b2aff81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 07:41:25 +0100 Subject: block: fix the kerneldoc for bio_end_io_acct Document the actually existing parameter name. Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127064125.1314347-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 654163d3b903..3bfc75a2a450 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1520,7 +1520,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, /** * bio_end_io_acct - end I/O accounting for bio based drivers * @bio: bio to end account for - * @start: start time returned by bio_start_io_acct() + * @start_time: start time returned by bio_start_io_acct() */ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { -- cgit v1.2.3 From 2651bf680bc2ad9a078b7222b0873145ab4ece07 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Feb 2022 11:28:25 -0800 Subject: block: introduce BLK_STS_OFFLINE Currently, drivers reports BLK_STS_IOERR for devices that are not full online or being removed. This behavior could cause confusion for users, as they are not really I/O errors from the device. Solve this issue with a new state BLK_STS_OFFLINE, which reports "device offline error" in dmesg instead of "I/O error". EIO is intentionally kept to not change user visible return value. Signed-off-by: Song Liu Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220203192827.1370270-2-song@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff..5561e58d158a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -153,6 +153,13 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) +/* + * BLK_STS_OFFLINE is returned from the driver when the target device is offline + * or is being taken offline. This could help differentiate the case where a + * device is intentionally being shut down from a real I/O error. + */ +#define BLK_STS_OFFLINE ((__force blk_status_t)17) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with -- cgit v1.2.3 From 56b4b5abcdab6daf71c5536fca2772f178590e06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:06 +0100 Subject: block: clone crypto and integrity data in __bio_clone_fast __bio_clone_fast should also clone integrity and crypto data, as a clone without those is incomplete. Right now the only caller that can actually support crypto and integrity data (dm) does it manually for the one callchain that supports these, but we better do it properly in the core. Note that all callers except for the above mentioned one also don't need to handle failure at all, given that the integrity and crypto clones are based on mempool allocations that won't fail for sleeping allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 18cfe5bb41ea..b814361c957b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -413,7 +413,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -extern void __bio_clone_fast(struct bio *, struct bio *); +int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; -- cgit v1.2.3 From abfc426d1b2fb2176df59851a64223b58ddae7e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:09 +0100 Subject: block: pass a block_device to bio_clone_fast Pass a block_device to bio_clone_fast and __bio_clone_fast and give the functions more suitable names. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b814361c957b..7523aba4ddf7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -413,8 +413,10 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp); -extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); +struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, + gfp_t gfp, struct bio_set *bs); +int bio_init_clone(struct block_device *bdev, struct bio *bio, + struct bio *bio_src, gfp_t gfp); extern struct bio_set fs_bio_set; -- cgit v1.2.3 From 3301bc53358a0eb0a0db65fd7a513cd4cb50c83a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 10 Jan 2022 15:29:45 +0800 Subject: lib/sbitmap: kill 'depth' from sbitmap_word Only the last sbitmap_word can have different depth, and all the others must have same depth of 1U << sb->shift, so not necessary to store it in sbitmap_word, and it can be retrieved easily and efficiently by adding one internal helper of __map_depth(sb, index). Remove 'depth' field from sbitmap_word, then the annotation of ____cacheline_aligned_in_smp for 'word' isn't needed any more. Not see performance effect when running high parallel IOPS test on null_blk. This way saves us one cacheline(usually 64 words) per each sbitmap_word. Cc: Martin Wilck Signed-off-by: Ming Lei Reviewed-by: Martin Wilck Reviewed-by: John Garry Link: https://lore.kernel.org/r/20220110072945.347535-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 95df357ec009..df3b584b0f0c 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -27,15 +27,10 @@ struct seq_file; * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { - /** - * @depth: Number of bits being used in @word/@cleared - */ - unsigned long depth; - /** * @word: word holding free bits */ - unsigned long word ____cacheline_aligned_in_smp; + unsigned long word; /** * @cleared: word holding cleared bits @@ -164,6 +159,14 @@ struct sbitmap_queue { int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint); +/* sbitmap internal helper */ +static inline unsigned int __map_depth(const struct sbitmap *sb, int index) +{ + if (index == sb->map_nr - 1) + return sb->depth - (index << sb->shift); + return 1U << sb->shift; +} + /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. @@ -251,7 +254,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, - sb->map[index].depth - nr, + __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; -- cgit v1.2.3 From 3f607293b74d6acb06571a774a500143c1f0ed2c Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 8 Feb 2022 20:07:04 +0800 Subject: sbitmap: Delete old sbitmap_queue_get_shallow() Since __sbitmap_queue_get_shallow() was introduced in commit c05e66733788 ("sbitmap: add sbitmap_get_shallow() operation"), it has not been used. Delete __sbitmap_queue_get_shallow() and rename public __sbitmap_queue_get_shallow() -> sbitmap_queue_get_shallow() as it is odd to have public __foo but no foo at all. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1644322024-105340-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index df3b584b0f0c..dffeb8281c2d 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -135,7 +135,7 @@ struct sbitmap_queue { /** * @min_shallow_depth: The minimum shallow depth which may be passed to - * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). + * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; }; @@ -463,7 +463,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset); /** - * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. @@ -475,8 +475,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth); +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct @@ -498,32 +498,6 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, return nr; } -/** - * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct - * sbitmap_queue, limiting the depth used from each word. - * @sbq: Bitmap queue to allocate from. - * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to - * sbitmap_queue_clear()). - * @shallow_depth: The maximum number of bits to allocate from a single word. - * See sbitmap_get_shallow(). - * - * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after - * initializing @sbq. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int *cpu, - unsigned int shallow_depth) -{ - int nr; - - *cpu = get_cpu(); - nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); - put_cpu(); - return nr; -} - /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. -- cgit v1.2.3 From 0e51e2ab49a99bc5077760aa083dfa1c3bf9899b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:47 +0800 Subject: block: remove THROTL_IOPS_MAX No one uses THROTL_IOPS_MAX any more, so remove it. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211101149.2368042-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b4de2010fba5..bdc49bd4eef0 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -28,8 +28,6 @@ /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) -/* Max limits for throttle policy */ -#define THROTL_IOPS_MAX UINT_MAX #define FC_APPID_LEN 129 -- cgit v1.2.3 From 672fdcf0e7de3b1e39416ac85abf178f023271f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:49 +0800 Subject: block: partition include/linux/blk-cgroup.h Partition include/linux/blk-cgroup.h into two parts: one is public part, the other is block layer private part. Suggested by Christoph Hellwig. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211101149.2368042-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 459 +-------------------------------------------- 1 file changed, 5 insertions(+), 454 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index bdc49bd4eef0..f2ad8ed8f777 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -25,12 +25,8 @@ #include #include -/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ -#define BLKG_STAT_CPU_BATCH (INT_MAX / 2) - #define FC_APPID_LEN 129 - #ifdef CONFIG_BLK_CGROUP enum blkg_iostat_type { @@ -42,6 +38,7 @@ enum blkg_iostat_type { }; struct blkcg_gq; +struct blkg_policy_data; struct blkcg { struct cgroup_subsys_state css; @@ -74,36 +71,6 @@ struct blkg_iostat_set { struct blkg_iostat last; }; -/* - * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a - * request_queue (q). This is used by blkcg policies which need to track - * information per blkcg - q pair. - * - * There can be multiple active blkcg policies and each blkg:policy pair is - * represented by a blkg_policy_data which is allocated and freed by each - * policy's pd_alloc/free_fn() methods. A policy can allocate private data - * area by allocating larger data structure which embeds blkg_policy_data - * at the beginning. - */ -struct blkg_policy_data { - /* the blkg and policy id this per-policy data belongs to */ - struct blkcg_gq *blkg; - int plid; -}; - -/* - * Policies that need to keep per-blkcg data which is independent from any - * request_queue associated to it should implement cpd_alloc/free_fn() - * methods. A policy can allocate private data area by allocating larger - * data structure which embeds blkcg_policy_data at the beginning. - * cpd_init() is invoked to let each policy handle per-blkcg data. - */ -struct blkcg_policy_data { - /* the blkcg and policy id this per-policy data belongs to */ - struct blkcg *blkcg; - int plid; -}; - /* association between a blk cgroup and a request queue */ struct blkcg_gq { /* Pointer to the associated request_queue */ @@ -139,120 +106,17 @@ struct blkcg_gq { struct rcu_head rcu_head; }; -typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); -typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); -typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, - struct request_queue *q, struct blkcg *blkcg); -typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, - struct seq_file *s); - -struct blkcg_policy { - int plid; - /* cgroup files for the policy */ - struct cftype *dfl_cftypes; - struct cftype *legacy_cftypes; - - /* operations */ - blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; - blkcg_pol_init_cpd_fn *cpd_init_fn; - blkcg_pol_free_cpd_fn *cpd_free_fn; - blkcg_pol_bind_cpd_fn *cpd_bind_fn; - - blkcg_pol_alloc_pd_fn *pd_alloc_fn; - blkcg_pol_init_pd_fn *pd_init_fn; - blkcg_pol_online_pd_fn *pd_online_fn; - blkcg_pol_offline_pd_fn *pd_offline_fn; - blkcg_pol_free_pd_fn *pd_free_fn; - blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; - blkcg_pol_stat_pd_fn *pd_stat_fn; -}; - -extern struct blkcg blkcg_root; extern struct cgroup_subsys_state * const blkcg_root_css; -extern bool blkcg_debug_stats; - -struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, - struct request_queue *q, bool update_hint); -int blkcg_init_queue(struct request_queue *q); -void blkcg_exit_queue(struct request_queue *q); - -/* Blkio controller policy registration */ -int blkcg_policy_register(struct blkcg_policy *pol); -void blkcg_policy_unregister(struct blkcg_policy *pol); -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol); -void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol); - -const char *blkg_dev_name(struct blkcg_gq *blkg); -void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, - u64 (*prfill)(struct seq_file *, - struct blkg_policy_data *, int), - const struct blkcg_policy *pol, int data, - bool show_total); -u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); - -struct blkg_conf_ctx { - struct block_device *bdev; - struct blkcg_gq *blkg; - char *body; -}; - -struct block_device *blkcg_conf_open_bdev(char **inputp); -int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - char *input, struct blkg_conf_ctx *ctx); -void blkg_conf_finish(struct blkg_conf_ctx *ctx); -/** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} +void blkcg_destroy_blkgs(struct blkcg *blkcg); +void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_maybe_throttle_current(void); static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -/** - * __bio_blkcg - internal, inconsistent version to get blkcg - * - * DO NOT USE. - * This function is inconsistent and consequently is dangerous to use. The - * first part of the function returns a blkcg where a reference is owned by the - * bio. This means it does not need to be rcu protected as it cannot go away - * with the bio owning a reference to it. However, the latter potentially gets - * it from task_css(). This can race against task migration and the cgroup - * dying. It is also semantically different as it must be called rcu protected - * and is susceptible to failure when trying to get a reference to it. - * Therefore, it is not ok to assume that *_get() will always succeed on the - * blkcg returned here. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - /** * bio_blkcg - grab the blkcg associated with a bio * @bio: target bio @@ -288,22 +152,6 @@ static inline bool blk_cgroup_congested(void) return ret; } -/** - * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg - * @return: true if this bio needs to be submitted with the root blkg context. - * - * In order to avoid priority inversions we sometimes need to issue a bio as if - * it were attached to the root blkg, and then backcharge to the actual owning - * blkg. The idea is we do bio_blkcg() to look up the actual context for the - * bio and attach the appropriate blkg to the bio. Then we call this helper and - * if it is true run with the root blkg for that queue and then do any - * backcharging to the originating cgroup once the io is complete. - */ -static inline bool bio_issue_as_root_blkg(struct bio *bio) -{ - return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; -} - /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest @@ -315,96 +163,6 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) return css_to_blkcg(blkcg->css.parent); } -/** - * __blkg_lookup - internal version of blkg_lookup() - * @blkcg: blkcg of interest - * @q: request_queue of interest - * @update_hint: whether to update lookup hint with the result or not - * - * This is internal version and shouldn't be used by policy - * implementations. Looks up blkgs for the @blkcg - @q pair regardless of - * @q's bypass state. If @update_hint is %true, the caller should be - * holding @q->queue_lock and lookup hint is updated on success. - */ -static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q, - bool update_hint) -{ - struct blkcg_gq *blkg; - - if (blkcg == &blkcg_root) - return q->root_blkg; - - blkg = rcu_dereference(blkcg->blkg_hint); - if (blkg && blkg->q == q) - return blkg; - - return blkg_lookup_slowpath(blkcg, q, update_hint); -} - -/** - * blkg_lookup - lookup blkg for the specified blkcg - q pair - * @blkcg: blkcg of interest - * @q: request_queue of interest - * - * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock. - */ -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return __blkg_lookup(blkcg, q, false); -} - -/** - * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair - * @q: request_queue of interest - * - * Lookup blkg for @q at the root level. See also blkg_lookup(). - */ -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ - return q->root_blkg; -} - -/** - * blkg_to_pdata - get policy private data - * @blkg: blkg of interest - * @pol: policy of interest - * - * Return pointer to private data associated with the @blkg-@pol pair. - */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) -{ - return blkg ? blkg->pd[pol->plid] : NULL; -} - -static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, - struct blkcg_policy *pol) -{ - return blkcg ? blkcg->cpd[pol->plid] : NULL; -} - -/** - * pdata_to_blkg - get blkg associated with policy private data - * @pd: policy private data of interest - * - * @pd is policy private data. Determine the blkg it's associated with. - */ -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) -{ - return pd ? pd->blkg : NULL; -} - -static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) -{ - return cpd ? cpd->blkcg : NULL; -} - -extern void blkcg_destroy_blkgs(struct blkcg *blkcg); - /** * blkcg_pin_online - pin online state * @blkcg: blkcg of interest @@ -437,231 +195,24 @@ static inline void blkcg_unpin_online(struct blkcg *blkcg) } while (blkcg); } -/** - * blkg_path - format cgroup path of blkg - * @blkg: blkg of interest - * @buf: target buffer - * @buflen: target buffer length - * - * Format the path of the cgroup of @blkg into @buf. - */ -static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) -{ - return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); -} - -/** - * blkg_get - get a blkg reference - * @blkg: blkg to get - * - * The caller should be holding an existing reference. - */ -static inline void blkg_get(struct blkcg_gq *blkg) -{ - percpu_ref_get(&blkg->refcnt); -} - -/** - * blkg_tryget - try and get a blkg reference - * @blkg: blkg to get - * - * This is for use when doing an RCU lookup of the blkg. We may be in the midst - * of freeing this blkg, so we can only use it if the refcnt is not zero. - */ -static inline bool blkg_tryget(struct blkcg_gq *blkg) -{ - return blkg && percpu_ref_tryget(&blkg->refcnt); -} - -/** - * blkg_put - put a blkg reference - * @blkg: blkg to put - */ -static inline void blkg_put(struct blkcg_gq *blkg) -{ - percpu_ref_put(&blkg->refcnt); -} - -/** - * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU - * read locked. If called under either blkcg or queue lock, the iteration - * is guaranteed to include all and only online blkgs. The caller may - * update @pos_css by calling css_rightmost_descendant() to skip subtree. - * @p_blkg is included in the iteration and the first node to be visited. - */ -#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -/** - * blkg_for_each_descendant_post - post-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Similar to blkg_for_each_descendant_pre() but performs post-order - * traversal instead. Synchronization rules are the same. @p_blkg is - * included in the iteration and the last node to be visited. - */ -#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -bool __blkcg_punt_bio_submit(struct bio *bio); - -static inline bool blkcg_punt_bio_submit(struct bio *bio) -{ - if (bio->bi_opf & REQ_CGROUP_PUNT) - return __blkcg_punt_bio_submit(bio); - else - return false; -} - -static inline void blkcg_bio_issue_init(struct bio *bio) -{ - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -} - -static inline void blkcg_use_delay(struct blkcg_gq *blkg) -{ - if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) - return; - if (atomic_add_return(1, &blkg->use_delay) == 1) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); -} - -static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - if (WARN_ON_ONCE(old < 0)) - return 0; - if (old == 0) - return 0; - - /* - * We do this song and dance because we can race with somebody else - * adding or removing delay. If we just did an atomic_dec we'd end up - * negative and we'd already be in trouble. We need to subtract 1 and - * then check to see if we were the last delay so we can drop the - * congestion count on the cgroup. - */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); - if (cur == old) - break; - old = cur; - } - - if (old == 0) - return 0; - if (old == 1) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - return 1; -} - -/** - * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount - * @blkg: target blkg - * @delay: delay duration in nsecs - * - * When enabled with this function, the delay is not decayed and must be - * explicitly cleared with blkcg_clear_delay(). Must not be mixed with - * blkcg_[un]use_delay() and blkcg_add_delay() usages. - */ -static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person setting the congestion count for this blkg. */ - if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); - - atomic64_set(&blkg->delay_nsec, delay); -} - -/** - * blkcg_clear_delay - Disable allocator delay mechanism - * @blkg: target blkg - * - * Disable use_delay mechanism. See blkcg_set_delay(). - */ -static inline void blkcg_clear_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person clearing the congestion count for this blkg. */ - if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); -} - -void blk_cgroup_bio_start(struct bio *bio); -void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); -void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); -void blkcg_maybe_throttle_current(void); #else /* CONFIG_BLK_CGROUP */ struct blkcg { }; -struct blkg_policy_data { -}; - -struct blkcg_policy_data { -}; - struct blkcg_gq { }; -struct blkcg_policy { -}; - #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } #ifdef CONFIG_BLOCK - static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } - -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ return NULL; } -static inline int blkcg_init_queue(struct request_queue *q) { return 0; } -static inline void blkcg_exit_queue(struct request_queue *q) { } -static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } -static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } -static inline int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { return 0; } -static inline void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { } - -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } +#endif /* CONFIG_BLOCK */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) { return NULL; } -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } -static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } -static inline void blkg_get(struct blkcg_gq *blkg) { } -static inline void blkg_put(struct blkcg_gq *blkg) { } - -static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } -static inline void blkcg_bio_issue_init(struct bio *bio) { } -static inline void blk_cgroup_bio_start(struct bio *bio) { } - -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) - -#endif /* CONFIG_BLOCK */ #endif /* CONFIG_B