From 2dd209f00fc5a1caafa493066c7cd692fd2fd57c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Mar 2020 21:26:16 -0700 Subject: blk-mq: Fix a comment in include/linux/blk-mq.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'hctx_list' member of struct blk_mq_hw_ctx is not a list head but instead an entry in q->unused_hctx_list. Fix the comment above this struct member. Fixes: d386732bc142 ("blk-mq: fill header with kernel-doc") Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Cc: André Almeida Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 11cfd6470b1a..31344d5f83e2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -162,7 +162,10 @@ struct blk_mq_hw_ctx { struct dentry *sched_debugfs_dir; #endif - /** @hctx_list: List of all hardware queues. */ + /** + * @hctx_list: if this hctx is not in use, this is an entry in + * q->unused_hctx_list. + */ struct list_head hctx_list; /** -- cgit v1.2.3 From 30a2da7b7e225ef6c87a660419ea04d3cef3f6a7 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 11 Mar 2020 16:07:50 +0530 Subject: block: Fix use-after-free issue accessing struct io_cq There is a potential race between ioc_release_fn() and ioc_clear_queue() as shown below, due to which below kernel crash is observed. It also can result into use-after-free issue. context#1: context#2: ioc_release_fn() __ioc_clear_queue() gets the same icq ->spin_lock(&ioc->lock); ->spin_lock(&ioc->lock); ->ioc_destroy_icq(icq); ->list_del_init(&icq->q_node); ->call_rcu(&icq->__rcu_head, icq_free_icq_rcu); ->spin_unlock(&ioc->lock); ->ioc_destroy_icq(icq); ->hlist_del_init(&icq->ioc_node); This results into below crash as this memory is now used by icq->__rcu_head in context#1. There is a chance that icq could be free'd as well. 22150.386550: <6> Unable to handle kernel write to read-only memory at virtual address ffffffaa8d31ca50 ... Call trace: 22150.607350: <2> ioc_destroy_icq+0x44/0x110 22150.611202: <2> ioc_clear_queue+0xac/0x148 22150.615056: <2> blk_cleanup_queue+0x11c/0x1a0 22150.619174: <2> __scsi_remove_device+0xdc/0x128 22150.623465: <2> scsi_forget_host+0x2c/0x78 22150.627315: <2> scsi_remove_host+0x7c/0x2a0 22150.631257: <2> usb_stor_disconnect+0x74/0xc8 22150.635371: <2> usb_unbind_interface+0xc8/0x278 22150.639665: <2> device_release_driver_internal+0x198/0x250 22150.644897: <2> device_release_driver+0x24/0x30 22150.649176: <2> bus_remove_device+0xec/0x140 22150.653204: <2> device_del+0x270/0x460 22150.656712: <2> usb_disable_device+0x120/0x390 22150.660918: <2> usb_disconnect+0xf4/0x2e0 22150.664684: <2> hub_event+0xd70/0x17e8 22150.668197: <2> process_one_work+0x210/0x480 22150.672222: <2> worker_thread+0x32c/0x4c8 Fix this by adding a new ICQ_DESTROYED flag in ioc_destroy_icq() to indicate this icq is once marked as destroyed. Also, ensure __ioc_clear_queue() is accessing icq within rcu_read_lock/unlock so that icq doesn't get free'd up while it is still using it. Signed-off-by: Sahitya Tummala Co-developed-by: Pradeep P V K Signed-off-by: Pradeep P V K Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index dba15ca8e60b..1dcd9198beb7 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -8,6 +8,7 @@ enum { ICQ_EXITED = 1 << 2, + ICQ_DESTROYED = 1 << 3, }; /* -- cgit v1.2.3 From 9243c6f3e012a92dd900d97ef45efaf8a8edc448 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Sat, 7 Mar 2020 15:56:59 +0100 Subject: block: Document genhd capability flags The kernel documentation includes a brief section about genhd capabilities, but it turns out that the only documented capability (GENHD_FL_MEDIA_CHANGE_NOTIFY) isn't used any more. This patch removes that flag, and documents the rest, based on my understanding of the current uses of these flags in the kernel. The documentation is kept in the header file, alongside the declarations, in the hope that it will be kept up-to-date in future; the kernel documentation is changed to include the documentation generated from the header file. Because the ultimate goal is to provide some end-user documentation (or end-administrator documentation), the comments are perhaps more user-oriented than might be expected. Since the values are shown to users in hexadecimal, the documentation lists them in hexadecimal, and the constant declarations are adjusted to match. Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Stephen Kitt Signed-off-by: Jens Axboe --- include/linux/genhd.h | 69 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fbe58538ad6..e7244fb6b6ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -133,17 +133,64 @@ struct hd_struct { struct rcu_work rcu_work; }; -#define GENHD_FL_REMOVABLE 1 -/* 2 is unused */ -#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4 -#define GENHD_FL_CD 8 -#define GENHD_FL_UP 16 -#define GENHD_FL_SUPPRESS_PARTITION_INFO 32 -#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ -#define GENHD_FL_NATIVE_CAPACITY 128 -#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 -#define GENHD_FL_NO_PART_SCAN 512 -#define GENHD_FL_HIDDEN 1024 +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device + * gives access to removable media. + * When set, the device remains present even when media is not + * inserted. + * Must not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style + * device. + * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl. + * + * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up", + * with a similar meaning to network interfaces. + * + * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include + * partition information in ``/proc/partitions`` or in the output of + * printk_all_partitions(). + * Used for the null block device and some MMC devices. + * + * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended + * dynamic ``dev_t``, i.e. it wants extended device numbers + * (``BLOCK_EXT_MAJOR``). + * This affects the maximum number of partitions. + * + * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the + * partition table, the device's capacity has been extended to its + * native capacity; i.e. the device has hidden capacity used by one + * of the partitions (this is a flag used so that native capacity is + * only ever unlocked once). + * + * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is + * blocked whenever a writer holds an exclusive lock. + * + * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled. + * Used for loop devices in their default settings and some MMC + * devices. + * + * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it + * doesn't produce events, doesn't appear in sysfs, and doesn't have + * an associated ``bdev``. + * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and + * ``GENHD_FL_NO_PART_SCAN``. + * Used for multipath devices. + */ +#define GENHD_FL_REMOVABLE 0x0001 +/* 2 is unused (used to be GENHD_FL_DRIVERFS) */ +/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ +#define GENHD_FL_CD 0x0008 +#define GENHD_FL_UP 0x0010 +#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 +#define GENHD_FL_EXT_DEVT 0x0040 +#define GENHD_FL_NATIVE_CAPACITY 0x0080 +#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100 +#define GENHD_FL_NO_PART_SCAN 0x0200 +#define GENHD_FL_HIDDEN 0x0400 enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ -- cgit v1.2.3 From e598a72faeb543599bdf0d930df3a71906404e6f Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 13 Mar 2020 05:30:05 +0000 Subject: block/genhd: Notify udev about capacity change Allow block/genhd to notify user space (via udev) about disk size changes using a new helper set_capacity_revalidate_and_notify(), which is a wrapper on top of set_capacity(). set_capacity_revalidate_and_notify() will only notify via udev if the current capacity or the target capacity is not zero and iff the capacity changes. Suggested-by: Christoph Hellwig Signed-off-by: Someswarudu Sangaraju Signed-off-by: Balbir Singh Reviewed-by: Bob Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e7244fb6b6ad..3c1a816785c8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -508,6 +508,8 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); +extern void set_capacity_revalidate_and_notify(struct gendisk *disk, + sector_t size, bool revalidate); extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ -- cgit v1.2.3 From ea3edd4dc23027083fbb4a73b65114d08fe73a76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:11 +0100 Subject: block: remove __bdevname There is no good reason for __bdevname to exist. Just open code printing the string in the callers. For three of them the format string can be trivially merged into existing printk statements, and in init/do_mounts.c we can at least do the scnprintf once at the start of the function, and unconditional of CONFIG_BLOCK to make the output for tiny configfs a little more helpful. Acked-by: Theodore Ts'o # for ext4 Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..561b35e3b95b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2699,7 +2699,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name) #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_MAX 512 -extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern void blkdev_show(struct seq_file *,off_t); -- cgit v1.2.3 From 3ad5cee5cd000dc05e6c2410b06fc1d818e7b1e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:13 +0100 Subject: block: move sysfs methods shared by disks and partitions to genhd.c Move the sysfs _show methods that are used both on the full disk and partition nodes to genhd.c instead of hiding them in the partitioning code. Also move the declaration for these methods to block/blk.h so that we don't expose them to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c1a816785c8..612d38fce55c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -706,20 +706,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -extern ssize_t part_size_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); -#ifdef CONFIG_FAIL_MAKE_REQUEST -extern ssize_t part_fail_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_fail_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count); -#endif /* CONFIG_FAIL_MAKE_REQUEST */ - #define alloc_disk_node(minors, node_id) \ ({ \ static struct lock_class_key __key; \ -- cgit v1.2.3 From f17c21c1ecb80e957bafa07d6454836854be7cf2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:14 +0100 Subject: block: remove alloc_part_info and free_part_info There isn't any good reason not to simply open code the allocation and freeing of the partition_meta_info structure. Especially as one of the branches in alloc_part_info is entirely dead code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 612d38fce55c..77f8bb8edfcd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -465,19 +465,6 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw); -static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) -{ - if (disk) - return kzalloc_node(sizeof(struct partition_meta_info), - GFP_KERNEL, disk->node_id); - return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL); -} - -static inline void free_part_info(struct hd_struct *part) -{ - kfree(part->info); -} - void update_io_ticks(struct hd_struct *part, unsigned long now); /* block/genhd.c */ @@ -755,7 +742,7 @@ static inline void hd_struct_kill(struct hd_struct *part) static inline void hd_free_part(struct hd_struct *part) { free_part_stats(part); - free_part_info(part); + kfree(part->info); percpu_ref_exit(&part->ref); } -- cgit v1.2.3 From 1a9fba3a77a5b39d1c9e1611758303f2649474e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:18 +0100 Subject: block: unexport read_dev_sector and put_dev_sector read_dev_sector and put_dev_sector are now only used by the partition parsing code. Remove the export for read_dev_sector and merge it into the only caller. Clean the mess up a bit by using goto labels and the SECTOR_SHIFT constant. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f629d40c645c..53a1325efbc3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1484,15 +1484,6 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } -typedef struct {struct page *v;} Sector; - -unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); - -static inline void put_dev_sector(Sector p) -{ - put_page(p.v); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From 74cc979c3c7f8328b24651daf15280f07533e735 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:19 +0100 Subject: block: cleanup how md_autodetect_dev is called Add a new include/linux/raid/detect.h header to declare the md_autodetect_dev prototype which can be shared between md and the partition code. Then use IS_BUILTIN to call it instead of the ifdef magic. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/raid/detect.h | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 include/linux/raid/detect.h (limited to 'include/linux') diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h new file mode 100644 index 000000000000..37dd3f40cd31 --- /dev/null +++ b/include/linux/raid/detect.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +void md_autodetect_dev(dev_t dev); -- cgit v1.2.3 From 1442f76d4317b420580e11238d20789708c742a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:26 +0100 Subject: block: move struct partition out of genhd.h struct partition is the on-disk format of a MSDOS partition table entry. Move it out of genhd.h into a new msdos_partition.h header and give it a msdos_ prefix to avoid confusion. Also move the magic number from block/partitions/msdos.h to the new header so that it can be used by the SCSI drivers looking at the DOS partition tables. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ------------- include/linux/msdos_partition.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 include/linux/msdos_partition.h (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 77f8bb8edfcd..3d84da9ec0fa 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -70,19 +70,6 @@ enum { #include #include -struct partition { - unsigned char boot_ind; /* 0x80 - active */ - unsigned char head; /* starting head */ - unsigned char sector; /* starting sector */ - unsigned char cyl; /* starting cylinder */ - unsigned char sys_ind; /* What partition type */ - unsigned char end_head; /* end head */ - unsigned char end_sector; /* end sector */ - unsigned char end_cyl; /* end cylinder */ - __le32 start_sect; /* starting sector counting from 0 */ - __le32 nr_sects; /* nr of sectors in partition */ -} __attribute__((packed)); - struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h new file mode 100644 index 000000000000..a8e2c1b4bc66 --- /dev/null +++ b/include/linux/msdos_partition.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MSDOS_PARTITION_H +#define _LINUX_MSDOS_PARTITION_H + +#define MSDOS_LABEL_MAGIC 0xAA55 + +struct msdos_partition { + u8 boot_ind; /* 0x80 - active */ + u8 head; /* starting head */ + u8 sector; /* starting sector */ + u8 cyl; /* starting cylinder */ + u8 sys_ind; /* What partition type */ + u8 end_head; /* end head */ + u8 end_sector; /* end sector */ + u8 end_cyl; /* end cylinder */ + __le32 start_sect; /* starting sector counting from 0 */ + __le32 nr_sects; /* nr of sectors in partition */ +} __packed; + +#endif /* LINUX_MSDOS_PARTITION_H */ -- cgit v1.2.3 From 0226e9ead44b2eb8f2471d24e0b0c5ff60bc322c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:27 +0100 Subject: block: move the *_PARTITION enum out of genhd.h The enum containing the *_PARTITION symbolic names is only relevant for the partition parser. More specifically most values are MSDOS partition table system indicators and thus should go straight into msdos.c. One value is only used by the sun partition parser, and the sun and sgi partition parsers use the same value as the x86 Linux RAID indicator to also indicate RAID autodetection. Duplicate them in sun.c and sgi.c given that the different partition types use entirely different values otherwise. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 30 ------------------------------ include/linux/msdos_partition.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d84da9ec0fa..3050b0ee9cc7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -30,36 +30,6 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; -enum { -/* These three have identical behaviour; use the second one if DOS FDISK gets - confused about extended/logical partitions starting past cylinder 1023. */ - DOS_EXTENDED_PARTITION = 5, - LINUX_EXTENDED_PARTITION = 0x85, - WIN98_EXTENDED_PARTITION = 0x0f, - - SUN_WHOLE_DISK = DOS_EXTENDED_PARTITION, - - LINUX_SWAP_PARTITION = 0x82, - LINUX_DATA_PARTITION = 0x83, - LINUX_LVM_PARTITION = 0x8e, - LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ - - SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, - NEW_SOLARIS_X86_PARTITION = 0xbf, - - DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ - DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ - DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ - EZD_PARTITION = 0x55, /* EZ-DRIVE */ - - FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ - OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ - NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ - BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ - MINIX_PARTITION = 0x81, /* Minix Partition ID */ - UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ -}; - #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h index a8e2c1b4bc66..e151af072cd1 100644 --- a/include/linux/msdos_partition.h +++ b/include/linux/msdos_partition.h @@ -17,4 +17,35 @@ struct msdos_partition { __le32 nr_sects; /* nr of sectors in partition */ } __packed; +enum msdos_sys_ind { + /* + * These three have identical behaviour; use the second one if DOS FDISK + * gets confused about extended/logical partitions starting past + * cylinder 1023. + */ + DOS_EXTENDED_PARTITION = 5, + LINUX_EXTENDED_PARTITION = 0x85, + WIN98_EXTENDED_PARTITION = 0x0f, + + LINUX_SWAP_PARTITION = 0x82, + LINUX_DATA_PARTITION = 0x83, + LINUX_LVM_PARTITION = 0x8e, + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ + + SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, + NEW_SOLARIS_X86_PARTITION = 0xbf, + + DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ + DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ + DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ + EZD_PARTITION = 0x55, /* EZ-DRIVE */ + + FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ + OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ + NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ + BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ + MINIX_PARTITION = 0x81, /* Minix Partition ID */ + UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ +}; + #endif /* LINUX_MSDOS_PARTITION_H */ -- cgit v1.2.3 From cb0ab52652123c47cb72e665ce9fdd3029dcb175 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:28 +0100 Subject: partitions/msdos: remove LINUX_SWAP_PARTITION Just always use NEW_SOLARIS_X86_PARTITION and explain the situation, as that is less confusing than two names for a single value. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/msdos_partition.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h index e151af072cd1..2cb82db2a43c 100644 --- a/include/linux/msdos_partition.h +++ b/include/linux/msdos_partition.h @@ -27,12 +27,11 @@ enum msdos_sys_ind { LINUX_EXTENDED_PARTITION = 0x85, WIN98_EXTENDED_PARTITION = 0x0f, - LINUX_SWAP_PARTITION = 0x82, LINUX_DATA_PARTITION = 0x83, LINUX_LVM_PARTITION = 0x8e, LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ - SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, + SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */ NEW_SOLARIS_X86_PARTITION = 0xbf, DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ -- cgit v1.2.3 From 3f4fc59c1321b74df27dcd5d77b37989ed93265b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:29 +0100 Subject: block: move the various x86 Unix label formats out of genhd.h All these are just used in block/partitions/msdos.c, so move them out of the genhd.h driver included by every driver. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 143 -------------------------------------------------- 1 file changed, 143 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3050b0ee9cc7..da62b44b15be 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -473,149 +473,6 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->part0.nr_sects = size; } -#ifdef CONFIG_SOLARIS_X86_PARTITION - -#define SOLARIS_X86_NUMSLICE 16 -#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) - -struct solaris_x86_slice { - __le16 s_tag; /* ID tag of partition */ - __le16 s_flag; /* permission flags */ - __le32 s_start; /* start sector no of partition */ - __le32 s_size; /* # of blocks in partition */ -}; - -struct solaris_x86_vtoc { - unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */ - __le32 v_sanity; /* to verify vtoc sanity */ - __le32 v_version; /* layout version */ - char v_volume[8]; /* volume name */ - __le16 v_sectorsz; /* sector size in bytes */ - __le16 v_nparts; /* number of partitions */ - unsigned int v_reserved[10]; /* free space */ - struct solaris_x86_slice - v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ - unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */ - char v_asciilabel[128]; /* for compatibility */ -}; - -#endif /* CONFIG_SOLARIS_X86_PARTITION */ - -#ifdef CONFIG_BSD_DISKLABEL -/* - * BSD disklabel support by Yossi Gottlieb - * updated by Marc Espie - */ - -/* check against BSD src/sys/sys/disklabel.h for consistency */ - -#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ -#define BSD_MAXPARTITIONS 16 -#define OPENBSD_MAXPARTITIONS 16 -#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ -struct bsd_disklabel { - __le32 d_magic; /* the magic number */ - __s16 d_type; /* drive type */ - __s16 d_subtype; /* controller/d_type specific */ - char d_typename[16]; /* type name, e.g. "eagle" */ - char d_packname[16]; /* pack identifier */ - __u32 d_secsize; /* # of bytes per sector */ - __u32 d_nsectors; /* # of data sectors per track */ - __u32 d_ntracks; /* # of tracks per cylinder */ - __u32 d_ncylinders; /* # of data cylinders per unit */ - __u32 d_secpercyl; /* # of data sectors per cylinder */ - __u32 d_secperunit; /* # of data sectors per unit */ - __u16 d_sparespertrack; /* # of spare sectors per track */ - __u16 d_sparespercyl; /* # of spare sectors per cylinder */ - __u32 d_acylinders; /* # of alt. cylinders per unit */ - __u16 d_rpm; /* rotational speed */ - __u16 d_interleave; /* hardware sector interleave */ - __u16 d_trackskew; /* sector 0 skew, per track */ - __u16 d_cylskew; /* sector 0 skew, per cylinder */ - __u32 d_headswitch; /* head switch time, usec */ - __u32 d_trkseek; /* track-to-track seek, usec */ - __u32 d_flags; /* generic flags */ -#define NDDATA 5 - __u32 d_drivedata[NDDATA]; /* drive-type specific information */ -#define NSPARE 5 - __u32 d_spare[NSPARE]; /* reserved for future use */ - __le32 d_magic2; /* the magic number (again) */ - __le16 d_checksum; /* xor of data incl. partitions */ - - /* filesystem and partition information: */ - __le16 d_npartitions; /* number of partitions in following */ - __le32 d_bbsize; /* size of boot area at sn0, bytes */ - __le32 d_sbsize; /* max size of fs superblock, bytes */ - struct bsd_partition { /* the partition table */ - __le32 p_size; /* number of sectors in partition */ - __le32 p_offset; /* starting sector */ - __le32 p_fsize; /* filesystem basic fragment size */ - __u8 p_fstype; /* filesystem type, see below */ - __u8 p_frag; /* filesystem fragments per block */ - __le16 p_cpg; /* filesystem cylinders per group */ - } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ -}; - -#endif /* CONFIG_BSD_DISKLABEL */ - -#ifdef CONFIG_UNIXWARE_DISKLABEL -/* - * Unixware slices support by Andrzej Krzysztofowicz - * and Krzysztof G. Baranowski - */ - -#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ -#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ -#define UNIXWARE_NUMSLICE 16 -#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ - -struct unixware_slice { - __le16 s_label; /* label */ - __le16 s_flags; /* permission flags */ - __le32 start_sect; /* starting sector */ - __le32 nr_sects; /* number of sectors in slice */ -}; - -struct unixware_disklabel { - __le32 d_type; /* drive type */ - __le32 d_magic; /* the magic number */ - __le32 d_version; /* version number */ - char d_serial[12]; /* serial number of the device */ - __le32 d_ncylinders; /* # of data cylinders per device */ - __le32 d_ntracks; /* # of tracks per cylinder */ - __le32 d_nsectors; /* # of data sectors per track */ - __le32 d_secsize; /* # of bytes per sector */ - __le32 d_part_start; /* # of first sector of this partition */ - __le32 d_unknown1[12]; /* ? */ - __le32 d_alt_tbl; /* byte offset of alternate table */ - __le32 d_alt_len; /* byte length of alternate table */ - __le32 d_phys_cyl; /* # of physical cylinders per device */ - __le32 d_phys_trk; /* # of physical tracks per cylinder */ - __le32 d_phys_sec; /* # of physical sectors per track */ - __le32 d_phys_bytes; /* # of physical bytes per sector */ - __le32 d_unknown2; /* ? */ - __le32 d_unknown3; /* ? */ - __le32 d_pad[8]; /* pad */ - - struct unixware_vtoc { - __le32 v_magic; /* the magic number */ - __le32 v_version; /* version number */ - char v_name[8]; /* volume name */ - __le16 v_nslices; /* # of slices */ - __le16 v_unknown1; /* ? */ - __le32 v_reserved[10]; /* reserved */ - struct unixware_slice - v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ - } vtoc; - -}; /* 408 */ - -#endif /* CONFIG_UNIXWARE_DISKLABEL */ - -#ifdef CONFIG_MINIX_SUBPARTITION -# define MINIX_NR_SUBPARTITIONS 4 -#endif /* CONFIG_MINIX_SUBPARTITION */ - #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -- cgit v1.2.3 From 2b8bd423614c595540eaadcfbc702afe8e155e50 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:04 +0300 Subject: block/diskstats: more accurate approximation of io_ticks for slow disks Currently io_ticks is approximated by adding one at each start and end of requests if jiffies counter has changed. This works perfectly for requests shorter than a jiffy or if one of requests starts/ends at each jiffy. If disk executes just one request at a time and they are longer than two jiffies then only first and last jiffies will be accounted. Fix is simple: at the end of request add up into io_ticks jiffies passed since last update rather than just one jiffy. Example: common HDD executes random read 4k requests around 12ms. fio --name=test --filename=/dev/sdb --rw=randread --direct=1 --runtime=30 & iostat -x 10 sdb Note changes of iostat's "%util" 8,43% -> 99,99% before/after patch: Before: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sdb 0,00 0,00 82,60 0,00 330,40 0,00 8,00 0,96 12,09 12,09 0,00 1,02 8,43 After: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sdb 0,00 0,00 82,50 0,00 330,00 0,00 8,00 1,00 12,10 12,10 0,00 12,12 99,99 Now io_ticks does not loose time between start and end of requests, but for queue-depth > 1 some I/O time between adjacent starts might be lost. For load estimation "%util" is not as useful as average queue length, but it clearly shows how often disk queue is completely empty. Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index da62b44b15be..13bb51f37b3f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -422,7 +422,7 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw); -void update_io_ticks(struct hd_struct *part, unsigned long now); +void update_io_ticks(struct hd_struct *part, unsigned long now, bool end); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, -- cgit v1.2.3 From ea18e0f0a63af9064db3d4065d90fa743ae0991b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:06 +0300 Subject: block/diskstats: accumulate all per-cpu counters in one pass Reading /proc/diskstats iterates over all cpus for summing each field. It's faster to sum all fields in one pass. Hammering /proc/diskstats with fio shows 2x performance improvement: fio --name=test --numjobs=$JOBS --filename=/proc/diskstats \ --size=1k --bs=1k --fallocate=none --create_on_open=1 \ --time_based=1 --runtime=10 --invalidate=0 --group_report JOBS=1 JOBS=10 Before: 7k iops 64k iops After: 18k iops 120k iops Also this way code is more compact: add/remove: 1/0 grow/shrink: 0/2 up/down: 194/-1540 (-1346) Function old new delta part_stat_read_all - 194 +194 diskstats_show 1344 631 -713 part_stat_show 1219 392 -827 Total: Before=14966947, After=14965601, chg -0.01% Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 13bb51f37b3f..b0c588d1aa29 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -380,9 +380,6 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ -#define part_stat_read_msecs(part, which) \ - div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC) - #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ part_stat_read(part, field[STAT_WRITE]) + \ -- cgit v1.2.3 From 8cd5b8fc00716fb71f6b32d594b38a8f286d6c20 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:08 +0300 Subject: block/diskstats: replace time_in_queue with sum of request times Column "time_in_queue" in diskstats is supposed to show total waiting time of all requests. I.e. value should be equal to the sum of times from other columns. But this is not true, because column "time_in_queue" is counted separately in jiffies rather than in nanoseconds as other times. This patch removes redundant counter for "time_in_queue" and shows total time of read, write, discard and flush requests. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b0c588d1aa29..790fdc3e0b3d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -46,7 +46,6 @@ struct disk_stats { unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; - unsigned long time_in_queue; local_t in_flight[2]; }; -- cgit v1.2.3 From 31eb6186797c149665fd44f3847bdf8d539efa59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:35 +0100 Subject: block: mark block_depr static Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 790fdc3e0b3d..b322e805a916 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -27,7 +27,6 @@ #define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; -extern struct kobject *block_depr; extern struct class block_class; #define DISK_MAX_PARTS 256 -- cgit v1.2.3 From 6005771c17db56b6a9acc12bd084134191560e18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:36 +0100 Subject: block: mark part_in_flight and part_in_flight_rw static Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b322e805a916..c0c5bb51fa56 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -409,9 +409,6 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) -unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part); -void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw); void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, -- cgit v1.2.3 From 29125ed624eeb3ac2eb7bca313a8de29c1c84dcd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:40 +0100 Subject: block: move guard_bio_eod to bio.c This is bio layer functionality and not related to buffer heads. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 853d92ceee64..a430e9c1c2d2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -471,6 +471,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); +void guard_bio_eod(struct bio *bio); static inline void zero_fill_bio(struct bio *bio) { -- cgit v1.2.3 From 581e26004a09c50e5017caadc850ea17e374a5ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:41 +0100 Subject: block: move block layer internals out of include/linux/genhd.h None of this needs to be exposed to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 120 -------------------------------------------------- 1 file changed, 120 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c0c5bb51fa56..14354a6e89c2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -298,9 +298,6 @@ extern void disk_part_iter_init(struct disk_part_iter *piter, extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); -extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, - sector_t sector); - /* * Macros to operate on percpu disk statistics: * @@ -409,13 +406,6 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) -void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, - int rw); -void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, - int rw); - -void update_io_ticks(struct hd_struct *part, unsigned long now, bool end); - /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); @@ -465,27 +455,11 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->part0.nr_sects = size; } -#define ADDPART_FLAG_NONE 0 -#define ADDPART_FLAG_RAID 1 -#define ADDPART_FLAG_WHOLEDISK 2 - -extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); -extern void blk_free_devt(dev_t devt); -extern void blk_invalidate_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); -extern char *disk_name (struct gendisk *hd, int partno, char *buf); int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); -extern int disk_expand_part_tbl(struct gendisk *disk, int target); -extern struct hd_struct * __must_check add_partition(struct gendisk *disk, - int partno, sector_t start, - sector_t len, int flags, - struct partition_meta_info - *info); -extern void __delete_partition(struct percpu_ref *); -extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); @@ -517,100 +491,6 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -static inline int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, __delete_partition, 0, - GFP_KERNEL)) - return -ENOMEM; - return 0; -} - -static inline void hd_struct_get(struct hd_struct *part) -{ - percpu_ref_get(&part->ref); -} - -static inline int hd_struct_try_get(struct hd_struct *part) -{ - return percpu_ref_tryget_live(&part->ref); -} - -static inline void hd_struct_put(struct hd_struct *part) -{ - percpu_ref_put(&part->ref); -} - -static inline void hd_struct_kill(struct hd_struct *part) -{ - percpu_ref_kill(&part->ref); -} - -static inline void hd_free_part(struct hd_struct *part) -{ - free_part_stats(part); - kfree(part->info); - percpu_ref_exit(&part->ref); -} - -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. - */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. - */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} - -#if defined(CONFIG_BLK_DEV_INTEGRITY) -extern void blk_integrity_add(struct gendisk *); -extern void blk_integrity_del(struct gendisk *); -#else /* CONFIG_BLK_DEV_INTEGRITY */ -static inline void blk_integrity_add(struct gendisk *disk) { } -static inline void blk_integrity_del(struct gendisk *disk) { } -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From c6a564ffadc9105880329710164ee493f0de103c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:42 +0100 Subject: block: move the part_stat* helpers from genhd.h to a new header These macros are just used by a few files. Move them out of genhd.h, which is included everywhere into a new standalone header. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 108 ------------------------------------------- include/linux/part_stat.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 108 deletions(-) create mode 100644 include/linux/part_stat.h (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 14354a6e89c2..927eed0be179 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -298,114 +298,6 @@ extern void disk_part_iter_init(struct disk_part_iter *piter, extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); -/* - * Macros to operate on percpu disk statistics: - * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters - * and should be called between disk_stat_lock() and - * disk_stat_unlock(). - * - * part_stat_read() can be called at any time. - * - * part_stat_{add|set_all}() and {init|free}_part_stats are for - * internal use only. - */ -#ifdef CONFIG_SMP -#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) - -#define part_stat_get(part, field) \ - part_stat_get_cpu(part, field, smp_processor_id()) - -#define part_stat_read(part, field) \ -({ \ - typeof((part)->dkstats->field) res = 0; \ - unsigned int _cpu; \ - for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ - res; \ -}) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, - sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ - free_percpu(part->dkstats); -} - -#else /* !CONFIG_SMP */ -#define part_stat_lock() ({ rcu_read_lock(); 0; }) -#define part_stat_unlock() rcu_read_unlock() - -#define part_stat_get(part, field) ((part)->dkstats.field) -#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) -#define part_stat_read(part, field) part_stat_get(part, field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - memset(&part->dkstats, value, sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ -} - -#endif /* CONFIG_SMP */ - -#define part_stat_read_accum(part, field) \ - (part_stat_read(part, field[STAT_READ]) + \ - part_stat_read(part, field[STAT_WRITE]) + \ - part_stat_read(part, field[STAT_DISCARD])) - -#define __part_stat_add(part, field, addnd) \ - (part_stat_get(part, field) += (addnd)) - -#define part_stat_add(part, field, addnd) do { \ - __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ -} while (0) - -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) - /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h new file mode 100644 index 000000000000..ece607607a86 --- /dev/null +++ b/include/linux/part_stat.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PART_STAT_H +#define _LINUX_PART_STAT_H + +#include + +/* + * Macros to operate on percpu disk statistics: + * + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. + */ +#ifdef CONFIG_SMP +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define part_stat_get_cpu(part, field, cpu) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field) + +#define part_stat_get(part, field) \ + part_stat_get_cpu(part, field, smp_processor_id()) + +#define part_stat_read(part, field) \ +({ \ + typeof((part)->dkstats->field) res = 0; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res; \ +}) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + int i; + + for_each_possible_cpu(i) + memset(per_cpu_ptr(part->dkstats, i), value, + sizeof(struct disk_stats)); +} + +static inline int init_part_stats(struct hd_struct *part) +{ + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ + free_percpu(part->dkstats); +} + +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() + +#define part_stat_get(part, field) ((part)->dkstats.field) +#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) +#define part_stat_read(part, field) part_stat_get(part, field) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + memset(&part->dkstats, value, sizeof(struct disk_stats)); +} + +static inline int init_part_stats(struct hd_struct *part) +{ + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ +} + +#endif /* CONFIG_SMP */ + +#define part_stat_read_accum(part, field) \ + (part_stat_read(part, field[STAT_READ]) + \ + part_stat_read(part, field[STAT_WRITE]) + \ + part_stat_read(part, field[STAT_DISCARD])) + +#define __part_stat_add(part, field, addnd) \ + (part_stat_get(part, field) += (addnd)) + +#define part_stat_add(part, field, addnd) do { \ + __part_stat_add((part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add(&part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) + +#define part_stat_dec(gendiskp, field) \ + part_stat_add(gendiskp, field, -1) +#define part_stat_inc(gendiskp, field) \ + part_stat_add(gendiskp, field, 1) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define part_stat_local_dec(gendiskp, field) \ + local_dec(&(part_stat_get(gendiskp, field))) +#define part_stat_local_inc(gendiskp, field) \ + local_inc(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read(gendiskp, field) \ + local_read(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read_cpu(gendiskp, field, cpu) \ + local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) + +#endif /* _LINUX_PART_STAT_H */ -- cgit v1.2.3 From 348e114bbd4dce430eae70f01a04c8fc259b4cf1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 09:07:17 +0100 Subject: block: move the ->devnode callback to struct block_device_operations There really isn't any good reason to stash a method directly into struct gendisk. Move it together with the other block device operations. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/genhd.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53a1325efbc3..e8defd718d62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1697,6 +1697,7 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); + char *(*devnode)(struct gendisk *disk, umode_t *mode); struct module *owner; const struct pr_ops *pr_ops; }; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 927eed0be179..85b9e253cd39 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -191,7 +191,6 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned short events; /* supported events */ unsigned short event_flags; /* flags related to event processing */ -- cgit v1.2.3 From 2f227bb99934a4faa6dfe2cda2594bce8897a323 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 09:30:08 +0100 Subject: block: add a blk_mq_init_queue_data helper This allows a driver to pass a queuedata member before ->init_hctx is called. null_blk currently open codes this logic, but I'd rather have it in the core to ease future maintainance. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 31344d5f83e2..f389d7c724bd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -412,6 +412,8 @@ enum { << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, + void *queuedata); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q, bool elevator_init); -- cgit v1.2.3 From 3d745ea5b095a3985129e162900b7e6c22518a9d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 09:30:11 +0100 Subject: block: simplify queue allocation Current make_request based drivers use either blk_alloc_queue_node or blk_alloc_queue to allocate a queue, and then set up the make_request_fn function pointer and a few parameters using the blk_queue_make_request helper. Simplify this by passing the make_request pointer to blk_alloc_queue, and while at it merge the _node variant into the main helper by always passing a node_id, and remove the superfluous gfp_mask parameter. A lower-level __blk_alloc_queue is kept for the blk-mq case. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e8defd718d62..3f27ff08483e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1063,7 +1063,6 @@ extern void blk_abort_request(struct request *); * Access functions for manipulating queue properties */ extern void blk_cleanup_queue(struct request_queue *); -extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); @@ -1140,8 +1139,7 @@ extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); +struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit v1.2.3 From 130879f1ee0e25b0391b8c78b3baac6fe41f4d38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 18:48:37 +0100 Subject: block: move bio_map_* to blk-map.c The bio_map_* helpers are just the low-level helpers for the blk_rq_map_* APIs. Move them together for better logical grouping, as no there isn't much overlap with other code in bio.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a430e9c1c2d2..c1c0f9ea4e63 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -441,14 +441,6 @@ void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_release_pages(struct bio *bio, bool mark_dirty); -struct rq_map_data; -extern struct bio *bio_map_user_iov(struct request_queue *, - struct iov_iter *, gfp_t); -extern void bio_unmap_user(struct bio *); -extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, - gfp_t); -extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, - gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); @@ -463,12 +455,6 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); - -extern struct bio *bio_copy_user_iov(struct request_queue *, - struct rq_map_data *, - struct iov_iter *, - gfp_t); -extern int bio_uncopy_user(struct bio *); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); -- cgit v1.2.3