// SPDX-License-Identifier: GPL-2.0
/*
* gendisk handling
*
* Portions Copyright (C) 2020 Christoph Hellwig
*/
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include <linux/part_stat.h>
#include <linux/blktrace_api.h>
#include "blk-throttle.h"
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-cgroup.h"
/*
 * NOTE(review): nothing in this part of the file reads or writes this pointer.
 * Going by the name it is presumably the kobject backing a deprecated sysfs
 * "block" directory -- confirm against the code that creates it.
 */
static struct kobject *block_depr;
/*
 * Unique, monotonically increasing sequential number associated with block
 * device instances (i.e. incremented each time a device is attached).
 * Associating uevents with block devices in userspace is difficult and racy:
 * the uevent netlink socket is lossy, and on slow and overloaded systems has
 * a very high latency.
 * Block devices do not have exclusive owners in userspace, any process can set
 * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
 * can be reused again and again).
 * A userspace process setting up a block device and watching for its events
 * cannot thus reliably tell whether an event relates to the device it just set
 * up or another earlier instance with the same name.
 * This sequential number allows userspace processes to solve this problem, and
 * uniquely associate a uevent with the lifetime of a device.
 */
static atomic64_t diskseq;
/*
 * for extended dynamic devt allocation, currently only one major is used
 *
 * NR_EXT_DEVT is 1 << MINORBITS, i.e. one value per possible minor number
 * (assuming MINORBITS is the width of the minor field, as in
 * <linux/kdev_t.h>). ext_devt_ida is the IDA declared for this allocation;
 * its users are outside this part of the file.
 */
#define NR_EXT_DEVT (1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
/*
 * Set the capacity of @disk to @sectors.
 *
 * A value larger than BLK_DEV_MAX_SECTORS is clamped to that limit and a
 * one-time warning naming the disk is logged. The (possibly clamped) value is
 * then stored on disk->part0 through bdev_set_nr_sectors().
 */
void set_capacity(struct gendisk *disk, sector_t sectors)
{
	if (sectors > BLK_DEV_MAX_SECTORS) {
		/*
		 * sector_t is an unsigned 64-bit type, and this branch is
		 * exactly where out-of-range values show up. Print it as
		 * unsigned so a huge value (e.g. a negative size that wrapped)
		 * is not reported as a negative number.
		 */
		pr_warn_once("%s: truncate capacity from %llu to %lld\n",
			     disk->disk_name, sectors,
			     BLK_DEV_MAX_SECTORS);
		sectors = BLK_DEV_MAX_SECTORS;
	}

	bdev_set_nr_sectors(disk->part0, sectors);
}
EXPORT_SYMBOL(set_capacity);
/*
 * Set the capacity of @disk to @size and, where appropriate, tell userspace.
 *
 * The previous capacity is sampled before the new one is applied. Nothing is
 * logged or sent when the size is unchanged, the disk is not live, or the disk
 * is flagged GENHD_FL_HIDDEN. Otherwise the change is logged (rate limited),
 * and a KOBJ_CHANGE uevent carrying "RESIZE=1" is emitted unless either the
 * old or the new size is zero.
 *
 * Returns true if a uevent was sent, otherwise false.
 */
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
{
	/* Must be read before set_capacity() overwrites it. */
	sector_t capacity = get_capacity(disk);
	char *envp[] = { "RESIZE=1", NULL };

	set_capacity(disk, size);

	/*
	 * Only print a message and send a uevent if the gendisk is user visible
	 * and alive. This avoids spamming the log and udev when setting the
	 * initial capacity during probing.
	 */
	if (size == capacity ||
	    !disk_live(disk) ||
	    (disk->flags & GENHD_FL_HIDDEN))
		return false;

	/*
	 * Both values are sector_t (unsigned 64-bit), so print them with %llu;
	 * %lld would show a very large requested size as a negative number.
	 */
	pr_info_ratelimited("%s: detected capacity change from %llu to %llu\n",
			    disk->disk_name, capacity, size);

	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;

	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
}
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
/*
 * Fold the per-CPU statistics of @part into @stat.
 *
 * @stat is cleared first, so on return it holds only the totals accumulated
 * over every possible CPU: the nsecs, sectors, ios and merges counters of each
 * stat group, plus io_ticks.
 */
static void part_stat_read_all(struct block_device *part,
		struct disk_stats *stat)
{
	int cpu;

	memset(stat, 0, sizeof(*stat));

	for_each_possible_cpu(cpu) {
		const struct disk_stats *pcpu = per_cpu_ptr(part->bd_stats, cpu);
		int i;

		stat->io_ticks += pcpu->io_ticks;

		for (i = 0; i < NR_STAT_GROUPS; i++) {
			stat->ios[i] += pcpu->ios[i];
			stat->merges[i] += pcpu->merges[i];
			stat->sectors[i] += pcpu->sectors[i];
			stat->nsecs[i] += pcpu->nsecs[i];
		}
	}
}
static void bdev_count_inflight_rw(struct block_device *part,
unsigned int inflight[2], bool mq_driver)
{
int write = 0;
int read = 0;
int cpu;
if (mq_driver) {
blk_mq_in_driver_rw(part, inflight);
return;
}
for_each_possible_cpu(cpu) {
read += part_stat_local_read_cpu(part, in_flight[READ], cpu);
write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu);
}
/*
* While iterating all CPUs, some IOs may be issued from a CPU already
* traversed and complete on a CPU that has not yet been traversed,
* causing the inflight number to be negative.
*/
inflight[READ] = read > 0 ? read : 0;
inflight[WRITE] = write > 0 ? write : 0;
}
/**
 * bdev_count_inflight - get the number of inflight IOs for a block device.
 *
 * @part: the block device.
 *
 * An IO counts as inflight once its accounting has started, i.e. from
 * bdev_start_io_acct() for a bio-based block device and from
 * blk_account_io_start() for a rq-based block device.
 *
 * Return: the inflight reads plus the inflight writes.
 */
unsigned int bdev_count_inflight(struct block_device *part)
{
	unsigned int rw[2] = { 0, 0 };

	bdev_count_inflight_rw(part, rw, false);

	return rw[READ] + rw[WRITE];
}
EXPORT_SYMBOL_GPL(bdev_count_inflight);
/*
 * Can be deleted altogether. Later.
 */
#define BLKDEV_MAJOR_HASH_SIZE 255

/*
 * One node in a major_names[] chain: a major number and its name, linked to
 * the next node of the same slot through ->next.
 */
struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);
#endif
};

/* Table of chains; the slot for a major is given by major_to_index(). */
static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];

/*
 * blkdev_show() walks a chain under major_names_spinlock.
 * NOTE(review): the users of major_names_lock are outside this part of the
 * file -- confirm which paths take the mutex.
 */
static DEFINE_MUTEX(major_names_lock);
static DEFINE_SPINLOCK(major_names_spinlock);
/* slot in major_names[] for @major - for now: assume no multimajor ranges */
static inline int major_to_index(unsigned major)
{
	const unsigned int slot = major % BLKDEV_MAJOR_HASH_SIZE;

	return slot;
}
#ifdef CONFIG_PROC_FS
void blkdev_show(struct seq_file *seqf, off_t offset)
{
struct blk_major_name *dp;
spin_lock(&major_names_spinlock);
for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
if (dp->major == offset)
seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
spin_unlock(&major_names
|