summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-11 10:19:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-11 10:19:51 -0700
commitc0f7e49fc480a97770e448e0c0493e7ba46a9852 (patch)
treebf53acb3db8c5b4d33320ff89ae3111460717dab
parent8177a5c96229ff24da1e362789e359b68b4f34f5 (diff)
parent221e8360834c59f0c9952630fa5904a94ebd2bb8 (diff)
downloadlinux-c0f7e49fc480a97770e448e0c0493e7ba46a9852.tar.gz
linux-c0f7e49fc480a97770e448e0c0493e7ba46a9852.tar.bz2
linux-c0f7e49fc480a97770e448e0c0493e7ba46a9852.zip
Merge tag 'block-5.15-2021-09-11' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe pull request from Christoph: - fix nvmet command set reporting for passthrough controllers (Adam Manzanares) - update a MAINTAINERS email address (Chaitanya Kulkarni) - set QUEUE_FLAG_NOWAIT for nvme-multipth (me) - handle errors from add_disk() (Luis Chamberlain) - update the keep alive interval when kato is modified (Tatsuya Sasaki) - fix a buffer overrun in nvmet_subsys_attr_serial (Hannes Reinecke) - do not reset transport on data digest errors in nvme-tcp (Daniel Wagner) - only call synchronize_srcu when clearing current path (Daniel Wagner) - revalidate paths during rescan (Hannes Reinecke) - Split out the fs/block_dev into block/fops.c and block/bdev.c, which has been long overdue. Do this now before -rc1, to avoid annoying conflicts due to this (Christoph) - blk-throtl use-after-free fix (Li) - Improve plug depth for multi-device plugs, greatly increasing md resync performance (Song) - blkdev_show() locking fix (Tetsuo) - n64cart error check fix (Yang) * tag 'block-5.15-2021-09-11' of git://git.kernel.dk/linux-block: n64cart: fix return value check in n64cart_probe() blk-mq: allow 4x BLK_MAX_REQUEST_COUNT at blk_plug for multiple_queues block: move fs/block_dev.c to block/bdev.c block: split out operations on block special files blk-throttle: fix UAF by deleteing timer in blk_throtl_exit() block: genhd: don't call blkdev_show() with major_names_lock held nvme: update MAINTAINERS email address nvme: add error handling support for add_disk() nvme: only call synchronize_srcu when clearing current path nvme: update keep alive interval when kato is modified nvme-tcp: Do not reset transport on data digest errors nvmet: fixup buffer overrun in nvmet_subsys_attr_serial() nvmet: return bool from nvmet_passthru_ctrl and nvmet_is_passthru_req nvmet: looks at the passthrough controller when initializing CAP nvme: move nvme_multi_css into nvme.h nvme-multipath: revalidate paths during rescan nvme-multipath: set QUEUE_FLAG_NOWAIT
-rw-r--r--Documentation/core-api/kernel-api.rst3
-rw-r--r--Documentation/filesystems/api-summary.rst3
-rw-r--r--MAINTAINERS3
-rw-r--r--block/Makefile2
-rw-r--r--block/bdev.c (renamed from fs/block_dev.c)641
-rw-r--r--block/blk-mq.c14
-rw-r--r--block/blk-throttle.c1
-rw-r--r--block/blk.h2
-rw-r--r--block/fops.c640
-rw-r--r--block/genhd.c9
-rw-r--r--drivers/block/n64cart.c4
-rw-r--r--drivers/nvme/host/core.c68
-rw-r--r--drivers/nvme/host/multipath.c19
-rw-r--r--drivers/nvme/host/nvme.h10
-rw-r--r--drivers/nvme/host/tcp.c22
-rw-r--r--drivers/nvme/target/admin-cmd.c2
-rw-r--r--drivers/nvme/target/configfs.c5
-rw-r--r--drivers/nvme/target/core.c10
-rw-r--r--drivers/nvme/target/nvmet.h11
-rw-r--r--drivers/nvme/target/passthru.c14
-rw-r--r--fs/Makefile2
-rw-r--r--fs/internal.h2
22 files changed, 805 insertions, 682 deletions
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 2a7444e3a4c2..2e7186805148 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -315,6 +315,9 @@ Block Devices
.. kernel-doc:: block/genhd.c
:export:
+.. kernel-doc:: block/bdev.c
+ :export:
+
Char devices
============
diff --git a/Documentation/filesystems/api-summary.rst b/Documentation/filesystems/api-summary.rst
index 7e5c04c98619..98db2ea5fa12 100644
--- a/Documentation/filesystems/api-summary.rst
+++ b/Documentation/filesystems/api-summary.rst
@@ -71,9 +71,6 @@ Other Functions
.. kernel-doc:: fs/fs-writeback.c
:export:
-.. kernel-doc:: fs/block_dev.c
- :export:
-
.. kernel-doc:: fs/anon_inodes.c
:export:
diff --git a/MAINTAINERS b/MAINTAINERS
index 51be43b2495b..7d46f03e5037 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3313,7 +3313,6 @@ S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: block/
F: drivers/block/
-F: fs/block_dev.c
F: include/linux/blk*
F: kernel/trace/blktrace.c
F: lib/sbitmap.c
@@ -13409,7 +13408,7 @@ F: include/linux/nvme-fc.h
NVM EXPRESS TARGET DRIVER
M: Christoph Hellwig <hch@lst.de>
M: Sagi Grimberg <sagi@grimberg.me>
-M: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+M: Chaitanya Kulkarni <kch@nvidia.com>
L: linux-nvme@lists.infradead.org
S: Supported
W: http://git.infradead.org/nvme.git
diff --git a/block/Makefile b/block/Makefile
index 6cf40276d3cf..41aa1ba69c90 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -3,7 +3,7 @@
# Makefile for the kernel block layer
#
-obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
diff --git a/fs/block_dev.c b/block/bdev.c
index 45df6cbccf12..cf2780cb44a7 100644
--- a/fs/block_dev.c
+++ b/block/bdev.c
@@ -7,12 +7,10 @@
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
-#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
@@ -20,30 +18,22 @@
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
-#include <linux/pagevec.h>
#include <linux/writeback.h>
-#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
-#include <linux/log2.h>
#include <linux/cleancache.h>
-#include <linux/task_io_accounting_ops.h>
-#include <linux/falloc.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
-#include <linux/suspend.h>
-#include "internal.h"
-#include "../block/blk.h"
+#include "../fs/internal.h"
+#include "blk.h"
struct bdev_inode {
struct block_device bdev;
struct inode vfs_inode;
};
-static const struct address_space_operations def_blk_aops;
-
static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
return container_of(inode, struct bdev_inode, vfs_inode);
@@ -194,332 +184,6 @@ int sb_min_blocksize(struct super_block *sb, int size)
EXPORT_SYMBOL(sb_min_blocksize);
-static int
-blkdev_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- bh->b_bdev = I_BDEV(inode);
- bh->b_blocknr = iblock;
- set_buffer_mapped(bh);
- return 0;
-}
-
-static struct inode *bdev_file_inode(struct file *file)
-{
- return file->f_mapping->host;
-}
-
-static unsigned int dio_bio_write_op(struct kiocb *iocb)
-{
- unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
-
- /* avoid the need for a I/O completion work item */
- if (iocb->ki_flags & IOCB_DSYNC)
- op |= REQ_FUA;
- return op;
-}
-
-#define DIO_INLINE_BIO_VECS 4
-
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
- struct task_struct *waiter = bio->bi_private;
-
- WRITE_ONCE(bio->bi_private, NULL);
- blk_wake_io_task(waiter);
-}
-
-static ssize_t
-__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
- unsigned int nr_pages)
-{
- struct file *file = iocb->ki_filp;
- struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
- loff_t pos = iocb->ki_pos;
- bool should_dirty = false;
- struct bio bio;
- ssize_t ret;
- blk_qc_t qc;
-
- if ((pos | iov_iter_alignment(iter)) &
- (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- if (nr_pages <= DIO_INLINE_BIO_VECS)
- vecs = inline_vecs;
- else {
- vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
- GFP_KERNEL);
- if (!vecs)
- return -ENOMEM;
- }
-
- bio_init(&bio, vecs, nr_pages);
- bio_set_dev(&bio, bdev);
- bio.bi_iter.bi_sector = pos >> 9;
- bio.bi_write_hint = iocb->ki_hint;
- bio.bi_private = current;
- bio.bi_end_io = blkdev_bio_end_io_simple;
- bio.bi_ioprio = iocb->ki_ioprio;
-
- ret = bio_iov_iter_get_pages(&bio, iter);
- if (unlikely(ret))
- goto out;
- ret = bio.bi_iter.bi_size;
-
- if (iov_iter_rw(iter) == READ) {
- bio.bi_opf = REQ_OP_READ;
- if (iter_is_iovec(iter))
- should_dirty = true;
- } else {
- bio.bi_opf = dio_bio_write_op(iocb);
- task_io_account_write(ret);
- }
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio.bi_opf |= REQ_NOWAIT;
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(&bio, iocb);
-
- qc = submit_bio(&bio);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio.bi_private))
- break;
- if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc, true))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
-
- bio_release_pages(&bio, should_dirty);
- if (unlikely(bio.bi_status))
- ret = blk_status_to_errno(bio.bi_status);
-
-out:
- if (vecs != inline_vecs)
- kfree(vecs);
-
- bio_uninit(&bio);
-
- return ret;
-}
-
-struct blkdev_dio {
- union {
- struct kiocb *iocb;
- struct task_struct *waiter;
- };
- size_t size;
- atomic_t ref;
- bool multi_bio : 1;
- bool should_dirty : 1;
- bool is_sync : 1;
- struct bio bio;
-};
-
-static struct bio_set blkdev_dio_pool;
-
-static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
-{
- struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
- struct request_queue *q = bdev_get_queue(bdev);
-
- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
-}
-
-static void blkdev_bio_end_io(struct bio *bio)
-{
- struct blkdev_dio *dio = bio->bi_private;
- bool should_dirty = dio->should_dirty;
-
- if (bio->bi_status && !dio->bio.bi_status)
- dio->bio.bi_status = bio->bi_status;
-
- if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
- if (!dio->is_sync) {
- struct kiocb *iocb = dio->iocb;
- ssize_t ret;
-
- if (likely(!dio->bio.bi_status)) {
- ret = dio->size;
- iocb->ki_pos += ret;
- } else {
- ret = blk_status_to_errno(dio->bio.bi_status);
- }
-
- dio->iocb->ki_complete(iocb, ret, 0);
- if (dio->multi_bio)
- bio_put(&dio->bio);
- } else {
- struct task_struct *waiter = dio->waiter;
-
- WRITE_ONCE(dio->waiter, NULL);
- blk_wake_io_task(waiter);
- }
- }
-
- if (should_dirty) {
- bio_check_pages_dirty(bio);
- } else {
- bio_release_pages(bio, false);
- bio_put(bio);
- }
-}
-
-static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
- unsigned int nr_pages)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = bdev_file_inode(file);
- struct block_device *bdev = I_BDEV(inode);
- struct blk_plug plug;
- struct blkdev_dio *dio;
- struct bio *bio;
- bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
- bool is_read = (iov_iter_rw(iter) == READ), is_sync;
- loff_t pos = iocb->ki_pos;
- blk_qc_t qc = BLK_QC_T_NONE;
- int ret = 0;
-
- if ((pos | iov_iter_alignment(iter)) &
- (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
-
- dio = container_of(bio, struct blkdev_dio, bio);
- dio->is_sync = is_sync = is_sync_kiocb(iocb);
- if (dio->is_sync) {
- dio->waiter = current;
- bio_get(bio);
- } else {
- dio->iocb = iocb;
- }
-
- dio->size = 0;
- dio->multi_bio = false;
- dio->should_dirty = is_read && iter_is_iovec(iter);
-
- /*
- * Don't plug for HIPRI/polled IO, as those should go straight
- * to issue
- */
- if (!is_poll)
- blk_start_plug(&plug);
-
- for (;;) {
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = pos >> 9;
- bio->bi_write_hint = iocb->ki_hint;
- bio->bi_private = dio;
- bio->bi_end_io = blkdev_bio_end_io;
- bio->bi_ioprio = iocb->ki_ioprio;
-
- ret = bio_iov_iter_get_pages(bio, iter);
- if (unlikely(ret)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- break;
- }
-
- if (is_read) {
- bio->bi_opf = REQ_OP_READ;
- if (dio->should_dirty)
- bio_set_pages_dirty(bio);
- } else {
- bio->bi_opf = dio_bio_write_op(iocb);
- task_io_account_write(bio->bi_iter.bi_size);
- }
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio->bi_opf |= REQ_NOWAIT;
-
- dio->size += bio->bi_iter.bi_size;
- pos += bio->bi_iter.bi_size;
-
- nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
- if (!nr_pages) {
- bool polled = false;
-
- if (iocb->ki_flags & IOCB_HIPRI) {
- bio_set_polled(bio, iocb);
- polled = true;
- }
-
- qc = submit_bio(bio);
-
- if (polled)
- WRITE_ONCE(iocb->ki_cookie, qc);
- break;
- }
-
- if (!dio->multi_bio) {
- /*
- * AIO needs an extra reference to ensure the dio
- * structure which is embedded into the first bio
- * stays around.
- */
- if (!is_sync)
- bio_get(bio);
- dio->multi_bio = true;
- atomic_set(&dio->ref, 2);
- } else {
- atomic_inc(&dio->ref);
- }
-
- submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL, nr_pages);
- }
-
- if (!is_poll)
- blk_finish_plug(&plug);
-
- if (!is_sync)
- return -EIOCBQUEUED;
-
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(dio->waiter))
- break;
-
- if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc, true))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
-
- if (!ret)
- ret = blk_status_to_errno(dio->bio.bi_status);
- if (likely(!ret))
- ret = dio->size;
-
- bio_put(&dio->bio);
- return ret;
-}
-
-static ssize_t
-blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- unsigned int nr_pages;
-
- if (!iov_iter_count(iter))
- return 0;
-
- nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
- if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
- return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-
- return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
-}
-
-static __init int blkdev_init(void)
-{
- return bioset_init(&blkdev_dio_pool, 4,
- offsetof(struct blkdev_dio, bio),
- BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
-}
-module_init(blkdev_init);
-
int __sync_blockdev(struct block_device *bdev, int wait)
{
if (!bdev)
@@ -637,81 +301,6 @@ out:
}
EXPORT_SYMBOL(thaw_bdev);
-static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, blkdev_get_block, wbc);
-}
-
-static int blkdev_readpage(struct file * file, struct page * page)
-{
- return block_read_full_page(page, blkdev_get_block);
-}
-
-static void blkdev_readahead(struct readahead_control *rac)
-{
- mpage_readahead(rac, blkdev_get_block);
-}
-
-static int blkdev_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- return block_write_begin(mapping, pos, len, flags, pagep,
- blkdev_get_block);
-}
-
-static int blkdev_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- int ret;
- ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
- unlock_page(page);
- put_page(page);
-
- return ret;
-}
-
-/*
- * private llseek:
- * for a block special file file_inode(file)->i_size is zero
- * so we compute the size by hand (just as in block_read/write above)
- */
-static loff_t block_llseek(struct file *file, loff_t offset, int whence)
-{
- struct inode *bd_inode = bdev_file_inode(file);
- loff_t retval;
-
- inode_lock(bd_inode);
- retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
- inode_unlock(bd_inode);
- return retval;
-}
-
-static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync)
-{
- struct inode *bd_inode = bdev_file_inode(filp);
- struct block_device *bdev = I_BDEV(bd_inode);
- int error;
-
- error = file_write_and_wait_range(filp, start, end);
- if (error)
- return error;
-
- /*
- * There is no need to serialise calls to blkdev_issue_flush with
- * i_mutex and doing so causes performance issues with concurrent
- * O_SYNC writers to a block device.
- */
- error = blkdev_issue_flush(bdev);
- if (error == -EOPNOTSUPP)
- error = 0;
-
- return error;
-}
-
/**
* bdev_read_page() - Start reading a page from a block device
* @bdev: The device to read the page from
@@ -1305,35 +894,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
}
EXPORT_SYMBOL(blkdev_get_by_path);
-static int blkdev_open(struct inode * inode, struct file * filp)
-{
- struct block_device *bdev;
-
- /*
- * Preserve backwards compatibility and allow large file access
- * even if userspace doesn't ask for it explicitly. Some mkfs
- * binary needs it. We might want to drop this workaround
- * during an unstable branch.
- */
- filp->f_flags |= O_LARGEFILE;
-
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
-
- if (filp->f_flags & O_NDELAY)
- filp->f_mode |= FMODE_NDELAY;
- if (filp->f_flags & O_EXCL)
- filp->f_mode |= FMODE_EXCL;
- if ((filp->f_flags & O_ACCMODE) == 3)
- filp->f_mode |= FMODE_WRITE_IOCTL;
-
- bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- filp->f_mapping = bdev->bd_inode->i_mapping;
- filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
- return 0;
-}
-
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
@@ -1397,203 +957,6 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
}
EXPORT_SYMBOL(blkdev_put);
-static int blkdev_close(struct inode * inode, struct file * filp)
-{
- struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
- blkdev_put(bdev, filp->f_mode);
- return 0;
-}
-
-static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-{
- struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- fmode_t mode = file->f_mode;
-
- /*
- * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
- * to updated it before every ioctl.
- */
- if (file->f_flags & O_NDELAY)
- mode |= FMODE_NDELAY;
- else
- mode &= ~FMODE_NDELAY;
-
- return blkdev_ioctl(bdev, mode, cmd, arg);
-}
-
-/*
- * Write data to the block device. Only intended for the block device itself
- * and the raw driver which basically is a fake block device.
- *
- * Does not take i_mutex for the write and thus is not for general purpose
- * use.
- */
-static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct file *file = iocb->ki_filp;
- struct inode *bd_inode = bdev_file_inode(file);
- loff_t size = i_size_read(bd_inode);
- struct blk_plug plug;
- size_t shorted = 0;
- ssize_t ret;
-
- if (bdev_read_only(I_BDEV(bd_inode)))
- return -EPERM;
-
- if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
- return -ETXTBSY;
-
- if (!iov_iter_count(from))
- return 0;
-
- if (iocb->ki_pos >= size)
- return -ENOSPC;
-
- if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
- return -EOPNOTSUPP;
-
- size -= iocb->ki_pos;
- if (iov_iter_count(from) > size) {
- shorted = iov_iter_count(from) - size;
- iov_iter_truncate(from, size);
- }
-
- blk_start_plug(&plug);
- ret = __generic_file_write_iter(iocb, from);
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
- iov_iter_reexpand(from, iov_iter_count(from) + shorted);
- blk_finish_plug(&plug);
- return ret;
-}
-
-static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- struct file *file = iocb->ki_filp;
- struct inode *bd_inode = bdev_file_inode(file);
- loff_t size = i_size_read(bd_inode);
- loff_t pos = iocb->ki_pos;
- size_t shorted = 0;
- ssize_t ret;
-
- if (pos >= size)
- return 0;
-
- size -= pos;
- if (iov_iter_count(to) > size) {
- shorted = iov_iter_count(to) - size;
- iov_iter_truncate(to, size);
- }
-
- ret = generic_file_read_iter(iocb, to);
- iov_iter_reexpand(to, iov_iter_count(to) + shorted);
- return ret;
-}
-
-static int blkdev_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- return generic_writepages(mapping, wbc);
-}
-
-static const struct address_space_operations def_blk_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
- .readpage = blkdev_readpage,
- .readahead = blkdev_readahead,
- .writepage = blkdev_writepage,
- .write_begin = blkdev_write_begin,
- .write_end = blkdev_write_end,
- .writepages = blkdev_writepages,
- .direct_IO = blkdev_direct_IO,
- .migratepage = buffer_migrate_page_norefs,
- .is_dirty_writeback = buffer_check_dirty_writeback,
-};
-
-#define BLKDEV_FALLOC_FL_SUPPORTED \
- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
-
-static long blkdev_fallocate(struct file *file, int mode, loff_t start,
- loff_t len)
-{
- struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- loff_t end = start + len - 1;
- loff_t isize;
- int error;
-
- /* Fail if we don't recognize the flags. */
- if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
- return -EOPNOTSUPP;
-
- /* Don't go off the end of the device. */
- isize = i_size_read(bdev->bd_inode);
- if (start >= isize)
- return -EINVAL;
- if (end >= isize) {
- if (mode & FALLOC_FL_KEEP_SIZE) {
- len = isize - start;
- end = start + len - 1;
- } else
- return -EINVAL;
- }
-
- /*
- * Don't allow IO that isn't aligned to logical block size.
- */
- if ((start | len) & (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- /* Invalidate the page cache, including dirty pages. */
- error = truncate_bdev_range(bdev, file->f_mode, start, end);
- if (error)
- return error;
-
- switch (mode) {
- case FALLOC_FL_ZERO_RANGE:
- case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
- GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
- break;
- case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
- GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
- break;
- case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
- error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
- GFP_KERNEL, 0);
- break;
- default:
- return -EOPNOTSUPP;
- }
- if (error)
- return error;
-
- /*
- * Invalidate the page cache again; if someone wandered in and dirtied
- * a page, we just discard it - userspace has no way of knowing whether
- * the write happened before or after discard completing...
- */
- return truncate_bdev_range(bdev, file->f_mode, start, end);
-}
-
-const struct file_operations def_blk_fops = {
- .open = blkdev_open,
- .release = blkdev_close,
- .llseek = block_llseek,
- .read_iter = blkdev_read_iter,
- .write_iter = blkdev_write_iter,
- .iopoll = blkdev_iopoll,
- .mmap = generic_file_mmap,
- .fsync = blkdev_fsync,
- .unlocked_ioctl = block_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = compat_blkdev_ioctl,
-#endif
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fallocate = blkdev_fallocate,
-};
-
/**
* lookup_bdev - lookup a struct block_device by name
* @pathname: special file representing the block device
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 65d3a63aecc6..108a352051be 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2135,6 +2135,18 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
}
}
+/*
+ * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+ if (plug->multiple_queues)
+ return BLK_MAX_REQUEST_COUNT * 4;
+ return BLK_MAX_REQUEST_COUNT;
+}
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
@@ -2231,7 +2243,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
else
last = list_entry_rq(plug->mq_list.prev);
- if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
+ if (request_count >= blk_plug_max_rq_count(plug) || (last &&
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 55c49015e533..7c4e7993ba97 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2458,6 +2458,7 @@ int blk_throtl_init(struct request_queue *q)
void blk_throtl_exit(struct request_queue *q)
{
BUG_ON(!q->td);
+ del_timer_sync(&q->td->service_queue.pending_timer);
throtl_shutdown_wq(q);
blkcg_deactivate_policy(q, &blkcg_policy_throtl);
free_percpu(q->td->latency_buckets[READ]);
diff --git a/block/blk.h b/block/blk.h
index 8c96b0c90c48..7d2a0ba7ed21 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -373,4 +373,6 @@ static inline void bio_clear_hipri(struct bio *bio)
bio->bi_opf &= ~REQ_HIPRI;
}
+extern const struct address_space_operations def_blk_aops;
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/fops.c b/block/fops.c
new file mode 100644
index 000000000000..ffce6f6c68dd
--- /dev/null
+++ b/block/fops.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2016 - 2020 Christoph Hellwig
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/uio.h>
+#include <linux/namei.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/falloc.h>
+#include <linux/suspend.h>
+#include "blk.h"
+
+static struct inode *bdev_file_inode(struct file *file)
+{
+ return file->f_mapping->host;
+}
+
+static int blkdev_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ bh->b_bdev = I_BDEV(inode);
+ bh->b_blocknr = iblock;
+ set_buffer_mapped(bh);
+ return 0;
+}
+
+static unsigned int dio_bio_write_op(struct kiocb *iocb)
+{
+ unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+
+ /* avoid the need for a I/O completion work item */
+ if (iocb->ki_flags & IOCB_DSYNC)
+ op |= REQ_FUA;
+ return op;
+}
+
+#define DIO_INLINE_BIO_VECS 4
+
+static void blkdev_bio_end_io_simple(struct bio *bio)
+{
+ struct task_struct *waiter = bio->bi_private;
+
+ WRITE_ONCE(bio->bi_private, NULL);
+ blk_wake_io_task(waiter);
+}
+
+static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
+ struct iov_iter *iter, unsigned int nr_pages)
+{
+ struct file *file = iocb->ki_filp;
+ struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+ struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+ loff_t pos = iocb->ki_pos;
+ bool should_dirty = false;
+ struct bio bio;
+ ssize_t ret;
+ blk_qc_t qc;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ if (nr_pages <= DIO_INLINE_BIO_VECS)
+ vecs = inline_vecs;
+ else {
+ vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ if (!vecs)
+ return -ENOMEM;
+ }
+
+ bio_init(&bio, vecs, nr_pages);
+ bio_set_dev(&bio, bdev);
+ bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_write_hint = iocb->ki_hint;
+ bio.bi_private = current;
+ bio.bi_end_io = blkdev_bio_end_io_simple;
+ bio.bi_ioprio = iocb->ki_ioprio;
+
+ ret = bio_iov_iter_get_pages(&bio, iter);
+ if (unlikely(ret))
+ goto out;
+ ret = bio.bi_iter.bi_size;
+
+ if (iov_iter_rw(iter) == READ) {
+ bio.bi_opf = REQ_OP_READ;
+ if (iter_is_iovec(iter))
+ should_dirty = true;
+ } else {
+ bio.bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(ret);
+ }
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio.bi_opf |= REQ_NOWAIT;
+ if (iocb->ki_flags & IOCB_HIPRI)