summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-08 14:12:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-08 14:12:17 -0800
commit80201fe175cbf7f3e372f53eba0a881a702ad926 (patch)
tree8026c68d52763614268a9c3c80759ad386bd5967
parent4221b807d1f73c03d22543416d303b60a5d1ef31 (diff)
parentaaeee62c841cc1e48231e1d60c304d2da9c4e41c (diff)
downloadlinux-80201fe175cbf7f3e372f53eba0a881a702ad926.tar.gz
linux-80201fe175cbf7f3e372f53eba0a881a702ad926.tar.bz2
linux-80201fe175cbf7f3e372f53eba0a881a702ad926.zip
Merge tag 'for-5.1/block-20190302' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "Not a huge amount of changes in this round, the biggest one is that we finally have Mings multi-page bvec support merged. Apart from that, this pull request contains: - Small series that avoids quiescing the queue for sysfs changes that match what we currently have (Aleksei) - Series of bcache fixes (via Coly) - Series of lightnvm fixes (via Mathias) - NVMe pull request from Christoph. Nothing major, just SPDX/license cleanups, RR mp policy (Hannes), and little fixes (Bart, Chaitanya). - BFQ series (Paolo) - Save blk-mq cpu -> hw queue mapping, removing a pointer indirection for the fast path (Jianchao) - fops->iopoll() added for async IO polling, this is a feature that the upcoming io_uring interface will use (Christoph, me) - Partition scan loop fixes (Dongli) - mtip32xx conversion from managed resource API (Christoph) - cdrom registration race fix (Guenter) - MD pull from Song, two minor fixes. - Various documentation fixes (Marcos) - Multi-page bvec feature. This brings a lot of nice improvements with it, like more efficient splitting, larger IOs can be supported without growing the bvec table size, and so on. (Ming) - Various little fixes to core and drivers" * tag 'for-5.1/block-20190302' of git://git.kernel.dk/linux-block: (117 commits) block: fix updating bio's front segment size block: Replace function name in string with __func__ nbd: propagate genlmsg_reply return code floppy: remove set but not used variable 'q' null_blk: fix checking for REQ_FUA block: fix NULL pointer dereference in register_disk fs: fix guard_bio_eod to check for real EOD errors blk-mq: use HCTX_TYPE_DEFAULT but not 0 to index blk_mq_tag_set->map block: optimize bvec iteration in bvec_iter_advance block: introduce mp_bvec_for_each_page() for iterating over page block: optimize blk_bio_segment_split for single-page bvec block: optimize __blk_segment_map_sg() for single-page bvec block: introduce bvec_nth_page() iomap: wire up the iopoll method block: add bio_set_polled() helper block: wire up block device iopoll method fs: add an iopoll method to struct file_operations loop: set GENHD_FL_NO_PART_SCAN after blkdev_reread_part() loop: do not print warn message if partition scan is successful block: bounce: make sure that bvec table is updated ...
-rw-r--r--Documentation/block/biovecs.txt25
-rw-r--r--Documentation/filesystems/vfs.txt3
-rw-r--r--block/bfq-iosched.c705
-rw-r--r--block/bfq-iosched.h11
-rw-r--r--block/bfq-wf2q.c18
-rw-r--r--block/bio.c49
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-merge.c231
-rw-r--r--block/blk-mq-debugfs.c3
-rw-r--r--block/blk-mq-sched.c2
-rw-r--r--block/blk-mq-tag.c2
-rw-r--r--block/blk-mq.c33
-rw-r--r--block/blk-mq.h20
-rw-r--r--block/blk-settings.c9
-rw-r--r--block/blk-sysfs.c22
-rw-r--r--block/blk.h2
-rw-r--r--block/bounce.c10
-rw-r--r--block/elevator.c5
-rw-r--r--block/genhd.c18
-rw-r--r--drivers/ata/libata-scsi.c2
-rw-r--r--drivers/block/floppy.c3
-rw-r--r--drivers/block/loop.c48
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c37
-rw-r--r--drivers/block/nbd.c5
-rw-r--r--drivers/block/null_blk_main.c3
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/block/skd_main.c1
-rw-r--r--drivers/block/xen-blkfront.c2
-rw-r--r--drivers/cdrom/cdrom.c7
-rw-r--r--drivers/lightnvm/pblk-core.c8
-rw-r--r--drivers/lightnvm/pblk-gc.c20
-rw-r--r--drivers/lightnvm/pblk-init.c4
-rw-r--r--drivers/lightnvm/pblk-map.c1
-rw-r--r--drivers/lightnvm/pblk-rb.c26
-rw-r--r--drivers/lightnvm/pblk-recovery.c64
-rw-r--r--drivers/lightnvm/pblk-rl.c5
-rw-r--r--drivers/lightnvm/pblk-trace.h2
-rw-r--r--drivers/lightnvm/pblk-write.c1
-rw-r--r--drivers/lightnvm/pblk.h17
-rw-r--r--drivers/md/bcache/btree.c3
-rw-r--r--drivers/md/bcache/extents.c13
-rw-r--r--drivers/md/bcache/request.c7
-rw-r--r--drivers/md/bcache/stats.c2
-rw-r--r--drivers/md/bcache/super.c30
-rw-r--r--drivers/md/bcache/sysfs.c81
-rw-r--r--drivers/md/bcache/sysfs.h23
-rw-r--r--drivers/md/bcache/util.c6
-rw-r--r--drivers/md/bcache/writeback.h3
-rw-r--r--drivers/md/dm-crypt.c3
-rw-r--r--drivers/md/dm-rq.c2
-rw-r--r--drivers/md/dm-table.c13
-rw-r--r--drivers/md/md-linear.c3
-rw-r--r--drivers/md/raid1.c9
-rw-r--r--drivers/mmc/core/queue.c3
-rw-r--r--drivers/nvme/host/core.c122
-rw-r--r--drivers/nvme/host/fabrics.c11
-rw-r--r--drivers/nvme/host/fabrics.h10
-rw-r--r--drivers/nvme/host/fault_inject.c2
-rw-r--r--drivers/nvme/host/fc.c14
-rw-r--r--drivers/nvme/host/lightnvm.c16
-rw-r--r--drivers/nvme/host/multipath.c96
-rw-r--r--drivers/nvme/host/nvme.h21
-rw-r--r--drivers/nvme/host/pci.c12
-rw-r--r--drivers/nvme/host/rdma.c26
-rw-r--r--drivers/nvme/host/tcp.c10
-rw-r--r--drivers/nvme/host/trace.c10
-rw-r--r--drivers/nvme/host/trace.h10
-rw-r--r--drivers/nvme/target/admin-cmd.c10
-rw-r--r--drivers/nvme/target/configfs.c10
-rw-r--r--drivers/nvme/target/core.c10
-rw-r--r--drivers/nvme/target/discovery.c12
-rw-r--r--drivers/nvme/target/fabrics-cmd.c10
-rw-r--r--drivers/nvme/target/fc.c14
-rw-r--r--drivers/nvme/target/fcloop.c13
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c10
-rw-r--r--drivers/nvme/target/loop.c10
-rw-r--r--drivers/nvme/target/nvmet.h10
-rw-r--r--drivers/nvme/target/rdma.c10
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/staging/erofs/data.c3
-rw-r--r--drivers/staging/erofs/unzip_vle.c3
-rw-r--r--fs/block_dev.c28
-rw-r--r--fs/btrfs/compression.c3
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent_io.c16
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/btrfs/raid56.c3
-rw-r--r--fs/buffer.c12
-rw-r--r--fs/crypto/bio.c3
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/exofs/ore.c3
-rw-r--r--fs/exofs/ore_raid.c3
-rw-r--r--fs/ext4/page-io.c3
-rw-r--r--fs/ext4/readpage.c3
-rw-r--r--fs/f2fs/data.c9
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/lops.c6
-rw-r--r--fs/gfs2/meta_io.c3
-rw-r--r--fs/iomap.c53
-rw-r--r--fs/mpage.c3
-rw-r--r--fs/xfs/xfs_aops.c9
-rw-r--r--fs/xfs/xfs_file.c1
-rw-r--r--include/linux/bio.h51
-rw-r--r--include/linux/blk-mq.h1
-rw-r--r--include/linux/blkdev.h68
-rw-r--r--include/linux/bvec.h123
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/iomap.h1
-rw-r--r--include/linux/nvme-fc-driver.h10
-rw-r--r--include/linux/nvme-fc.h14
-rw-r--r--include/linux/nvme-rdma.h10
-rw-r--r--include/linux/nvme-tcp.h2
-rw-r--r--include/linux/nvme.h10
-rw-r--r--include/uapi/linux/nvme_ioctl.h9
114 files changed, 1470 insertions, 1123 deletions
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt
index 25689584e6e0..ce6eccaf5df7 100644
--- a/Documentation/block/biovecs.txt
+++ b/Documentation/block/biovecs.txt
@@ -117,3 +117,28 @@ Other implications:
size limitations and the limitations of the underlying devices. Thus
there's no need to define ->merge_bvec_fn() callbacks for individual block
drivers.
+
+Usage of helpers:
+=================
+
+* The following helpers whose names have the suffix of "_all" can only be used
+on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
+shouldn't use them because the bio may have been split before it reached the
+driver.
+
+ bio_for_each_segment_all()
+ bio_first_bvec_all()
+ bio_first_page_all()
+ bio_last_bvec_all()
+
+* The following helpers iterate over single-page segment. The passed 'struct
+bio_vec' will contain a single-page IO vector during the iteration
+
+ bio_for_each_segment()
+ bio_for_each_segment_all()
+
+* The following helpers iterate over multi-page bvec. The passed 'struct
+bio_vec' will contain a multi-page IO vector during the iteration
+
+ bio_for_each_bvec()
+ rq_for_each_bvec()
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 8dc8e9c2913f..761c6fd24a53 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -857,6 +857,7 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ int (*iopoll)(struct kiocb *kiocb, bool spin);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
@@ -902,6 +903,8 @@ otherwise noted.
write_iter: possibly asynchronous write with iov_iter as source
+ iopoll: called when aio wants to poll for completions on HIPRI iocbs
+
iterate: called when the VFS needs to read the directory contents
iterate_shared: called when the VFS needs to read the directory contents
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index cd307767a134..4c592496a16a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -230,11 +230,16 @@ static struct kmem_cache *bfq_pool;
#define BFQ_MIN_TT (2 * NSEC_PER_MSEC)
/* hw_tag detection: parallel requests threshold and min samples needed. */
-#define BFQ_HW_QUEUE_THRESHOLD 4
+#define BFQ_HW_QUEUE_THRESHOLD 3
#define BFQ_HW_QUEUE_SAMPLES 32
#define BFQQ_SEEK_THR (sector_t)(8 * 100)
#define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
+#define BFQ_RQ_SEEKY(bfqd, last_pos, rq) \
+ (get_sdist(last_pos, rq) > \
+ BFQQ_SEEK_THR && \
+ (!blk_queue_nonrot(bfqd->queue) || \
+ blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT))
#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19)
@@ -624,26 +629,6 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
}
/*
- * Tell whether there are active queues with different weights or
- * active groups.
- */
-static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
-{
- /*
- * For queue weights to differ, queue_weights_tree must contain
- * at least two nodes.
- */
- return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
- (bfqd->queue_weights_tree.rb_node->rb_left ||
- bfqd->queue_weights_tree.rb_node->rb_right)
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
- ) ||
- (bfqd->num_groups_with_pending_reqs > 0
-#endif
- );
-}
-
-/*
* The following function returns true if every queue must receive the
* same share of the throughput (this condition is used when deciding
* whether idling may be disabled, see the comments in the function
@@ -651,25 +636,48 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
*
* Such a scenario occurs when:
* 1) all active queues have the same weight,
- * 2) all active groups at the same level in the groups tree have the same
- * weight,
+ * 2) all active queues belong to the same I/O-priority class,
* 3) all active groups at the same level in the groups tree have the same
+ * weight,
+ * 4) all active groups at the same level in the groups tree have the same
* number of children.
*
* Unfortunately, keeping the necessary state for evaluating exactly
* the last two symmetry sub-conditions above would be quite complex
- * and time consuming. Therefore this function evaluates, instead,
- * only the following stronger two sub-conditions, for which it is
+ * and time consuming. Therefore this function evaluates, instead,
+ * only the following stronger three sub-conditions, for which it is
* much easier to maintain the needed state:
* 1) all active queues have the same weight,
- * 2) there are no active groups.
+ * 2) all active queues belong to the same I/O-priority class,
+ * 3) there are no active groups.
* In particular, the last condition is always true if hierarchical
* support or the cgroups interface are not enabled, thus no state
* needs to be maintained in this case.
*/
static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
{
- return !bfq_varied_queue_weights_or_active_groups(bfqd);
+ /*
+ * For queue weights to differ, queue_weights_tree must contain
+ * at least two nodes.
+ */
+ bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
+ (bfqd->queue_weights_tree.rb_node->rb_left ||
+ bfqd->queue_weights_tree.rb_node->rb_right);
+
+ bool multiple_classes_busy =
+ (bfqd->busy_queues[0] && bfqd->busy_queues[1]) ||
+ (bfqd->busy_queues[0] && bfqd->busy_queues[2]) ||
+ (bfqd->busy_queues[1] && bfqd->busy_queues[2]);