summary refs log tree commit diff
diff options
context:
space:
mode:
author Jens Axboe <axboe@kernel.dk> 2020-09-24 13:44:39 -0600
committer Jens Axboe <axboe@kernel.dk> 2020-09-24 13:44:39 -0600
commit ac8f7a0264404c3e982ccabdc173d46d89ee7ea1 (patch)
tree 97873bf5946b541fbf203bc88c0281ace1981ef2
parent 805c6d3c19210c90c109107d189744e960eae025 (diff)
parent f56753ac2a90810726334df04d735e9f8f5a32d9 (diff)
download linux-ac8f7a0264404c3e982ccabdc173d46d89ee7ea1.tar.gz
linux-ac8f7a0264404c3e982ccabdc173d46d89ee7ea1.tar.bz2
linux-ac8f7a0264404c3e982ccabdc173d46d89ee7ea1.zip
Merge branch 'for-5.10/block' into for-5.10/drivers
* for-5.10/block: (140 commits)
  bdi: replace BDI_CAP_NO_{WRITEBACK,ACCT_DIRTY} with a single flag
  bdi: invert BDI_CAP_NO_ACCT_WB
  bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag
  mm: use SWP_SYNCHRONOUS_IO more intelligently
  bdi: remove BDI_CAP_SYNCHRONOUS_IO
  bdi: remove BDI_CAP_CGROUP_WRITEBACK
  block: lift setting the readahead size into the block layer
  md: update the optimal I/O size on reshape
  bdi: initialize ->ra_pages and ->io_pages in bdi_init
  aoe: set an optimal I/O size
  bcache: inherit the optimal I/O size
  drbd: remove dead code in device_to_statistics
  fs: remove the unused SB_I_MULTIROOT flag
  block: mark blkdev_get static
  PM: mm: cleanup swsusp_swap_check
  mm: split swap_type_of
  PM: rewrite is_hibernate_resume_dev to not require an inode
  mm: cleanup claim_swapfile
  ocfs2: cleanup o2hb_region_dev_store
  dasd: cleanup dasd_scan_partitions
  ...
-rw-r--r-- Documentation/filesystems/locking.rst 3
-rw-r--r-- block/Kconfig 2
-rw-r--r-- block/bfq-iosched.c 9
-rw-r--r-- block/blk-cgroup.c 32
-rw-r--r-- block/blk-core.c 239
-rw-r--r-- block/blk-integrity.c 4
-rw-r--r-- block/blk-iocost.c 1547
-rw-r--r-- block/blk-map.c 177
-rw-r--r-- block/blk-merge.c 201
-rw-r--r-- block/blk-mq-debugfs.c 11
-rw-r--r-- block/blk-mq-sched.c 124
-rw-r--r-- block/blk-mq-sched.h 3
-rw-r--r-- block/blk-mq-tag.c 152
-rw-r--r-- block/blk-mq-tag.h 56
-rw-r--r-- block/blk-mq.c 86
-rw-r--r-- block/blk-mq.h 76
-rw-r--r-- block/blk-settings.c 40
-rw-r--r-- block/blk-sysfs.c 277
-rw-r--r-- block/blk-throttle.c 59
-rw-r--r-- block/blk.h 25
-rw-r--r-- block/bsg-lib.c 2
-rw-r--r-- block/genhd.c 156
-rw-r--r-- block/ioctl.c 29
-rw-r--r-- block/ioprio.c 2
-rw-r--r-- block/kyber-iosched.c 6
-rw-r--r-- block/mq-deadline.c 6
-rw-r--r-- block/partitions/core.c 27
-rw-r--r-- block/scsi_ioctl.c 2
-rw-r--r-- drivers/block/amiflop.c 2
-rw-r--r-- drivers/block/aoe/aoeblk.c 3
-rw-r--r-- drivers/block/aoe/aoecmd.c 4
-rw-r--r-- drivers/block/ataflop.c 7
-rw-r--r-- drivers/block/brd.c 1
-rw-r--r-- drivers/block/drbd/drbd_nl.c 16
-rw-r--r-- drivers/block/floppy.c 8
-rw-r--r-- drivers/block/loop.c 4
-rw-r--r-- drivers/block/nbd.c 15
-rw-r--r-- drivers/block/paride/pcd.c 2
-rw-r--r-- drivers/block/pktcdvd.c 94
-rw-r--r-- drivers/block/rbd.c 4
-rw-r--r-- drivers/block/rnbd/rnbd-clt.c 12
-rw-r--r-- drivers/block/swim.c 22
-rw-r--r-- drivers/block/swim3.c 4
-rw-r--r-- drivers/block/virtio_blk.c 4
-rw-r--r-- drivers/block/xsysace.c 26
-rw-r--r-- drivers/block/zram/zram_drv.c 30
-rw-r--r-- drivers/cdrom/gdrom.c 2
-rw-r--r-- drivers/char/raw.c 56
-rw-r--r-- drivers/ide/ide-cd.c 16
-rw-r--r-- drivers/ide/ide-disk.c 5
-rw-r--r-- drivers/ide/ide-floppy.c 2
-rw-r--r-- drivers/ide/ide-gd.c 48
-rw-r--r-- drivers/md/bcache/request.c 10
-rw-r--r-- drivers/md/bcache/super.c 5
-rw-r--r-- drivers/md/dm-raid.c 2
-rw-r--r-- drivers/md/dm-table.c 9
-rw-r--r-- drivers/md/dm.c 15
-rw-r--r-- drivers/md/md-cluster.c 6
-rw-r--r-- drivers/md/md-linear.c 2
-rw-r--r-- drivers/md/md.c 20
-rw-r--r-- drivers/md/md.h 2
-rw-r--r-- drivers/md/raid0.c 16
-rw-r--r-- drivers/md/raid10.c 46
-rw-r--r-- drivers/md/raid5.c 31
-rw-r--r-- drivers/mmc/core/queue.c 3
-rw-r--r-- drivers/mtd/mtdcore.c 2
-rw-r--r-- drivers/nvdimm/blk.c 3
-rw-r--r-- drivers/nvdimm/btt.c 5
-rw-r--r-- drivers/nvdimm/bus.c 9
-rw-r--r-- drivers/nvdimm/nd.h 2
-rw-r--r-- drivers/nvdimm/pmem.c 4
-rw-r--r-- drivers/nvme/host/core.c 53
-rw-r--r-- drivers/nvme/host/multipath.c 10
-rw-r--r-- drivers/nvme/host/nvme.h 13
-rw-r--r-- drivers/s390/block/dasd_genhd.c 15
-rw-r--r-- drivers/s390/block/dasd_ioctl.c 9
-rw-r--r-- drivers/scsi/iscsi_tcp.c 4
-rw-r--r-- drivers/scsi/sd.c 13
-rw-r--r-- drivers/scsi/sr.c 36
-rw-r--r-- fs/9p/vfs_file.c 2
-rw-r--r-- fs/9p/vfs_super.c 6
-rw-r--r-- fs/afs/super.c 1
-rw-r--r-- fs/block_dev.c 175
-rw-r--r-- fs/btrfs/disk-io.c 2
-rw-r--r-- fs/buffer.c 16
-rw-r--r-- fs/fs-writeback.c 7
-rw-r--r-- fs/fuse/inode.c 4
-rw-r--r-- fs/namei.c 4
-rw-r--r-- fs/nfs/super.c 9
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 28
-rw-r--r-- fs/super.c 2
-rw-r--r-- fs/ubifs/super.c 2
-rw-r--r-- fs/vboxsf/super.c 2
-rw-r--r-- include/linux/backing-dev.h 78
-rw-r--r-- include/linux/blk-mq.h 15
-rw-r--r-- include/linux/blk_types.h 7
-rw-r--r-- include/linux/blkdev.h 44
-rw-r--r-- include/linux/fs.h 2
-rw-r--r-- include/linux/genhd.h 15
-rw-r--r-- include/linux/ide.h 2
-rw-r--r-- include/linux/suspend.h 4
-rw-r--r-- include/linux/swap.h 3
-rw-r--r-- include/trace/events/iocost.h 26
-rw-r--r-- include/uapi/linux/capability.h 2
-rw-r--r-- kernel/power/swap.c 21
-rw-r--r-- kernel/power/user.c 26
-rw-r--r-- kernel/trace/blktrace.c 2
-rw-r--r-- mm/backing-dev.c 14
-rw-r--r-- mm/filemap.c 4
-rw-r--r-- mm/memcontrol.c 2
-rw-r--r-- mm/memory-failure.c 2
-rw-r--r-- mm/migrate.c 2
-rw-r--r-- mm/mmap.c 2
-rw-r--r-- mm/page-writeback.c 18
-rw-r--r-- mm/page_io.c 18
-rw-r--r-- mm/swapfile.c 49
-rw-r--r-- tools/cgroup/iocost_monitor.py 54
117 files changed, 2663 insertions, 2094 deletions
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 64f94a18d97e..c0f2c7586531 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -488,9 +488,6 @@ getgeo: no
swap_slot_free_notify: no (see below)
======================= ===================
-unlock_native_capacity and revalidate_disk are called only from
-check_disk_change().
-
swap_slot_free_notify is called with swap_lock and sometimes the page lock
held.
diff --git a/block/Kconfig b/block/Kconfig
index bbad5e8bbffe..a2297edfdde8 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -161,8 +161,6 @@ config BLK_WBT_MQ
depends on BLK_WBT
help
Enable writeback throttling by default on multiqueue devices.
- Multiqueue currently doesn't have support for IO scheduling,
- enabling this option is recommended.
config BLK_DEBUG_FS
bool "Block layer debugging information in debugfs"
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index fa98470df3f0..9e81d1052091 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4640,6 +4640,9 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
{
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+ if (!atomic_read(&hctx->elevator_queued))
+ return false;
+
/*
* Avoiding lock: a race on bfqd->busy_queues should cause at
* most a call to dispatch for nothing
@@ -5554,6 +5557,7 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist);
bfq_insert_request(hctx, rq, at_head);
+ atomic_inc(&hctx->elevator_queued);
}
}
@@ -5921,6 +5925,7 @@ static void bfq_finish_requeue_request(struct request *rq)
bfq_completed_request(bfqq, bfqd);
bfq_finish_requeue_request_body(bfqq);
+ atomic_dec(&rq->mq_hctx->elevator_queued);
spin_unlock_irqrestore(&bfqd->lock, flags);
} else {
@@ -6360,8 +6365,8 @@ static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
struct blk_mq_tags *tags = hctx->sched_tags;
unsigned int min_shallow;
- min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
- sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+ min_shallow = bfq_update_depths(bfqd, tags->bitmap_tags);
+ sbitmap_queue_min_shallow_depth(tags->bitmap_tags, min_shallow);
}
static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c195365c9817..f9b55614d67d 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -119,6 +119,8 @@ static void blkg_async_bio_workfn(struct work_struct *work)
async_bio_work);
struct bio_list bios = BIO_EMPTY_LIST;
struct bio *bio;
+ struct blk_plug plug;
+ bool need_plug = false;
/* as long as there are pending bios, @blkg can't go away */
spin_lock_bh(&blkg->async_bio_lock);
@@ -126,8 +128,15 @@ static void blkg_async_bio_workfn(struct work_struct *work)
bio_list_init(&blkg->async_bios);
spin_unlock_bh(&blkg->async_bio_lock);
+ /* start plug only when bio_list contains at least 2 bios */
+ if (bios.head && bios.head->bi_next) {
+ need_plug = true;
+ blk_start_plug(&plug);
+ }
while ((bio = bio_list_pop(&bios)))
submit_bio(bio);
+ if (need_plug)
+ blk_finish_plug(&plug);
}
/**
@@ -1613,16 +1622,24 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
{
unsigned long pflags;
+ bool clamp;
u64 now = ktime_to_ns(ktime_get());
u64 exp;
u64 delay_nsec = 0;
int tok;
while (blkg->parent) {
- if (atomic_read(&blkg->use_delay)) {
+ int use_delay = atomic_read(&blkg->use_delay);
+
+ if (use_delay) {
+ u64 this_delay;
+
blkcg_scale_delay(blkg, now);
- delay_nsec = max_t(u64, delay_nsec,
- atomic64_read(&blkg->delay_nsec));
+ this_delay = atomic64_read(&blkg->delay_nsec);
+ if (this_delay > delay_nsec) {
+ delay_nsec = this_delay;
+ clamp = use_delay > 0;
+ }
}
blkg = blkg->parent;
}
@@ -1634,10 +1651,13 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
* Let's not sleep for all eternity if we've amassed a huge delay.
* Swapping or metadata IO can accumulate 10's of seconds worth of
* delay, and we want userspace to be able to do _something_ so cap the
- * delays at 1 second. If there's 10's of seconds worth of delay then
- * the tasks will be delayed for 1 second for every syscall.
+ * delays at 0.25s. If there's 10's of seconds worth of delay then the
+ * tasks will be delayed for 0.25 second for every syscall. If
+ * blkcg_set_delay() was used as indicated by negative use_delay, the
+ * caller is responsible for regulating the range.
*/
- delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
+ if (clamp)
+ delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
if (use_memdelay)
psi_memstall_enter(&pflags);
diff --git a/block/blk-core.c b/block/blk-core.c
index 10c08ac50697..1cc4fa6bc7fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -116,8 +116,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->__sector = (sector_t) -1;
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
- rq->tag = -1;
- rq->internal_tag = -1;
+ rq->tag = BLK_MQ_NO_TAG;
+ rq->internal_tag = BLK_MQ_NO_TAG;
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
refcount_set(&rq->ref, 1);
@@ -538,11 +538,10 @@ struct request_queue *blk_alloc_queue(int node_id)
if (!q->stats)