summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-block3
-rw-r--r--Documentation/block/capability.rst10
-rw-r--r--Documentation/block/index.rst1
-rw-r--r--Documentation/block/ublk.rst55
-rw-r--r--MAINTAINERS1
-rw-r--r--block/Kconfig.iosched1
-rw-r--r--block/bfq-cgroup.c105
-rw-r--r--block/bfq-iosched.c629
-rw-r--r--block/bfq-iosched.h146
-rw-r--r--block/bfq-wf2q.c2
-rw-r--r--block/bio-integrity.c8
-rw-r--r--block/bio.c15
-rw-r--r--block/blk-cgroup.c150
-rw-r--r--block/blk-cgroup.h14
-rw-r--r--block/blk-core.c36
-rw-r--r--block/blk-crypto-sysfs.c2
-rw-r--r--block/blk-ia-ranges.c4
-rw-r--r--block/blk-integrity.c2
-rw-r--r--block/blk-iocost.c78
-rw-r--r--block/blk-iolatency.c37
-rw-r--r--block/blk-ioprio.c6
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c35
-rw-r--r--block/blk-mq-debugfs.c10
-rw-r--r--block/blk-mq-sched.c7
-rw-r--r--block/blk-mq-sysfs.c30
-rw-r--r--block/blk-mq.c152
-rw-r--r--block/blk-rq-qos.c67
-rw-r--r--block/blk-rq-qos.h66
-rw-r--r--block/blk-settings.c10
-rw-r--r--block/blk-stat.c3
-rw-r--r--block/blk-sysfs.c28
-rw-r--r--block/blk-throttle.c11
-rw-r--r--block/blk-wbt.c116
-rw-r--r--block/blk-wbt.h98
-rw-r--r--block/blk-zoned.c4
-rw-r--r--block/elevator.c4
-rw-r--r--block/fops.c21
-rw-r--r--block/genhd.c5
-rw-r--r--drivers/block/brd.c67
-rw-r--r--drivers/block/drbd/Makefile2
-rw-r--r--drivers/block/drbd/drbd_buildtag.c22
-rw-r--r--drivers/block/drbd/drbd_debugfs.c2
-rw-r--r--drivers/block/drbd/drbd_int.h13
-rw-r--r--drivers/block/drbd/drbd_interval.c6
-rw-r--r--drivers/block/drbd/drbd_main.c20
-rw-r--r--drivers/block/drbd/drbd_proc.c2
-rw-r--r--drivers/block/drbd/drbd_vli.h2
-rw-r--r--drivers/block/loop.c14
-rw-r--r--drivers/block/null_blk/main.c3
-rw-r--r--drivers/block/ps3vram.c7
-rw-r--r--drivers/block/rbd.c7
-rw-r--r--drivers/block/ublk_drv.c405
-rw-r--r--drivers/block/virtio_blk.c4
-rw-r--r--drivers/block/zram/zram_drv.c15
-rw-r--r--drivers/md/md.c65
-rw-r--r--drivers/md/md.h9
-rw-r--r--drivers/nvme/host/auth.c30
-rw-r--r--drivers/nvme/host/constants.c16
-rw-r--r--drivers/nvme/host/core.c123
-rw-r--r--drivers/nvme/host/fabrics.c19
-rw-r--r--drivers/nvme/host/ioctl.c9
-rw-r--r--drivers/nvme/host/nvme.h16
-rw-r--r--drivers/nvme/host/pci.c104
-rw-r--r--drivers/nvme/host/tcp.c7
-rw-r--r--drivers/nvme/target/admin-cmd.c2
-rw-r--r--drivers/nvme/target/io-cmd-file.c10
-rw-r--r--drivers/nvme/target/passthru.c5
-rw-r--r--drivers/nvme/target/tcp.c5
-rw-r--r--drivers/nvme/target/zns.c3
-rw-r--r--drivers/s390/block/dasd.c5
-rw-r--r--drivers/s390/block/dasd_3990_erp.c10
-rw-r--r--drivers/s390/block/dasd_alias.c6
-rw-r--r--drivers/s390/block/dasd_eckd.c104
-rw-r--r--drivers/s390/block/dasd_eer.c2
-rw-r--r--drivers/s390/block/dasd_fba.c14
-rw-r--r--drivers/s390/block/dcssblk.c4
-rw-r--r--drivers/scsi/sd.c36
-rw-r--r--drivers/target/target_core_file.c18
-rw-r--r--drivers/vhost/vringh.c5
-rw-r--r--fs/afs/write.c8
-rw-r--r--fs/ceph/file.c12
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/fscache.c16
-rw-r--r--fs/cifs/misc.c5
-rw-r--r--fs/cifs/smb2ops.c6
-rw-r--r--fs/coredump.c7
-rw-r--r--fs/nfs/fscache.c16
-rw-r--r--fs/orangefs/inode.c22
-rw-r--r--fs/splice.c5
-rw-r--r--include/linux/blkdev.h27
-rw-r--r--include/linux/bvec.h41
-rw-r--r--include/linux/drbd.h7
-rw-r--r--include/linux/drbd_config.h16
-rw-r--r--include/linux/drbd_genl_api.h2
-rw-r--r--include/linux/drbd_limits.h204
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/uapi/linux/ublk_cmd.h49
-rw-r--r--io_uring/rsrc.c4
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--lib/sbitmap.c102
-rw-r--r--mm/page_io.c8
-rw-r--r--mm/swapfile.c2
-rw-r--r--net/ceph/messenger_v1.c7
-rw-r--r--net/ceph/messenger_v2.c28
-rw-r--r--net/rxrpc/rxperf.c8
-rw-r--r--net/sunrpc/svcsock.c7
-rw-r--r--net/sunrpc/xdr.c5
109 files changed, 2224 insertions, 1599 deletions
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index cd14ecb3c9a5..ac1e519272aa 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -432,7 +432,8 @@ Contact: linux-block@vger.kernel.org
Description:
[RW] This is the maximum number of kilobytes that the block
layer will allow for a filesystem request. Must be smaller than
- or equal to the maximum size allowed by the hardware.
+ or equal to the maximum size allowed by the hardware. Write 0
+ to use default kernel settings.
What: /sys/block/<disk>/queue/max_segment_size
diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
deleted file mode 100644
index 2ae7f064736a..000000000000
--- a/Documentation/block/capability.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-===============================
-Generic Block Device Capability
-===============================
-
-This file documents the sysfs file ``block/<disk>/capability``.
-
-``capability`` is a bitfield, printed in hexadecimal, indicating which
-capabilities a specific block device supports:
-
-.. kernel-doc:: include/linux/blkdev.h
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index c4c73db748a8..102953166429 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -10,7 +10,6 @@ Block
bfq-iosched
biovecs
blk-mq
- capability
cmdline-partition
data-integrity
deadline-iosched
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst
index ba45c46cc0da..1713b2890abb 100644
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@@ -144,6 +144,43 @@ managing and controlling ublk devices with help of several control commands:
For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
responsibility to save IO target specific info in userspace.
+- ``UBLK_CMD_GET_DEV_INFO2``
+ Same purpose with ``UBLK_CMD_GET_DEV_INFO``, but ublk server has to
+ provide path of the char device of ``/dev/ublkc*`` for kernel to run
+ permission check, and this command is added for supporting unprivileged
+ ublk device, and introduced with ``UBLK_F_UNPRIVILEGED_DEV`` together.
+ Only the user owning the requested device can retrieve the device info.
+
+ How to deal with userspace/kernel compatibility:
+
+ 1) if kernel is capable of handling ``UBLK_F_UNPRIVILEGED_DEV``
+
+ If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ublk server should send ``UBLK_CMD_GET_DEV_INFO2``, given anytime
+ unprivileged application needs to query devices the current user owns,
+ when the application has no idea if ``UBLK_F_UNPRIVILEGED_DEV`` is set
+ given the capability info is stateless, and application should always
+ retrieve it via ``UBLK_CMD_GET_DEV_INFO2``
+
+ If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of
+ UBLK_F_UNPRIVILEGED_DEV isn't available for user
+
+ 2) if kernel isn't capable of handling ``UBLK_F_UNPRIVILEGED_DEV``
+
+ If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ``UBLK_CMD_GET_DEV_INFO2`` is tried first, and will be failed, then
+ ``UBLK_CMD_GET_DEV_INFO`` needs to be retried given
+ ``UBLK_F_UNPRIVILEGED_DEV`` can't be set
+
+ If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of
+ ``UBLK_F_UNPRIVILEGED_DEV`` isn't available for user
+
- ``UBLK_CMD_START_USER_RECOVERY``
This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
@@ -180,6 +217,15 @@ managing and controlling ublk devices with help of several control commands:
double-write since the driver may issue the same I/O request twice. It
might be useful to a read-only FS or a VM backend.
+Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
+Once the flag is set, all control commands can be sent by unprivileged
+user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
+the specified char device(``/dev/ublkc*``) is done for all other control
+commands by ublk driver, for doing that, path of the char device has to
+be provided in these commands' payload from ublk server. With this way,
+ublk device becomes container-ware, and device created in one container
+can be controlled/accessed just inside this container.
+
Data plane
----------
@@ -254,15 +300,6 @@ with specified IO tag in the command data:
Future development
==================
-Container-aware ublk deivice
-----------------------------
-
-ublk driver doesn't handle any IO logic. Its function is well defined
-for now and very limited userspace interfaces are needed, which is also
-well defined too. It is possible to make ublk devices container-aware block
-devices in future as Stefan Hajnoczi suggested [#stefan]_, by removing
-ADMIN privilege.
-
Zero copy
---------
diff --git a/MAINTAINERS b/MAINTAINERS
index 82938ca70466..23a8b5cde775 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6425,6 +6425,7 @@ T: git git://git.linbit.com/linux-drbd.git
T: git git://git.linbit.com/drbd-8.4.git
F: Documentation/admin-guide/blockdev/
F: drivers/block/drbd/
+F: include/linux/drbd*
F: lib/lru_cache.c
DRIVER COMPONENT FRAMEWORK
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 615516146086..27f11320b8d1 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -30,6 +30,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
depends on IOSCHED_BFQ && BLK_CGROUP
+ default y
select BLK_CGROUP_RWSTAT
help
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 0fbde0fc0628..89ffb3aa992c 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -513,12 +513,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd)
kfree(cpd_to_bfqgd(cpd));
}
-static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
- struct blkcg *blkcg)
+static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk,
+ struct blkcg *blkcg, gfp_t gfp)
{
struct bfq_group *bfqg;
- bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
+ bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id);
if (!bfqg)
return NULL;
@@ -551,7 +551,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
bfqg->bfqd = bfqd;
bfqg->active_entities = 0;
bfqg->num_queues_with_pending_reqs = 0;
- bfqg->online = true;
bfqg->rq_pos_tree = RB_ROOT;
}
@@ -614,7 +613,7 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
continue;
}
bfqg = blkg_to_bfqg(blkg);
- if (bfqg->online) {
+ if (bfqg->pd.online) {
bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
return bfqg;
}
@@ -706,12 +705,52 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_activate_bfqq(bfqd, bfqq);
}
- if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+ if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver)
bfq_schedule_dispatch(bfqd);
/* release extra ref taken above, bfqq may happen to be freed now */
bfq_put_queue(bfqq);
}
+static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
+ struct bfq_queue *sync_bfqq,
+ struct bfq_io_cq *bic,
+ struct bfq_group *bfqg,
+ unsigned int act_idx)
+{
+ struct bfq_queue *bfqq;
+
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ return;
+ }
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data != &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is not valid
+ * anymore. We cannot easily just cancel the merge (by
+ * clearing new_bfqq) as there may be other processes
+ * using this queue and holding refs to all queues
+ * below sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from bfqq now
+ * so that we cannot merge bio to a request from the
+ * old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bic_set_bfqq(bic, NULL, true, act_idx);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ }
+}
+
/**
* __bfq_bic_change_cgroup - move @bic to @bfqg.
* @bfqd: the queue descriptor.
@@ -726,53 +765,20 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
struct bfq_group *bfqg)
{
- struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false);
- struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true);
- struct bfq_entity *entity;
+ unsigned int act_idx;
- if (async_bfqq) {
- entity = &async_bfqq->entity;
+ for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);
- if (entity->sched_data != &bfqg->sched_data)