diff options
109 files changed, 2224 insertions, 1599 deletions
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index cd14ecb3c9a5..ac1e519272aa 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -432,7 +432,8 @@ Contact: linux-block@vger.kernel.org Description: [RW] This is the maximum number of kilobytes that the block layer will allow for a filesystem request. Must be smaller than - or equal to the maximum size allowed by the hardware. + or equal to the maximum size allowed by the hardware. Write 0 + to use default kernel settings. What: /sys/block/<disk>/queue/max_segment_size diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst deleted file mode 100644 index 2ae7f064736a..000000000000 --- a/Documentation/block/capability.rst +++ /dev/null @@ -1,10 +0,0 @@ -=============================== -Generic Block Device Capability -=============================== - -This file documents the sysfs file ``block/<disk>/capability``. - -``capability`` is a bitfield, printed in hexadecimal, indicating which -capabilities a specific block device supports: - -.. kernel-doc:: include/linux/blkdev.h diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst index c4c73db748a8..102953166429 100644 --- a/Documentation/block/index.rst +++ b/Documentation/block/index.rst @@ -10,7 +10,6 @@ Block bfq-iosched biovecs blk-mq - capability cmdline-partition data-integrity deadline-iosched diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index ba45c46cc0da..1713b2890abb 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -144,6 +144,43 @@ managing and controlling ublk devices with help of several control commands: For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's responsibility to save IO target specific info in userspace. +- ``UBLK_CMD_GET_DEV_INFO2`` + Same purpose with ``UBLK_CMD_GET_DEV_INFO``, but ublk server has to + provide path of the char device of ``/dev/ublkc*`` for kernel to run + permission check, and this command is added for supporting unprivileged + ublk device, and introduced with ``UBLK_F_UNPRIVILEGED_DEV`` together. + Only the user owning the requested device can retrieve the device info. + + How to deal with userspace/kernel compatibility: + + 1) if kernel is capable of handling ``UBLK_F_UNPRIVILEGED_DEV`` + + If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``: + + ublk server should send ``UBLK_CMD_GET_DEV_INFO2``, given anytime + unprivileged application needs to query devices the current user owns, + when the application has no idea if ``UBLK_F_UNPRIVILEGED_DEV`` is set + given the capability info is stateless, and application should always + retrieve it via ``UBLK_CMD_GET_DEV_INFO2`` + + If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``: + + ``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of + UBLK_F_UNPRIVILEGED_DEV isn't available for user + + 2) if kernel isn't capable of handling ``UBLK_F_UNPRIVILEGED_DEV`` + + If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``: + + ``UBLK_CMD_GET_DEV_INFO2`` is tried first, and will be failed, then + ``UBLK_CMD_GET_DEV_INFO`` needs to be retried given + ``UBLK_F_UNPRIVILEGED_DEV`` can't be set + + If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``: + + ``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of + ``UBLK_F_UNPRIVILEGED_DEV`` isn't available for user + - ``UBLK_CMD_START_USER_RECOVERY`` This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This @@ -180,6 +217,15 @@ managing and controlling ublk devices with help of several control commands: double-write since the driver may issue the same I/O request twice. It might be useful to a read-only FS or a VM backend. +Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``. +Once the flag is set, all control commands can be sent by unprivileged +user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on +the specified char device(``/dev/ublkc*``) is done for all other control +commands by ublk driver, for doing that, path of the char device has to +be provided in these commands' payload from ublk server. With this way, +ublk device becomes container-ware, and device created in one container +can be controlled/accessed just inside this container. + Data plane ---------- @@ -254,15 +300,6 @@ with specified IO tag in the command data: Future development ================== -Container-aware ublk deivice ----------------------------- - -ublk driver doesn't handle any IO logic. Its function is well defined -for now and very limited userspace interfaces are needed, which is also -well defined too. It is possible to make ublk devices container-aware block -devices in future as Stefan Hajnoczi suggested [#stefan]_, by removing -ADMIN privilege. - Zero copy --------- diff --git a/MAINTAINERS b/MAINTAINERS index 82938ca70466..23a8b5cde775 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6425,6 +6425,7 @@ T: git git://git.linbit.com/linux-drbd.git T: git git://git.linbit.com/drbd-8.4.git F: Documentation/admin-guide/blockdev/ F: drivers/block/drbd/ +F: include/linux/drbd* F: lib/lru_cache.c DRIVER COMPONENT FRAMEWORK diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 615516146086..27f11320b8d1 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -30,6 +30,7 @@ config IOSCHED_BFQ config BFQ_GROUP_IOSCHED bool "BFQ hierarchical scheduling support" depends on IOSCHED_BFQ && BLK_CGROUP + default y select BLK_CGROUP_RWSTAT help diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 0fbde0fc0628..89ffb3aa992c 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -513,12 +513,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd) kfree(cpd_to_bfqgd(cpd)); } -static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, - struct blkcg *blkcg) +static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; - bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node); + bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; @@ -551,7 +551,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd) bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; - bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -614,7 +613,7 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) continue; } bfqg = blkg_to_bfqg(blkg); - if (bfqg->online) { + if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } @@ -706,12 +705,52 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_activate_bfqq(bfqd, bfqq); } - if (!bfqd->in_service_queue && !bfqd->rq_in_driver) + if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } +static void bfq_sync_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *sync_bfqq, + struct bfq_io_cq *bic, + struct bfq_group *bfqg, + unsigned int act_idx) +{ + struct bfq_queue *bfqq; + + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + return; + } + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is not valid + * anymore. We cannot easily just cancel the merge (by + * clearing new_bfqq) as there may be other processes + * using this queue and holding refs to all queues + * below sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from bfqq now + * so that we cannot merge bio to a request from the + * old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bic_set_bfqq(bic, NULL, true, act_idx); + bfq_release_process_ref(bfqd, sync_bfqq); + } +} + /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. @@ -726,53 +765,20 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { - struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false); - struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true); - struct bfq_entity *entity; + unsigned int act_idx; - if (async_bfqq) { - entity = &async_bfqq->entity; + for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { + struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); + struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); - if (entity->sched_data != &bfqg->sched_data) |
