 block/blk-mq.c                | 29
 drivers/nvme/host/Kconfig     | 13
 drivers/nvme/host/core.c      |  2
 drivers/nvme/host/ioctl.c     | 68
 drivers/nvme/host/pci.c       |  3
 drivers/nvme/target/pci-epf.c | 63
 6 files changed, 94 insertions(+), 84 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ae8494d88897..c2697db59109 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2965,8 +2965,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
static struct request *blk_mq_get_new_requests(struct request_queue *q,
struct blk_plug *plug,
- struct bio *bio,
- unsigned int nsegs)
+ struct bio *bio)
{
struct blk_mq_alloc_data data = {
.q = q,
@@ -3125,7 +3124,7 @@ new_request:
if (rq) {
blk_mq_use_cached_rq(rq, plug, bio);
} else {
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+ rq = blk_mq_get_new_requests(q, plug, bio);
if (unlikely(!rq)) {
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
@@ -4465,14 +4464,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
return NULL;
}
-static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
- struct request_queue *q)
+static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+ struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
unsigned long i, j;
- /* protect against switching io scheduler */
- mutex_lock(&q->elevator_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
@@ -4505,7 +4502,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
- mutex_unlock(&q->elevator_lock);
+}
+
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+ struct request_queue *q, bool lock)
+{
+ if (lock) {
+ /* protect against switching io scheduler */
+ mutex_lock(&q->elevator_lock);
+ __blk_mq_realloc_hw_ctxs(set, q);
+ mutex_unlock(&q->elevator_lock);
+ } else {
+ __blk_mq_realloc_hw_ctxs(set, q);
+ }
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4537,7 +4546,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
xa_init(&q->hctx_table);
- blk_mq_realloc_hw_ctxs(set, q);
+ blk_mq_realloc_hw_ctxs(set, q, false);
if (!q->nr_hw_queues)
goto err_hctxs;
@@ -5033,7 +5042,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_realloc_hw_ctxs(set, q);
+ blk_mq_realloc_hw_ctxs(set, q, true);
if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues;
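
The blk-mq hunks above split the hctx reallocation helper so that queue initialization can skip q->elevator_lock while nr_hw_queues updates still take it. A minimal stand-alone sketch of that conditional-locking wrapper pattern, using pthreads and illustrative names rather than the kernel's types:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t elevator_lock = PTHREAD_MUTEX_INITIALIZER;

/* The unlocked worker: callers decide whether serialization is needed. */
static void __realloc_hw_ctxs(int nr_hw_queues)
{
	for (int i = 0; i < nr_hw_queues; i++)
		printf("realloc hctx %d\n", i);
}

/*
 * Wrapper in the spirit of blk_mq_realloc_hw_ctxs(set, q, lock): take the
 * lock only when the caller can race with an I/O scheduler switch.
 */
static void realloc_hw_ctxs(int nr_hw_queues, bool lock)
{
	if (lock) {
		pthread_mutex_lock(&elevator_lock);
		__realloc_hw_ctxs(nr_hw_queues);
		pthread_mutex_unlock(&elevator_lock);
	} else {
		__realloc_hw_ctxs(nr_hw_queues);
	}
}

int main(void)
{
	realloc_hw_ctxs(2, false);	/* queue init: no scheduler attached yet */
	realloc_hw_ctxs(2, true);	/* nr_hw_queues update: serialize */
	return 0;
}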
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 10e453b2436e..d47dfa80fb95 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -18,10 +18,15 @@ config NVME_MULTIPATH
bool "NVMe multipath support"
depends on NVME_CORE
help
- This option enables support for multipath access to NVMe
- subsystems. If this option is enabled only a single
- /dev/nvmeXnY device will show up for each NVMe namespace,
- even if it is accessible through multiple controllers.
+ This option controls support for multipath access to NVMe
+ subsystems. If it is enabled, NVMe multipath support is built
+ into the kernel; if it is disabled, that support is left out.
+ When the option is disabled, each controller/namespace pair
+ receives its own /dev/nvmeXnY device entry, even for namespaces
+ that are reachable through several controllers, and multipath
+ access is not supported.
+
+ If unsure, say Y.
config NVME_VERBOSE_ERRORS
bool "NVMe verbose error reporting"
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2d89fac86c5..cc23035148b4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
dev_warn_once(ctrl->device,
- "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
+ "Shared namespace support requires core_nvme.multipath=Y.\n");
}
}
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index ecf136489044..ca86d3bf7ea4 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- struct io_uring_cmd *ioucmd, unsigned int flags,
- unsigned int iou_issue_flags)
+ struct iov_iter *iter, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@@ -129,37 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (!nvme_ctrl_sgl_supported(ctrl))
dev_warn_once(ctrl->device, "using unchecked data buffer\n");
if (has_metadata) {
- if (!supports_metadata) {
- ret = -EINVAL;
- goto out;
- }
+ if (!supports_metadata)
+ return -EINVAL;
+
if (!nvme_ctrl_meta_sgl_supported(ctrl))
dev_warn_once(ctrl->device,
"using unchecked metadata buffer\n");
}
- if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
- struct iov_iter iter;
-
- /* fixedbufs is only for non-vectored io */
- if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
- ret = -EINVAL;
- goto out;
- }
- ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
- rq_data_dir(req), &iter, ioucmd,
- iou_issue_flags);
- if (ret < 0)
- goto out;
- ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
- } else {
+ if (iter)
+ ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
+ else
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
0, rq_data_dir(req));
- }
if (ret)
- goto out;
+ return ret;
bio = req->bio;
if (bdev)
@@ -176,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
out_unmap:
if (bio)
blk_rq_unmap_user(bio);
-out:
- blk_mq_free_request(req);
return ret;
}
@@ -200,9 +183,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
- meta_len, NULL, flags, 0);
+ meta_len, NULL, flags);
if (ret)
- return ret;
+ goto out_free_req;
}
bio = req->bio;
@@ -218,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
if (effects)
nvme_passthru_end(ctrl, ns, effects, cmd, ret);
+ return ret;
+out_free_req:
+ blk_mq_free_request(req);
return ret;
}
@@ -469,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
+ struct iov_iter iter;
+ struct iov_iter *map_iter = NULL;
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
@@ -504,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.metadata_len = READ_ONCE(cmd->metadata_len);
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
+ if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
+ /* fixedbufs is only for non-vectored io */
+ if (vec)
+ return -EINVAL;
+
+ ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
+ nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
+ issue_flags);
+ if (ret < 0)
+ return ret;
+
+ map_iter = &iter;
+ }
+
if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
@@ -517,11 +519,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
if (d.data_len) {
- ret = nvme_map_user_request(req, d.addr,
- d.data_len, nvme_to_user_ptr(d.metadata),
- d.metadata_len, ioucmd, vec, issue_flags);
+ ret = nvme_map_user_request(req, d.addr, d.data_len,
+ nvme_to_user_ptr(d.metadata), d.metadata_len,
+ map_iter, vec);
if (ret)
- return ret;
+ goto out_free_req;
}
/* to free bio on completion, as req->bio will be null at that time */
@@ -531,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;
+
+out_free_req:
+ blk_mq_free_request(req);
+ return ret;
}
static bool is_ctrl_ioctl(unsigned int cmd)
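
The ioctl.c hunks above move the io_uring fixed-buffer import into nvme_uring_cmd_io() and stop nvme_map_user_request() from freeing the request on failure; callers now unwind through their own out_free_req labels. A stand-alone sketch of that caller-owns-cleanup pattern, with purely illustrative names and types:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct request { void *payload; };

/* Helper in the style of the reworked nvme_map_user_request(): on error it
 * just returns and no longer frees the request it was handed. */
static int map_user_request(struct request *req, size_t len)
{
	if (len == 0)
		return -EINVAL;	/* caller cleans up */
	req->payload = malloc(len);
	return req->payload ? 0 : -ENOMEM;
}

/* The caller owns the request for its whole lifetime and unwinds through a
 * single label, mirroring the new out_free_req paths in
 * nvme_submit_user_cmd() and nvme_uring_cmd_io(). */
static int submit_user_cmd(size_t len)
{
	struct request *req = calloc(1, sizeof(*req));
	int ret;

	if (!req)
		return -ENOMEM;

	ret = map_user_request(req, len);
	if (ret)
		goto out_free_req;

	printf("submitted %zu bytes\n", len);
	free(req->payload);
	free(req);
	return 0;

out_free_req:
	free(req);
	return ret;
}

int main(void)
{
	submit_user_cmd(512);	/* success path */
	submit_user_cmd(0);	/* error path exercises out_free_req */
	return 0;
}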
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2883d17ee1eb..b178d52eac1b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
{
struct request *req;
+ if (rq_list_empty(rqlist))
+ return;
+
spin_lock(&nvmeq->sq_lock);
while ((req = rq_list_pop(rqlist))) {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
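
The pci.c hunk simply avoids taking sq_lock for an empty batch. The same guard in generic form, with illustrative types rather than the driver's:

#include <pthread.h>
#include <stdio.h>

struct rq_node { struct rq_node *next; };
struct rq_list { struct rq_node *head; };

static pthread_mutex_t sq_lock = PTHREAD_MUTEX_INITIALIZER;

static int rq_list_empty(const struct rq_list *l)
{
	return l->head == NULL;
}

static void submit_cmds(struct rq_list *rqlist)
{
	/* Bail out before touching the lock when the batch is empty,
	 * as the nvme_submit_cmds() change does. */
	if (rq_list_empty(rqlist))
		return;

	pthread_mutex_lock(&sq_lock);
	while (rqlist->head) {
		rqlist->head = rqlist->head->next;
		printf("submitted one request\n");
	}
	pthread_mutex_unlock(&sq_lock);
}

int main(void)
{
	struct rq_list empty = { .head = NULL };
	struct rq_node n = { .next = NULL };
	struct rq_list one = { .head = &n };

	submit_cmds(&empty);	/* no lock traffic */
	submit_cmds(&one);
	return 0;
}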
diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index b54b3fdbe389..51c27b32248d 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
+ int ret;
if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,6 +1299,24 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
if (status != NVME_SC_SUCCESS)
goto err;
+ /*
+ * Map the CQ PCI address space. Since PCI endpoint controllers may
+ * return a partial mapping, check that the mapping is large enough.
+ */
+ ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
+ &cq->pci_map);
+ if (ret) {
+ dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
+ cq->qid, ret);
+ goto err_internal;
+ }
+
+ if (cq->pci_map.pci_size < cq->pci_size) {
+ dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
+ cq->qid);
+ goto err_unmap_queue;
+ }
+
set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
@@ -1305,6 +1324,10 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
return NVME_SC_SUCCESS;
+err_unmap_queue:
+ nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
+err_internal:
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
err:
if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
cancel_delayed_work_sync(&cq->work);
nvmet_pci_epf_drain_queue(cq);
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+ nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
return NVME_SC_SUCCESS;
}
@@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
ctrl->cq = NULL;
}
-static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
- struct nvmet_pci_epf_queue *queue)
-{
- struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
- int ret;
-
- ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
- queue->pci_size, &queue->pci_map);
- if (ret) {
- dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
- queue->qid, ret);
- return ret;
- }
-
- if (queue->pci_map.pci_size < queue->pci_size) {
- dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
- queue->qid);
- nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
- struct nvmet_pci_epf_queue *queue)
-{
- nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
-}
-
static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
{
struct nvmet_pci_epf_iod *iod =
@@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
struct nvme_completion *cqe;
struct nvmet_pci_epf_iod *iod;
unsigned long flags;
- int ret, n = 0;
-
- ret = nvmet_pci_epf_map_queue(ctrl, cq);
- if (ret)
- goto again;
+ int ret = 0, n = 0;
while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {
@@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
n++;
}
- nvmet_pci_epf_unmap_queue(ctrl, cq);
-
/*
* We do not support precise IRQ coalescing time (100ns units as per
* NVMe specifications). So if we have posted completion entries without
@@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
if (n)
nvmet_pci_epf_raise_irq(ctrl, cq, true);
-again:
if (ret < 0)
queue_delayed_work(system_highpri_wq, &cq->work,
NVMET_PCI_EPF_CQ_RETRY_INTERVAL);
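
The pci-epf.c rework maps the CQ's PCI address range once at CQ creation, rejects partial mappings up front, and unmaps only at CQ deletion, so the completion work no longer maps and unmaps the queue on every run. A stand-alone sketch of that map-once lifetime and its error unwinding; every name below is illustrative, not the endpoint framework's API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-ins for the endpoint mapping calls; these are not the
 * kernel's nvmet_pci_epf_mem_map()/unmap() signatures. */
struct mapping { void *virt; size_t size; };

static int mem_map(size_t wanted, struct mapping *map)
{
	/* Pretend the controller can only map half of large requests. */
	map->size = wanted > 4096 ? wanted / 2 : wanted;
	map->virt = malloc(map->size);
	return map->virt ? 0 : -ENOMEM;
}

static void mem_unmap(struct mapping *map)
{
	free(map->virt);
	memset(map, 0, sizeof(*map));
}

struct queue { size_t pci_size; struct mapping pci_map; int live; };

/* Map once at creation time and keep the mapping for the queue's lifetime,
 * checking for a partial mapping up front, as the reworked
 * nvmet_pci_epf_create_cq() does. */
static int create_cq(struct queue *cq, size_t size)
{
	int ret;

	cq->pci_size = size;
	ret = mem_map(cq->pci_size, &cq->pci_map);
	if (ret)
		return ret;

	if (cq->pci_map.size < cq->pci_size) {
		ret = -ENOMEM;
		goto err_unmap;
	}

	cq->live = 1;
	return 0;

err_unmap:
	mem_unmap(&cq->pci_map);
	return ret;
}

/* The mapping is released only when the queue is deleted, so the completion
 * work no longer has to map and unmap on every invocation. */
static void delete_cq(struct queue *cq)
{
	if (cq->live) {
		mem_unmap(&cq->pci_map);
		cq->live = 0;
	}
}

int main(void)
{
	struct queue cq = { 0 };

	if (create_cq(&cq, 4096) == 0) {
		printf("CQ mapped once, size %zu\n", cq.pci_map.size);
		delete_cq(&cq);
	}
	if (create_cq(&cq, 65536))
		printf("partial mapping rejected\n");
	return 0;
}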