// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2017-2018 Christoph Hellwig.
*/

#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
#include <trace/events/block.h>
#include "nvme.h"

bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

static const char *nvme_iopolicy_names[] = {
	[NVME_IOPOLICY_NUMA]	= "numa",
	[NVME_IOPOLICY_RR]	= "round-robin",
	[NVME_IOPOLICY_QD]	= "queue-depth",
};

static int iopolicy = NVME_IOPOLICY_NUMA;
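
/*
 * Setter for the "iopolicy" module parameter: accept "numa", "round-robin"
 * or "queue-depth" and record the matching NVME_IOPOLICY_* value as the
 * default policy for newly created subsystems.  The parameter is exported
 * read/write, typically as /sys/module/nvme_core/parameters/iopolicy.
 */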
static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
{
	if (!val)
		return -EINVAL;
	if (!strncmp(val, "numa", 4))
		iopolicy = NVME_IOPOLICY_NUMA;
	else if (!strncmp(val, "round-robin", 11))
		iopolicy = NVME_IOPOLICY_RR;
	else if (!strncmp(val, "queue-depth", 11))
		iopolicy = NVME_IOPOLICY_QD;
	else
		return -EINVAL;

	return 0;
}

static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
{
	return sprintf(buf, "%s\n", nvme_iopolicy_names[iopolicy]);
}

module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
	&iopolicy, 0644);
MODULE_PARM_DESC(iopolicy,
	"Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
	subsys->iopolicy = iopolicy;
}
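
/*
 * Freeze/unfreeze helpers for the request queues of all multipath nodes in a
 * subsystem; callers must hold subsys->lock.
 */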
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_unfreeze_queue(h->disk->queue);
}

void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_freeze_queue_wait(h->disk->queue);
}

void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_freeze_queue_start(h->disk->queue);
}
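
/*
 * Fail over a request: hand its bios back to the multipath node so they can
 * be retried on another path.  An ANA error additionally kicks off a re-read
 * of the ANA log page.
 */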
void nvme_failover_req(struct request *req)
{
	struct nvme_ns *ns = req->q->queuedata;
	u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
	unsigned long flags;
	struct bio *bio;

	nvme_mpath_clear_current_path(ns);

	/*
	 * If we got back an ANA error, we know the controller is alive but not
	 * ready to serve this namespace.  Kick off a re-read of the ANA
	 * information page, and just try any other available path for now.
	 */
	if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
		set_bit(NVME_NS_ANA_PENDING, &ns->flags);
		queue_work(nvme_wq, &ns->ctrl->ana_work);
	}

	spin_lock_irqsave(&ns->head->requeue_lock, flags);
	for (bio = req->bio; bio; bio = bio->bi_next) {
		bio_set_dev(bio, ns->head->disk->part0);
		if (bio->bi_opf & REQ_POLLED) {
			bio->bi_opf &= ~REQ_POLLED;
			bio->bi_cookie = BLK_QC_T_NONE;
		}
		/*
		 * The alternate request queue that we may end up submitting
		 * the bio to may be frozen temporarily, in which case
		 * REQ_NOWAIT would fail the I/O immediately with EAGAIN to the
		 * issuer.  We are no longer in the issuer context and can
		 * block here, so clear the flag to avoid spurious EAGAIN I/O
		 * failures.
		 */
		bio->bi_opf &= ~REQ_NOWAIT;
	}
	blk_steal_bios(&ns->head->requeue_list, req);
	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);

	nvme_req(req)->status = 0;
	nvme_end_req(req);
	kblockd_schedule_work(&ns->head->requeue_work);
}
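
/*
 * Account the start of a request against the multipath node: track the
 * controller's active request count for the queue-depth policy and start
 * I/O statistics on the multipath gendisk.
 */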
void nvme_mpath_start_request(struct request *rq)
{
	struct nvme_ns *ns = rq->q->queuedata;
	struct gendisk *disk = ns->head->disk;

	if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) {
		atomic_inc(&ns->ctrl->nr_active);
		nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE;
	}

	if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
		return;

	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,