// SPDX-License-Identifier: GPL-2.0
/*
* MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
* for the blk-mq scheduling framework
*
* Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/sbitmap.h>
#include <trace/events/block.h>
#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
/*
* See Documentation/block/deadline-iosched.rst
*/
/*
* Built-in values for the scheduler tunables. The three time values below
* are written as multiples of HZ.
* NOTE(review): presumably these seed the similarly named fields of
* struct deadline_data; the initialisation code is not visible here -- confirm.
*/
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
/*
* Time after which to dispatch lower priority requests even if higher
* priority requests are pending.
*/
static const int prio_aging_expire = 10 * HZ;
static const int writes_starved = 2; /* max times reads can starve a write */
static const int fifo_batch = 16; /* # of sequential requests treated as one
by the above parameters. For throughput. */
/*
* Data direction of a request. The enumerators alias READ and WRITE and are
* used as the index into the per-direction arrays such as
* dd_per_prio.sort_list[] (see deadline_rb_root() and deadline_from_pos()).
*/
enum dd_data_dir {
DD_READ = READ,
DD_WRITE = WRITE,
};
/* Number of data directions; sizes the per-direction arrays. */
enum { DD_DIR_COUNT = 2 };
/*
 * Deadline scheduler priority levels. DD_PRIO_MAX names the last level and
 * DD_PRIO_COUNT, derived from it, sizes deadline_data.per_prio[].
 */
enum dd_prio {
	DD_RT_PRIO,
	DD_BE_PRIO,
	DD_IDLE_PRIO,
	DD_PRIO_MAX = DD_IDLE_PRIO,
};

enum { DD_PRIO_COUNT = DD_PRIO_MAX + 1 };
/*
* I/O statistics per I/O priority. It is fine if these counters overflow.
* What matters is that these counters are at least as wide as
* log2(max_outstanding_requests).
*
* NOTE(review): only @completed is an atomic_t while the other counters are
* plain uint32_t; presumably @completed is updated without the lock that
* covers the others -- confirm against the completion path.
*/
struct io_stats_per_prio {
uint32_t inserted;
uint32_t merged;
uint32_t dispatched;
atomic_t completed;
};
/*
* Deadline scheduler data per I/O priority (enum dd_prio). Requests are
* present on both sort_list[] and fifo_list[].
*
* Every array below has DD_DIR_COUNT entries, one per data direction
* (enum dd_data_dir).
*/
struct dd_per_prio {
/*
* NOTE(review): no user of this list is visible in this part of the file;
* presumably requests that bypass sort_list[]/fifo_list[] -- confirm.
*/
struct list_head dispatch;
/* rbtrees selected by data direction, see deadline_rb_root(). */
struct rb_root sort_list[DD_DIR_COUNT];
/* Per-direction lists; see the struct comment above. */
struct list_head fifo_list[DD_DIR_COUNT];
/* Position of the most recently dispatched request. */
sector_t latest_pos[DD_DIR_COUNT];
/* Statistics for this priority level. */
struct io_stats_per_prio stats;
};
/*
* Scheduler instance data. dd_request_merged() obtains it from
* q->elevator->elevator_data.
*/
struct deadline_data {
/*
* run time data
*/
/* One entry per scheduler priority level (enum dd_prio). */
struct dd_per_prio per_prio[DD_PRIO_COUNT];
/* Data direction of latest dispatched request. */
enum dd_data_dir last_dir;
unsigned int batching; /* number of sequential requests made */
unsigned int starved; /* times reads have starved writes */
/*
* settings that change how the i/o scheduler behaves
*/
/* One expiry setting per data direction (enum dd_data_dir). */
int fifo_expire[DD_DIR_COUNT];
int fifo_batch;
int writes_starved;
int front_merges;
u32 async_depth;
int prio_aging_expire;
/*
* NOTE(review): presumably serialises access to the run time data above;
* no lock/unlock site is visible in this part of the file -- confirm.
*/
spinlock_t lock;
};
/*
 * Translation table from an I/O priority class (IOPRIO_CLASS_*) to the
 * scheduler's own priority level (enum dd_prio). Requests without a class
 * (IOPRIO_CLASS_NONE) share the level of IOPRIO_CLASS_BE.
 */
static const enum dd_prio ioprio_class_to_prio[] = {
	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
};
static inline struct rb_root *
deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
{
return &per_prio->sort_list[rq_data_dir(rq)];
}
/*
* Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
* request.
*
* The class is extracted with IOPRIO_PRIO_CLASS() from the value returned by
* req_get_ioprio(@rq). dd_request_merged() uses the result as an index into
* ioprio_class_to_prio[].
*/
static u8 dd_rq_ioclass(struct request *rq)
{
return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
}
/*
 * Return the first request for which blk_rq_pos() >= @pos.
 *
 * Walks the sort tree of @per_prio for direction @data_dir from the root:
 * a node at or beyond @pos is remembered as the candidate and the search
 * continues to its left; otherwise the search continues to the right. The
 * walk relies on the tree being ordered by blk_rq_pos().
 *
 * Returns NULL if the tree is empty or no request is at or beyond @pos.
 */
static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
				enum dd_data_dir data_dir, sector_t pos)
{
	struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
	struct request *res = NULL;

	/* An empty tree simply skips the loop and yields NULL. */
	while (node) {
		struct request *rq = rb_entry_rq(node);

		if (blk_rq_pos(rq) >= pos) {
			res = rq;
			node = node->rb_left;
		} else {
			node = node->rb_right;
		}
	}
	return res;
}
/*
 * Hand @rq to elv_rb_add() for the sort tree of @per_prio that matches the
 * request's data direction.
 */
static void
deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	elv_rb_add(deadline_rb_root(per_prio, rq), rq);
}
/*
 * Hand @rq to elv_rb_del() for the sort tree of @per_prio that matches the
 * request's data direction.
 */
static inline void
deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	struct rb_root *tree = deadline_rb_root(per_prio, rq);

	elv_rb_del(tree, rq);
}
/*
 * Take @rq off the scheduler's bookkeeping: unlink it from the list it is
 * queued on, drop it from the sort tree if it is there, call
 * elv_rqhash_del() for it, and clear @q->last_merge if it points at @rq.
 */
static void deadline_remove_request(struct request_queue *q,
				    struct dd_per_prio *per_prio,
				    struct request *rq)
{
	int on_sort_tree;

	list_del_init(&rq->queuelist);

	/* During an insert merge the request may not be on the rbtree. */
	on_sort_tree = !RB_EMPTY_NODE(&rq->rb_node);
	if (on_sort_tree)
		deadline_del_rq_rb(per_prio, rq);

	elv_rqhash_del(q, rq);

	if (rq == q->last_merge)
		q->last_merge = NULL;
}
static void dd_request_merged(struct request_queue *q, struct request *req,
enum elv_merge type)
{
struct deadline_data *dd = q->elevator->elevator_data;
const u8 ioprio_class = dd_rq_ioclass(req);
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
struct dd_per_prio *per_prio = &dd->per_prio[