// SPDX-License-Identifier: GPL-2.0
/*
 * Basic worker thread pool for io_uring
 *
 * Copyright (C) 2019 Jens Axboe
 *
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/rculist_nulls.h>
#include "io-wq.h"

#define WORKER_IDLE_TIMEOUT	(5 * HZ)

enum {
	IO_WORKER_F_UP		= 1,	/* up and active */
	IO_WORKER_F_RUNNING	= 2,	/* account as running */
	IO_WORKER_F_FREE	= 4,	/* worker on free list */
	IO_WORKER_F_EXITING	= 8,	/* worker exiting */
	IO_WORKER_F_FIXED	= 16,	/* static idle worker */
	IO_WORKER_F_BOUND	= 32,	/* is doing bounded work */
};

enum {
	IO_WQ_BIT_EXIT		= 0,	/* wq exiting */
	IO_WQ_BIT_CANCEL	= 1,	/* cancel work on list */
	IO_WQ_BIT_ERROR		= 2,	/* error on setup */
};

enum {
	IO_WQE_FLAG_STALLED	= 1,	/* stalled on hash */
};

/*
 * One for each thread in a wqe pool
 */
struct io_worker {
	refcount_t ref;
	unsigned flags;
	struct hlist_nulls_node nulls_node;
	struct list_head all_list;
	struct task_struct *task;
	struct io_wqe *wqe;

	struct io_wq_work *cur_work;
	spinlock_t lock;

	struct rcu_head rcu;
	struct mm_struct *mm;
	const struct cred *creds;
	struct files_struct *restore_files;
};
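
/*
 * Hashed work (typically buffered writes against the same file) is spread
 * over 1 << IO_WQ_HASH_ORDER buckets, and work hashed to the same bucket
 * is serialized rather than run in parallel.
 */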
#if BITS_PER_LONG == 64
#define IO_WQ_HASH_ORDER 6
#else
#define IO_WQ_HASH_ORDER 5
#endif
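
/*
 * Worker accounting is split in two classes: bounded work (completes in
 * bounded time, e.g. block based IO) and unbounded work (may block for a
 * long time, e.g. buffered IO over sockets). Each class tracks its own
 * worker count, limit, and number of currently running workers.
 */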
struct io_wqe_acct {
	unsigned nr_workers;
	unsigned max_workers;
	atomic_t nr_running;
};

enum {
	IO_WQ_ACCT_BOUND,
	IO_WQ_ACCT_UNBOUND,
};

/*
 * Per-node worker thread pool
 */
struct io_wqe {
	struct {
		spinlock_t lock;
		struct io_wq_work_list work_list;
		unsigned long hash_map;
		unsigned flags;
	} ____cacheline_aligned_in_smp;
	int node;
	struct io_wqe_acct acct[2];

	struct hlist_nulls_head free_list;
	struct list_head all_list;

	struct io_wq *wq;
};

/*
 * Per io_wq state
 */
struct io_wq {
	struct io_wqe **wqes;
	unsigned long state;

	get_work_fn *get_work;
	put_work_fn *put_work;

	struct task_struct *manager;
	struct user_struct *user;
	const struct cred *creds;
	struct mm_struct *mm;
	refcount_t refs;
	struct completion done;
};
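
/*
 * Workers are reference counted. A lookup (e.g. for cancellation) takes a
 * temporary reference with io_worker_get() and drops it with
 * io_worker_release(); the final put wakes the worker so that
 * io_worker_exit() can finish tearing it down.
 */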
static bool io_worker_get(struct io_worker *worker)
{
	return refcount_inc_not_zero(&worker->ref);
}

static void io_worker_release(struct io_worker *worker)
{
	if (refcount_dec_and_test(&worker->ref))
		wake_up_process(worker->task);
}

/*
 * Note: drops the wqe->lock if returning true! The caller must re-acquire
 * the lock in that case. Some callers need to restart handling if this
 * happens, so we can't just re-acquire the lock on behalf of the caller.
 */
static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
{
	bool dropped_lock = false;

	if (worker->creds) {
		revert_creds(worker->creds);
		worker->creds = NULL;
	}

	if (current->files != worker->restore_files) {
		__acquire(&wqe->lock);
		spin_unlock_irq(&wqe->lock);
		dropped_lock = true;

		task_lock(current);
		current->files = worker->restore_files;
		task_unlock(current);
	}

	/*
	 * If we have an active mm, we need to drop the wq lock before unusing
	 * it. If we do, return true and let the caller retry the idle loop.
	 */
	if (worker->mm) {
		if (!dropped_lock) {
			__acquire(&wqe->lock);
			spin_unlock_irq(&wqe->lock);
			dropped_lock = true;
		}
		__set_current_state(TASK_RUNNING);
		set_fs(KERNEL_DS);
		unuse_mm(worker->mm);
		mmput(worker->mm);
		worker->mm = NULL;
	}

	return dropped_lock;
}
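
/*
 * Return the bound or unbound accounting struct that a given work item or
 * worker should be charged against.
 */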
static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
						   struct io_wq_work *work)
{
	if (work->flags & IO_WQ_WORK_UNBOUND)
		return &wqe->acct[IO_WQ_ACCT_UNBOUND];

	return &wqe->acct[IO_WQ_ACCT_BOUND];
}

static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
						  struct io_worker *worker)
{
	if (worker->flags & IO_WORKER_F_BOUND)
		return &wqe->acct[IO_WQ_ACCT_BOUND];

	return &wqe->acct[IO_WQ_ACCT_UNBOUND];
}

static void io_worker_exit(struct io_worker *worker)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
	unsigned nr_workers;

	/*
	 * If we're not at zero, someone else is holding a brief reference
	 * to the worker. Wait for that to go away.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	if (!refcount_dec_and_test(&worker->ref))
		schedule();
	__set_current_state(TASK_RUNNING);