diff options
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/alloc_cache.h | 39 | ||||
-rw-r--r-- | io_uring/filetable.c | 21 | ||||
-rw-r--r-- | io_uring/io-wq.c | 524 | ||||
-rw-r--r-- | io_uring/io_uring.c | 348 | ||||
-rw-r--r-- | io_uring/io_uring.h | 49 | ||||
-rw-r--r-- | io_uring/kbuf.c | 160 | ||||
-rw-r--r-- | io_uring/kbuf.h | 7 | ||||
-rw-r--r-- | io_uring/net.h | 5 | ||||
-rw-r--r-- | io_uring/notif.c | 8 | ||||
-rw-r--r-- | io_uring/notif.h | 3 | ||||
-rw-r--r-- | io_uring/poll.c | 32 | ||||
-rw-r--r-- | io_uring/rsrc.c | 350 | ||||
-rw-r--r-- | io_uring/rsrc.h | 72 | ||||
-rw-r--r-- | io_uring/rw.c | 8 | ||||
-rw-r--r-- | io_uring/timeout.c | 71 | ||||
-rw-r--r-- | io_uring/uring_cmd.c | 18 |
16 files changed, 902 insertions, 813 deletions
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index c2cde88aeed5..241245cb54a6 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -7,47 +7,60 @@ #define IO_ALLOC_CACHE_MAX 512 struct io_cache_entry { - struct hlist_node node; + struct io_wq_work_node node; }; static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, struct io_cache_entry *entry) { - if (cache->nr_cached < IO_ALLOC_CACHE_MAX) { + if (cache->nr_cached < cache->max_cached) { cache->nr_cached++; - hlist_add_head(&entry->node, &cache->list); + wq_stack_add_head(&entry->node, &cache->list); + /* KASAN poisons object */ + kasan_slab_free_mempool(entry); return true; } return false; } +static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache) +{ + return !cache->list.next; +} + static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache) { - if (!hlist_empty(&cache->list)) { - struct hlist_node *node = cache->list.first; + if (cache->list.next) { + struct io_cache_entry *entry; - hlist_del(node); + entry = container_of(cache->list.next, struct io_cache_entry, node); + kasan_unpoison_range(entry, cache->elem_size); + cache->list.next = cache->list.next->next; cache->nr_cached--; - return container_of(node, struct io_cache_entry, node); + return entry; } return NULL; } -static inline void io_alloc_cache_init(struct io_alloc_cache *cache) +static inline void io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, size_t size) { - INIT_HLIST_HEAD(&cache->list); + cache->list.next = NULL; cache->nr_cached = 0; + cache->max_cached = max_nr; + cache->elem_size = size; } static inline void io_alloc_cache_free(struct io_alloc_cache *cache, void (*free)(struct io_cache_entry *)) { - while (!hlist_empty(&cache->list)) { - struct hlist_node *node = cache->list.first; + while (1) { + struct io_cache_entry *entry = io_alloc_cache_get(cache); - hlist_del(node); - free(container_of(node, struct io_cache_entry, node)); + if (!entry) + break; + free(entry); } cache->nr_cached = 0; } diff --git a/io_uring/filetable.c b/io_uring/filetable.c index b80614e7d605..0f6fa791a47d 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -64,7 +64,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, u32 slot_index) __must_hold(&req->ctx->uring_lock) { - bool needs_switch = false; struct io_fixed_file *file_slot; int ret; @@ -81,18 +80,13 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, if (file_slot->file_ptr) { struct file *old_file; - ret = io_rsrc_node_switch_start(ctx); - if (ret) - goto err; - old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); - ret = io_queue_rsrc_removal(ctx->file_data, slot_index, - ctx->rsrc_node, old_file); + ret = io_queue_rsrc_removal(ctx->file_data, slot_index, old_file); if (ret) - goto err; + return ret; + file_slot->file_ptr = 0; io_file_bitmap_clear(&ctx->file_table, slot_index); - needs_switch = true; } ret = io_scm_file_account(ctx, file); @@ -101,9 +95,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, slot_index); } -err: - if (needs_switch) - io_rsrc_node_switch(ctx, ctx->file_data); return ret; } @@ -156,9 +147,6 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset) return -ENXIO; if (offset >= ctx->nr_user_files) return -EINVAL; - ret = io_rsrc_node_switch_start(ctx); - if (ret) - return ret; offset = array_index_nospec(offset, ctx->nr_user_files); file_slot = io_fixed_file_slot(&ctx->file_table, offset); @@ -166,13 +154,12 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset) return -EBADF; file = (struct file *)(file_slot->file_ptr & FFS_MASK); - ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); + ret = io_queue_rsrc_removal(ctx->file_data, offset, file); if (ret) return ret; file_slot->file_ptr = 0; io_file_bitmap_clear(&ctx->file_table, offset); - io_rsrc_node_switch(ctx, ctx->file_data); return 0; } diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index f81c0a7136a5..b2715988791e 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -15,6 +15,7 @@ #include <linux/cpu.h> #include <linux/task_work.h> #include <linux/audit.h> +#include <linux/mmu_context.h> #include <uapi/linux/io_uring.h> #include "io-wq.h" @@ -39,7 +40,7 @@ enum { }; /* - * One for each thread in a wqe pool + * One for each thread in a wq pool */ struct io_worker { refcount_t ref; @@ -47,7 +48,7 @@ struct io_worker { struct hlist_nulls_node nulls_node; struct list_head all_list; struct task_struct *task; - struct io_wqe *wqe; + struct io_wq *wq; struct io_wq_work *cur_work; struct io_wq_work *next_work; @@ -73,7 +74,7 @@ struct io_worker { #define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) -struct io_wqe_acct { +struct io_wq_acct { unsigned nr_workers; unsigned max_workers; int index; @@ -90,26 +91,6 @@ enum { }; /* - * Per-node worker thread pool - */ -struct io_wqe { - raw_spinlock_t lock; - struct io_wqe_acct acct[IO_WQ_ACCT_NR]; - - int node; - - struct hlist_nulls_head free_list; - struct list_head all_list; - - struct wait_queue_entry wait; - - struct io_wq *wq; - struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; - - cpumask_var_t cpu_mask; -}; - -/* * Per io_wq state */ struct io_wq { @@ -127,7 +108,19 @@ struct io_wq { struct task_struct *task; - struct io_wqe *wqes[]; + struct io_wq_acct acct[IO_WQ_ACCT_NR]; + + /* lock protects access to elements below */ + raw_spinlock_t lock; + + struct hlist_nulls_head free_list; + struct list_head all_list; + + struct wait_queue_entry wait; + + struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; + + cpumask_var_t cpu_mask; }; static enum cpuhp_state io_wq_online; @@ -140,10 +133,10 @@ struct io_cb_cancel_data { bool cancel_all; }; -static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); -static void io_wqe_dec_running(struct io_worker *worker); -static bool io_acct_cancel_pending_work(struct io_wqe *wqe, - struct io_wqe_acct *acct, +static bool create_io_worker(struct io_wq *wq, int index); +static void io_wq_dec_running(struct io_worker *worker); +static bool io_acct_cancel_pending_work(struct io_wq *wq, + struct io_wq_acct *acct, struct io_cb_cancel_data *match); static void create_worker_cb(struct callback_head *cb); static void io_wq_cancel_tw_create(struct io_wq *wq); @@ -159,20 +152,20 @@ static void io_worker_release(struct io_worker *worker) complete(&worker->ref_done); } -static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) +static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound) { - return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; + return &wq->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; } -static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, - struct io_wq_work *work) +static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq, + struct io_wq_work *work) { - return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); + return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND)); } -static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) +static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker) { - return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); + return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND); } static void io_worker_ref_put(struct io_wq *wq) @@ -183,14 +176,13 @@ static void io_worker_ref_put(struct io_wq *wq) static void io_worker_cancel_cb(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; atomic_dec(&acct->nr_running); - raw_spin_lock(&worker->wqe->lock); + raw_spin_lock(&wq->lock); acct->nr_workers--; - raw_spin_unlock(&worker->wqe->lock); + raw_spin_unlock(&wq->lock); io_worker_ref_put(wq); clear_bit_unlock(0, &worker->create_state); io_worker_release(worker); @@ -208,8 +200,7 @@ static bool io_task_worker_match(struct callback_head *cb, void *data) static void io_worker_exit(struct io_worker *worker) { - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq *wq = worker->wq; while (1) { struct callback_head *cb = task_work_cancel_match(wq->task, @@ -223,23 +214,23 @@ static void io_worker_exit(struct io_worker *worker) io_worker_release(worker); wait_for_completion(&worker->ref_done); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); - raw_spin_unlock(&wqe->lock); - io_wqe_dec_running(worker); + raw_spin_unlock(&wq->lock); + io_wq_dec_running(worker); worker->flags = 0; preempt_disable(); current->flags &= ~PF_IO_WORKER; preempt_enable(); kfree_rcu(worker, rcu); - io_worker_ref_put(wqe->wq); + io_worker_ref_put(wq); do_exit(0); } -static inline bool io_acct_run_queue(struct io_wqe_acct *acct) +static inline bool io_acct_run_queue(struct io_wq_acct *acct) { bool ret = false; @@ -256,8 +247,8 @@ static inline bool io_acct_run_queue(struct io_wqe_acct *acct) * Check head of free list for an available worker. If one isn't available, * caller must create one. */ -static bool io_wqe_activate_free_worker(struct io_wqe *wqe, - struct io_wqe_acct *acct) +static bool io_wq_activate_free_worker(struct io_wq *wq, + struct io_wq_acct *acct) __must_hold(RCU) { struct hlist_nulls_node *n; @@ -268,10 +259,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe, * activate. If a given worker is on the free_list but in the process * of exiting, keep trying. */ - hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + hlist_nulls_for_each_entry_rcu(worker, n, &wq->free_list, nulls_node) { if (!io_worker_get(worker)) continue; - if (io_wqe_get_acct(worker) != acct) { + if (io_wq_get_acct(worker) != acct) { io_worker_release(worker); continue; } @@ -289,7 +280,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe, * We need a worker. If we find a free one, we're good. If not, and we're * below the max number of workers, create one. */ -static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) +static bool io_wq_create_worker(struct io_wq *wq, struct io_wq_acct *acct) { /* * Most likely an attempt to queue unbounded work on an io_wq that @@ -298,21 +289,21 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) if (unlikely(!acct->max_workers)) pr_warn_once("io-wq is not configured for unbound workers"); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); if (acct->nr_workers >= acct->max_workers) { - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); return true; } acct->nr_workers++; - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - return create_io_worker(wqe->wq, wqe, acct->index); + atomic_inc(&wq->worker_refs); + return create_io_worker(wq, acct->index); } -static void io_wqe_inc_running(struct io_worker *worker) +static void io_wq_inc_running(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wq_acct *acct = io_wq_get_acct(worker); atomic_inc(&acct->nr_running); } @@ -321,22 +312,22 @@ static void create_worker_cb(struct callback_head *cb) { struct io_worker *worker; struct io_wq *wq; - struct io_wqe *wqe; - struct io_wqe_acct *acct; + + struct io_wq_acct *acct; bool do_create = false; worker = container_of(cb, struct io_worker, create_work); - wqe = worker->wqe; - wq = wqe->wq; - acct = &wqe->acct[worker->create_index]; - raw_spin_lock(&wqe->lock); + wq = worker->wq; + acct = &wq->acct[worker->create_index]; + raw_spin_lock(&wq->lock); + if (acct->nr_workers < acct->max_workers) { acct->nr_workers++; do_create = true; } - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); if (do_create) { - create_io_worker(wq, wqe, worker->create_index); + create_io_worker(wq, worker->create_index); } else { atomic_dec(&acct->nr_running); io_worker_ref_put(wq); @@ -346,11 +337,10 @@ static void create_worker_cb(struct callback_head *cb) } static bool io_queue_worker_create(struct io_worker *worker, - struct io_wqe_acct *acct, + struct io_wq_acct *acct, task_work_func_t func) { - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq *wq = worker->wq; /* raced with exit, just ignore create call */ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) @@ -392,10 +382,10 @@ fail: return false; } -static void io_wqe_dec_running(struct io_worker *worker) +static void io_wq_dec_running(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; if (!(worker->flags & IO_WORKER_F_UP)) return; @@ -406,7 +396,7 @@ static void io_wqe_dec_running(struct io_worker *worker) return; atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); + atomic_inc(&wq->worker_refs); io_queue_worker_create(worker, acct, create_worker_cb); } @@ -414,29 +404,25 @@ static void io_wqe_dec_running(struct io_worker *worker) * Worker will start processing some work. Move it to the busy list, if * it's currently on the freelist */ -static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker) +static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) { if (worker->flags & IO_WORKER_F_FREE) { worker->flags &= ~IO_WORKER_F_FREE; - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); hlist_nulls_del_init_rcu(&worker->nulls_node); - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); } } /* - * No work, worker going to sleep. Move to freelist, and unuse mm if we - * have one attached. Dropping the mm may potentially sleep, so we drop - * the lock in that case and return success. Since the caller has to - * retry the loop in that case (we changed task state), we don't regrab - * the lock if we return success. + * No work, worker going to sleep. Move to freelist. */ -static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) - __must_hold(wqe->lock) +static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker) + __must_hold(wq->lock) { if (!(worker->flags & IO_WORKER_F_FREE)) { worker->flags |= IO_WORKER_F_FREE; - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); } } @@ -445,17 +431,16 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work) return work->flags >> IO_WQ_HASH_SHIFT; } -static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) +static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash) { - struct io_wq *wq = wqe->wq; bool ret = false; spin_lock_irq(&wq->hash->wait.lock); - if (list_empty(&wqe->wait.entry)) { - __add_wait_queue(&wq->hash->wait, &wqe->wait); + if (list_empty(&wq->wait.entry)) { + __add_wait_queue(&wq->hash->wait, &wq->wait); if (!test_bit(hash, &wq->hash->map)) { __set_current_state(TASK_RUNNING); - list_del_init(&wqe->wait.entry); + list_del_init(&wq->wait.entry); ret = true; } } @@ -463,14 +448,14 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) return ret; } -static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, +static struct io_wq_work *io_get_next_work(struct io_wq_acct *acct, struct io_worker *worker) __must_hold(acct->lock) { struct io_wq_work_node *node, *prev; struct io_wq_work *work, *tail; unsigned int stall_hash = -1U; - struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = worker->wq; wq_list_for_each(node, prev, &acct->work_list) { unsigned int hash; @@ -485,11 +470,11 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, hash = io_get_work_hash(work); /* all items with this hash lie in [work, tail] */ - tail = wqe->hash_tail[hash]; + tail = wq->hash_tail[hash]; /* hashed, can run if not already running */ - if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { - wqe->hash_tail[hash] = NULL; + if (!test_and_set_bit(hash, &wq->hash->map)) { + wq->hash_tail[hash] = NULL; wq_list_cut(&acct->work_list, &tail->list, prev); return work; } @@ -508,12 +493,12 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, */ set_bit(IO_ACCT_STALLED_BIT, &acct->flags); raw_spin_unlock(&acct->lock); - unstalled = io_wait_on_hash(wqe, stall_hash); + unstalled = io_wait_on_hash(wq, stall_hash); raw_spin_lock(&acct->lock); if (unstalled) { clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); - if (wq_has_sleeper(&wqe->wq->hash->wait)) - wake_up(&wqe->wq->hash->wait); + if (wq_has_sleeper(&wq->hash->wait)) + wake_up(&wq->hash->wait); } } @@ -534,13 +519,10 @@ static void io_assign_current_work(struct io_worker *worker, raw_spin_unlock(&worker->lock); } -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); - static void io_worker_handle_work(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); do { @@ -557,7 +539,7 @@ static void io_worker_handle_work(struct io_worker *worker) work = io_get_next_work(acct, worker); raw_spin_unlock(&acct->lock); if (work) { - __io_worker_busy(wqe, worker); + __io_worker_busy(wq, worker); /* * Make sure cancelation can find this, even before @@ -595,7 +577,7 @@ static void io_worker_handle_work(struct io_worker *worker) } io_assign_current_work(worker, work); if (linked) - io_wqe_enqueue(wqe, linked); + io_wq_enqueue(wq, linked); if (hash != -1U && !next_hashed) { /* serialize hash clear with wake_up() */ @@ -610,12 +592,11 @@ static void io_worker_handle_work(struct io_worker *worker) } while (1); } -static int io_wqe_worker(void *data) +static int io_wq_worker(void *data) { struct io_worker *worker = data; - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; bool exit_mask = false, last_timeout = false; char buf[TASK_COMM_LEN]; @@ -631,20 +612,20 @@ static int io_wqe_worker(void *data) while (io_acct_run_queue(acct)) io_worker_handle_work(worker); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); /* * Last sleep timed out. Exit if we're not the last worker, * or if someone modified our affinity. */ if (last_timeout && (exit_mask || acct->nr_workers > 1)) { acct->nr_workers--; - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); __set_current_state(TASK_RUNNING); break; } last_timeout = false; - __io_worker_idle(wqe, worker); - raw_spin_unlock(&wqe->lock); + __io_worker_idle(wq, worker); + raw_spin_unlock(&wq->lock); if (io_run_task_work()) continue; ret = schedule_timeout(WORKER_IDLE_TIMEOUT); @@ -658,7 +639,7 @@ static int io_wqe_worker(void *data) if (!ret) { last_timeout = true; exit_mask = !cpumask_test_cpu(raw_smp_processor_id(), - wqe->cpu_mask); + wq->cpu_mask); } } @@ -683,7 +664,7 @@ void io_wq_worker_running(struct task_struct *tsk) if (worker->flags & IO_WORKER_F_RUNNING) return; worker->flags |= IO_WORKER_F_RUNNING; - io_wqe_inc_running(worker); + io_wq_inc_running(worker); } /* @@ -702,21 +683,21 @@ void io_wq_worker_sleeping(struct task_struct *tsk) return; worker->flags &= ~IO_WORKER_F_RUNNING; - io_wqe_dec_running(worker); + io_wq_dec_running(worker); } -static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, +static void io_init_new_worker(struct io_wq *wq, struct io_worker *worker, struct task_struct *tsk) { tsk->worker_private = worker; worker->task = tsk; - set_cpus_allowed_ptr(tsk, wqe->cpu_mask); + set_cpus_allowed_ptr(tsk, wq->cpu_mask); - raw_spin_lock(&wqe->lock); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - list_add_tail_rcu(&worker->all_list, &wqe->all_list); + raw_spin_lock(&wq->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); + list_add_tail_rcu(&worker->all_list, &wq->all_list); worker->flags |= IO_WORKER_F_FREE; - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); wake_up_new_task(tsk); } @@ -749,21 +730,21 @@ static void create_worker_cont(struct callback_head *cb) { struct io_worker *worker; struct task_struct *tsk; - struct io_wqe *wqe; + struct io_wq *wq; worker = container_of(cb, struct io_worker, create_work); clear_bit_unlock(0, &worker->create_state); - wqe = worker->wqe; - tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + wq = worker->wq; + tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE); if (!IS_ERR(tsk)) { - io_init_new_worker(wqe, worker, tsk); + io_init_new_worker(wq, worker, tsk); io_worker_release(worker); return; } else if (!io_should_retry_thread(PTR_ERR(tsk))) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wq_acct *acct = io_wq_get_acct(worker); atomic_dec(&acct->nr_running); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); acct->nr_workers--; if (!acct->nr_workers) { struct io_cb_cancel_data match = { @@ -771,13 +752,13 @@ static void create_worker_cont(struct callback_head *cb) .cancel_all = true, }; - raw_spin_unlock(&wqe->lock); - while (io_acct_cancel_pending_work(wqe, acct, &match)) + raw_spin_unlock(&wq->lock); + while (io_acct_cancel_pending_work(wq, acct, &match)) ; } else { - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); } - io_worker_ref_put(wqe->wq); + io_worker_ref_put(wq); kfree(worker); return; } @@ -790,42 +771,42 @@ static void create_worker_cont(struct callback_head *cb) static void io_workqueue_create(struct work_struct *work) { struct io_worker *worker = container_of(work, struct io_worker, work); - struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wq_acct *acct = io_wq_get_acct(worker); if (!io_queue_worker_create(worker, acct, create_worker_cont)) kfree(worker); } -static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +static bool create_io_worker(struct io_wq *wq, int index) { - struct io_wqe_acct *acct = &wqe->acct[index]; + struct io_wq_acct *acct = &wq->acct[index]; struct io_worker *worker; struct task_struct *tsk; __set_current_state(TASK_RUNNING); - worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); + worker = kzalloc(sizeof(*worker), GFP_KERNEL); if (!worker) { fail: atomic_dec(&acct->nr_running); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); acct->nr_workers--; - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); io_worker_ref_put(wq); return false; } refcount_set(&worker->ref, 1); - worker->wqe = wqe; + worker->wq = wq; raw_spin_lock_init(&worker->lock); init_completion(&worker->ref_done); if (index == IO_WQ_ACCT_BOUND) worker->flags |= IO_WORKER_F_BOUND; - tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE); if (!IS_ERR(tsk)) { - io_init_new_worker(wqe, worker, tsk); + io_init_new_worker(wq, worker, tsk); } else if (!io_should_retry_thread(PTR_ERR(tsk))) { kfree(worker); goto fail; @@ -841,14 +822,14 @@ fail: * Iterate the passed in list and call the specific function for each * worker that isn't exiting */ -static bool io_wq_for_each_worker(struct io_wqe *wqe, +static bool io_wq_for_each_worker(struct io_wq *wq, bool (*func)(struct io_worker *, void *), void *data) { struct io_worker *worker; bool ret = false; - list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { + list_for_each_entry_rcu(worker, &wq->all_list, all_list) { if (io_worker_get(worker)) { /* no task if node is/was offline */ if (worker->task) @@ -869,10 +850,8 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data) return false; } -static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) +static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq) { - struct io_wq *wq = wqe->wq; - do { work->flags |= IO_WQ_WORK_CANCEL; wq->do_work(work); @@ -880,9 +859,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) } while (work); } -static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) +static void io_wq_insert_work(struct io_wq *wq, struct io_wq_work *work) { - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + struct io_wq_acct *acct = io_work_get_acct(wq, work); unsigned int hash; struct io_wq_work *tail; @@ -893,8 +872,8 @@ append: } hash = io_get_work_hash(work); - tail = wqe->hash_tail[hash]; - wqe->hash_tail[hash] = work; + tail = wq->hash_tail[hash]; + wq->hash_tail[hash] = work; if (!tail) goto append; @@ -906,9 +885,9 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) return work == data; } -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) +void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) { - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + struct io_wq_acct *acct = io_work_get_acct(wq, work); struct io_cb_cancel_data match; unsigned work_flags = work->flags; bool do_create; @@ -917,55 +896,48 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) * If io-wq is exiting for this task, or if the request has explicitly * been marked as one that should not get executed, cancel it here. */ - if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || + if (test_bit(IO_WQ_BIT_EXIT, &wq->state) || (work->flags & IO_WQ_WORK_CANCEL)) { - io_run_cancel(work, wqe); + io_run_cancel(work, wq); return; } raw_spin_lock(&acct->lock); - io_wqe_insert_work(wqe, work); + io_wq_insert_work(wq, work); clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); raw_spin_unlock(&acct->lock); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); rcu_read_lock(); - do_create = !io_wqe_activate_free_worker(wqe, acct); + do_create = !io_wq_activate_free_worker(wq, acct); rcu_read_unlock(); - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || !atomic_read(&acct->nr_running))) { bool did_create; - did_create = io_wqe_create_worker(wqe, acct); + did_create = io_wq_create_worker(wq, acct); if (likely(did_create)) return; - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); if (acct->nr_workers) { - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); return; } - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); /* fatal condition, failed to create the first worker */ match.fn = io_wq_work_match_item, match.data = work, match.cancel_all = false, - io_acct_cancel_pending_work(wqe, acct, &match); + io_acct_cancel_pending_work(wq, acct, &match); } } -void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) -{ - struct io_wqe *wqe = wq->wqes[numa_node_id()]; - - io_wqe_enqueue(wqe, work); -} - /* * Work items that hash to the same value will not be done in parallel. * Used to limit concurrent writes, generally hashed by inode. @@ -1008,27 +980,27 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data) return match->nr_running && !match->cancel_all; } -static inline void io_wqe_remove_pending(struct io_wqe *wqe, +static inline void io_wq_remove_pending(struct io_wq *wq, struct io_wq_work *work, struct io_wq_work_node *prev) { - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + struct io_wq_acct *acct = io_work_get_acct(wq, work); unsigned int hash = io_get_work_hash(work); struct io_wq_work *prev_work = NULL; - if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { + if (io_wq_is_hashed(work) && work == wq->hash_tail[hash]) { if (prev) prev_work = container_of(prev, struct io_wq_work, list); if (prev_work && io_get_work_hash(prev_work) == hash) - wqe->hash_tail[hash] = prev_work; + wq->hash_tail[hash] = prev_work; else - wqe->hash_tail[hash] = NULL; + wq->hash_tail[hash] = NULL; } wq_list_del(&acct->work_list, &work->list, prev); } -static bool io_acct_cancel_pending_work(struct io_wqe *wqe, - struct io_wqe_acct *acct, |