summaryrefslogtreecommitdiff
path: root/io_uring
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-26 12:40:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-26 12:40:31 -0700
commit5b9a7bb72fddbc5247f56ede55d485fab7abdf92 (patch)
treef2871b8c63b876eeda5ce8f8cef4b590d772692c /io_uring
parent5c7ecada25d2086aee607ff7deb69e77faa4aa92 (diff)
parent3c85cc43c8e7855d202da184baf00c7b8eeacf71 (diff)
downloadlinux-5b9a7bb72fddbc5247f56ede55d485fab7abdf92.tar.gz
linux-5b9a7bb72fddbc5247f56ede55d485fab7abdf92.tar.bz2
linux-5b9a7bb72fddbc5247f56ede55d485fab7abdf92.zip
Merge tag 'for-6.4/io_uring-2023-04-21' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe: - Cleanup of the io-wq per-node mapping, notably getting rid of it so we just have a single io_wq entry per ring (Breno) - Followup to the above, move accounting to io_wq as well and completely drop struct io_wqe (Gabriel) - Enable KASAN for the internal io_uring caches (Breno) - Add support for multishot timeouts. Some applications use timeouts to wake someone waiting on completion entries, and this makes it a bit easier to just have a recurring timer rather than needing to rearm it every time (David) - Support archs that have shared cache coloring between userspace and the kernel, and hence have strict address requirements for mmap'ing the ring into userspace. This should only be parisc/hppa. (Helge, me) - XFS has supported O_DIRECT writes without needing to lock the inode exclusively for a long time, and ext4 now supports it as well. This is true for the common cases of not extending the file size. Flag the fs as having that feature, and utilize that to avoid serializing those writes in io_uring (me) - Enable completion batching for uring commands (me) - Revert patch adding io_uring restriction to what can be GUP mapped or not. This does not belong in io_uring, as io_uring isn't really special in this regard. Since this is also getting in the way of cleanups and improvements to the GUP code, get rid of if (me) - A few series greatly reducing the complexity of registered resources, like buffers or files. Not only does this clean up the code a lot, the simplified code is also a LOT more efficient (Pavel) - Series optimizing how we wait for events and run task_work related to it (Pavel) - Fixes for file/buffer unregistration with DEFER_TASKRUN (Pavel) - Misc cleanups and improvements (Pavel, me) * tag 'for-6.4/io_uring-2023-04-21' of git://git.kernel.dk/linux: (71 commits) Revert "io_uring/rsrc: disallow multi-source reg buffers" io_uring: add support for multishot timeouts io_uring/rsrc: disassociate nodes and rsrc_data io_uring/rsrc: devirtualise rsrc put callbacks io_uring/rsrc: pass node to io_rsrc_put_work() io_uring/rsrc: inline io_rsrc_put_work() io_uring/rsrc: add empty flag in rsrc_node io_uring/rsrc: merge nodes and io_rsrc_put io_uring/rsrc: infer node from ctx on io_queue_rsrc_removal io_uring/rsrc: remove unused io_rsrc_node::llist io_uring/rsrc: refactor io_queue_rsrc_removal io_uring/rsrc: simplify single file node switching io_uring/rsrc: clean up __io_sqe_buffers_update() io_uring/rsrc: inline switch_start fast path io_uring/rsrc: remove rsrc_data refs io_uring/rsrc: fix DEFER_TASKRUN rsrc quiesce io_uring/rsrc: use wq for quiescing io_uring/rsrc: refactor io_rsrc_ref_quiesce io_uring/rsrc: remove io_rsrc_node::done io_uring/rsrc: use nospec'ed indexes ...
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/alloc_cache.h39
-rw-r--r--io_uring/filetable.c21
-rw-r--r--io_uring/io-wq.c524
-rw-r--r--io_uring/io_uring.c348
-rw-r--r--io_uring/io_uring.h49
-rw-r--r--io_uring/kbuf.c160
-rw-r--r--io_uring/kbuf.h7
-rw-r--r--io_uring/net.h5
-rw-r--r--io_uring/notif.c8
-rw-r--r--io_uring/notif.h3
-rw-r--r--io_uring/poll.c32
-rw-r--r--io_uring/rsrc.c350
-rw-r--r--io_uring/rsrc.h72
-rw-r--r--io_uring/rw.c8
-rw-r--r--io_uring/timeout.c71
-rw-r--r--io_uring/uring_cmd.c18
16 files changed, 902 insertions, 813 deletions
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index c2cde88aeed5..241245cb54a6 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -7,47 +7,60 @@
#define IO_ALLOC_CACHE_MAX 512
struct io_cache_entry {
- struct hlist_node node;
+ struct io_wq_work_node node;
};
static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
struct io_cache_entry *entry)
{
- if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
+ if (cache->nr_cached < cache->max_cached) {
cache->nr_cached++;
- hlist_add_head(&entry->node, &cache->list);
+ wq_stack_add_head(&entry->node, &cache->list);
+ /* KASAN poisons object */
+ kasan_slab_free_mempool(entry);
return true;
}
return false;
}
+static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache)
+{
+ return !cache->list.next;
+}
+
static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
{
- if (!hlist_empty(&cache->list)) {
- struct hlist_node *node = cache->list.first;
+ if (cache->list.next) {
+ struct io_cache_entry *entry;
- hlist_del(node);
+ entry = container_of(cache->list.next, struct io_cache_entry, node);
+ kasan_unpoison_range(entry, cache->elem_size);
+ cache->list.next = cache->list.next->next;
cache->nr_cached--;
- return container_of(node, struct io_cache_entry, node);
+ return entry;
}
return NULL;
}
-static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
+static inline void io_alloc_cache_init(struct io_alloc_cache *cache,
+ unsigned max_nr, size_t size)
{
- INIT_HLIST_HEAD(&cache->list);
+ cache->list.next = NULL;
cache->nr_cached = 0;
+ cache->max_cached = max_nr;
+ cache->elem_size = size;
}
static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
void (*free)(struct io_cache_entry *))
{
- while (!hlist_empty(&cache->list)) {
- struct hlist_node *node = cache->list.first;
+ while (1) {
+ struct io_cache_entry *entry = io_alloc_cache_get(cache);
- hlist_del(node);
- free(container_of(node, struct io_cache_entry, node));
+ if (!entry)
+ break;
+ free(entry);
}
cache->nr_cached = 0;
}
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index b80614e7d605..0f6fa791a47d 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -64,7 +64,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
u32 slot_index)
__must_hold(&req->ctx->uring_lock)
{
- bool needs_switch = false;
struct io_fixed_file *file_slot;
int ret;
@@ -81,18 +80,13 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (file_slot->file_ptr) {
struct file *old_file;
- ret = io_rsrc_node_switch_start(ctx);
- if (ret)
- goto err;
-
old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
- ctx->rsrc_node, old_file);
+ ret = io_queue_rsrc_removal(ctx->file_data, slot_index, old_file);
if (ret)
- goto err;
+ return ret;
+
file_slot->file_ptr = 0;
io_file_bitmap_clear(&ctx->file_table, slot_index);
- needs_switch = true;
}
ret = io_scm_file_account(ctx, file);
@@ -101,9 +95,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
io_fixed_file_set(file_slot, file);
io_file_bitmap_set(&ctx->file_table, slot_index);
}
-err:
- if (needs_switch)
- io_rsrc_node_switch(ctx, ctx->file_data);
return ret;
}
@@ -156,9 +147,6 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
return -ENXIO;
if (offset >= ctx->nr_user_files)
return -EINVAL;
- ret = io_rsrc_node_switch_start(ctx);
- if (ret)
- return ret;
offset = array_index_nospec(offset, ctx->nr_user_files);
file_slot = io_fixed_file_slot(&ctx->file_table, offset);
@@ -166,13 +154,12 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
return -EBADF;
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+ ret = io_queue_rsrc_removal(ctx->file_data, offset, file);
if (ret)
return ret;
file_slot->file_ptr = 0;
io_file_bitmap_clear(&ctx->file_table, offset);
- io_rsrc_node_switch(ctx, ctx->file_data);
return 0;
}
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index f81c0a7136a5..b2715988791e 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -15,6 +15,7 @@
#include <linux/cpu.h>
#include <linux/task_work.h>
#include <linux/audit.h>
+#include <linux/mmu_context.h>
#include <uapi/linux/io_uring.h>
#include "io-wq.h"
@@ -39,7 +40,7 @@ enum {
};
/*
- * One for each thread in a wqe pool
+ * One for each thread in a wq pool
*/
struct io_worker {
refcount_t ref;
@@ -47,7 +48,7 @@ struct io_worker {
struct hlist_nulls_node nulls_node;
struct list_head all_list;
struct task_struct *task;
- struct io_wqe *wqe;
+ struct io_wq *wq;
struct io_wq_work *cur_work;
struct io_wq_work *next_work;
@@ -73,7 +74,7 @@ struct io_worker {
#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER)
-struct io_wqe_acct {
+struct io_wq_acct {
unsigned nr_workers;
unsigned max_workers;
int index;
@@ -90,26 +91,6 @@ enum {
};
/*
- * Per-node worker thread pool
- */
-struct io_wqe {
- raw_spinlock_t lock;
- struct io_wqe_acct acct[IO_WQ_ACCT_NR];
-
- int node;
-
- struct hlist_nulls_head free_list;
- struct list_head all_list;
-
- struct wait_queue_entry wait;
-
- struct io_wq *wq;
- struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
-
- cpumask_var_t cpu_mask;
-};
-
-/*
* Per io_wq state
*/
struct io_wq {
@@ -127,7 +108,19 @@ struct io_wq {
struct task_struct *task;
- struct io_wqe *wqes[];
+ struct io_wq_acct acct[IO_WQ_ACCT_NR];
+
+ /* lock protects access to elements below */
+ raw_spinlock_t lock;
+
+ struct hlist_nulls_head free_list;
+ struct list_head all_list;
+
+ struct wait_queue_entry wait;
+
+ struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+
+ cpumask_var_t cpu_mask;
};
static enum cpuhp_state io_wq_online;
@@ -140,10 +133,10 @@ struct io_cb_cancel_data {
bool cancel_all;
};
-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
-static void io_wqe_dec_running(struct io_worker *worker);
-static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
- struct io_wqe_acct *acct,
+static bool create_io_worker(struct io_wq *wq, int index);
+static void io_wq_dec_running(struct io_worker *worker);
+static bool io_acct_cancel_pending_work(struct io_wq *wq,
+ struct io_wq_acct *acct,
struct io_cb_cancel_data *match);
static void create_worker_cb(struct callback_head *cb);
static void io_wq_cancel_tw_create(struct io_wq *wq);
@@ -159,20 +152,20 @@ static void io_worker_release(struct io_worker *worker)
complete(&worker->ref_done);
}
-static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound)
+static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
{
- return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND];
+ return &wq->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND];
}
-static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
- struct io_wq_work *work)
+static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
+ struct io_wq_work *work)
{
- return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND));
+ return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND));
}
-static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker)
+static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
{
- return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND);
+ return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND);
}
static void io_worker_ref_put(struct io_wq *wq)
@@ -183,14 +176,13 @@ static void io_worker_ref_put(struct io_wq *wq)
static void io_worker_cancel_cb(struct io_worker *worker)
{
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
- struct io_wqe *wqe = worker->wqe;
- struct io_wq *wq = wqe->wq;
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
+ struct io_wq *wq = worker->wq;
atomic_dec(&acct->nr_running);
- raw_spin_lock(&worker->wqe->lock);
+ raw_spin_lock(&wq->lock);
acct->nr_workers--;
- raw_spin_unlock(&worker->wqe->lock);
+ raw_spin_unlock(&wq->lock);
io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state);
io_worker_release(worker);
@@ -208,8 +200,7 @@ static bool io_task_worker_match(struct callback_head *cb, void *data)
static void io_worker_exit(struct io_worker *worker)
{
- struct io_wqe *wqe = worker->wqe;
- struct io_wq *wq = wqe->wq;
+ struct io_wq *wq = worker->wq;
while (1) {
struct callback_head *cb = task_work_cancel_match(wq->task,
@@ -223,23 +214,23 @@ static void io_worker_exit(struct io_worker *worker)
io_worker_release(worker);
wait_for_completion(&worker->ref_done);
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&wq->lock);
if (worker->flags & IO_WORKER_F_FREE)
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
- raw_spin_unlock(&wqe->lock);
- io_wqe_dec_running(worker);
+ raw_spin_unlock(&wq->lock);
+ io_wq_dec_running(worker);
worker->flags = 0;
preempt_disable();
current->flags &= ~PF_IO_WORKER;
preempt_enable();
kfree_rcu(worker, rcu);
- io_worker_ref_put(wqe->wq);
+ io_worker_ref_put(wq);
do_exit(0);
}
-static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
+static inline bool io_acct_run_queue(struct io_wq_acct *acct)
{
bool ret = false;
@@ -256,8 +247,8 @@ static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
* Check head of free list for an available worker. If one isn't available,
* caller must create one.
*/
-static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
- struct io_wqe_acct *acct)
+static bool io_wq_activate_free_worker(struct io_wq *wq,
+ struct io_wq_acct *acct)
__must_hold(RCU)
{
struct hlist_nulls_node *n;
@@ -268,10 +259,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
* activate. If a given worker is on the free_list but in the process
* of exiting, keep trying.
*/
- hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+ hlist_nulls_for_each_entry_rcu(worker, n, &wq->free_list, nulls_node) {
if (!io_worker_get(worker))
continue;
- if (io_wqe_get_acct(worker) != acct) {
+ if (io_wq_get_acct(worker) != acct) {
io_worker_release(worker);
continue;
}
@@ -289,7 +280,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
* We need a worker. If we find a free one, we're good. If not, and we're
* below the max number of workers, create one.
*/
-static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
+static bool io_wq_create_worker(struct io_wq *wq, struct io_wq_acct *acct)
{
/*
* Most likely an attempt to queue unbounded work on an io_wq that
@@ -298,21 +289,21 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
if (unlikely(!acct->max_workers))
pr_warn_once("io-wq is not configured for unbound workers");
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&wq->lock);
if (acct->nr_workers >= acct->max_workers) {
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
return true;
}
acct->nr_workers++;
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
atomic_inc(&acct->nr_running);
- atomic_inc(&wqe->wq->worker_refs);
- return create_io_worker(wqe->wq, wqe, acct->index);
+ atomic_inc(&wq->worker_refs);
+ return create_io_worker(wq, acct->index);
}
-static void io_wqe_inc_running(struct io_worker *worker)
+static void io_wq_inc_running(struct io_worker *worker)
{
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
atomic_inc(&acct->nr_running);
}
@@ -321,22 +312,22 @@ static void create_worker_cb(struct callback_head *cb)
{
struct io_worker *worker;
struct io_wq *wq;
- struct io_wqe *wqe;
- struct io_wqe_acct *acct;
+
+ struct io_wq_acct *acct;
bool do_create = false;
worker = container_of(cb, struct io_worker, create_work);
- wqe = worker->wqe;
- wq = wqe->wq;
- acct = &wqe->acct[worker->create_index];
- raw_spin_lock(&wqe->lock);
+ wq = worker->wq;
+ acct = &wq->acct[worker->create_index];
+ raw_spin_lock(&wq->lock);
+
if (acct->nr_workers < acct->max_workers) {
acct->nr_workers++;
do_create = true;
}
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
if (do_create) {
- create_io_worker(wq, wqe, worker->create_index);
+ create_io_worker(wq, worker->create_index);
} else {
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
@@ -346,11 +337,10 @@ static void create_worker_cb(struct callback_head *cb)
}
static bool io_queue_worker_create(struct io_worker *worker,
- struct io_wqe_acct *acct,
+ struct io_wq_acct *acct,
task_work_func_t func)
{
- struct io_wqe *wqe = worker->wqe;
- struct io_wq *wq = wqe->wq;
+ struct io_wq *wq = worker->wq;
/* raced with exit, just ignore create call */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
@@ -392,10 +382,10 @@ fail:
return false;
}
-static void io_wqe_dec_running(struct io_worker *worker)
+static void io_wq_dec_running(struct io_worker *worker)
{
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
- struct io_wqe *wqe = worker->wqe;
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
+ struct io_wq *wq = worker->wq;
if (!(worker->flags & IO_WORKER_F_UP))
return;
@@ -406,7 +396,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
return;
atomic_inc(&acct->nr_running);
- atomic_inc(&wqe->wq->worker_refs);
+ atomic_inc(&wq->worker_refs);
io_queue_worker_create(worker, acct, create_worker_cb);
}
@@ -414,29 +404,25 @@ static void io_wqe_dec_running(struct io_worker *worker)
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
*/
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
+static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker)
{
if (worker->flags & IO_WORKER_F_FREE) {
worker->flags &= ~IO_WORKER_F_FREE;
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&wq->lock);
hlist_nulls_del_init_rcu(&worker->nulls_node);
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
}
}
/*
- * No work, worker going to sleep. Move to freelist, and unuse mm if we
- * have one attached. Dropping the mm may potentially sleep, so we drop
- * the lock in that case and return success. Since the caller has to
- * retry the loop in that case (we changed task state), we don't regrab
- * the lock if we return success.
+ * No work, worker going to sleep. Move to freelist.
*/
-static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
- __must_hold(wqe->lock)
+static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker)
+ __must_hold(wq->lock)
{
if (!(worker->flags & IO_WORKER_F_FREE)) {
worker->flags |= IO_WORKER_F_FREE;
- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+ hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list);
}
}
@@ -445,17 +431,16 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work)
return work->flags >> IO_WQ_HASH_SHIFT;
}
-static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
{
- struct io_wq *wq = wqe->wq;
bool ret = false;
spin_lock_irq(&wq->hash->wait.lock);
- if (list_empty(&wqe->wait.entry)) {
- __add_wait_queue(&wq->hash->wait, &wqe->wait);
+ if (list_empty(&wq->wait.entry)) {
+ __add_wait_queue(&wq->hash->wait, &wq->wait);
if (!test_bit(hash, &wq->hash->map)) {
__set_current_state(TASK_RUNNING);
- list_del_init(&wqe->wait.entry);
+ list_del_init(&wq->wait.entry);
ret = true;
}
}
@@ -463,14 +448,14 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
return ret;
}
-static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+static struct io_wq_work *io_get_next_work(struct io_wq_acct *acct,
struct io_worker *worker)
__must_hold(acct->lock)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work, *tail;
unsigned int stall_hash = -1U;
- struct io_wqe *wqe = worker->wqe;
+ struct io_wq *wq = worker->wq;
wq_list_for_each(node, prev, &acct->work_list) {
unsigned int hash;
@@ -485,11 +470,11 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
hash = io_get_work_hash(work);
/* all items with this hash lie in [work, tail] */
- tail = wqe->hash_tail[hash];
+ tail = wq->hash_tail[hash];
/* hashed, can run if not already running */
- if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
- wqe->hash_tail[hash] = NULL;
+ if (!test_and_set_bit(hash, &wq->hash->map)) {
+ wq->hash_tail[hash] = NULL;
wq_list_cut(&acct->work_list, &tail->list, prev);
return work;
}
@@ -508,12 +493,12 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
*/
set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
raw_spin_unlock(&acct->lock);
- unstalled = io_wait_on_hash(wqe, stall_hash);
+ unstalled = io_wait_on_hash(wq, stall_hash);
raw_spin_lock(&acct->lock);
if (unstalled) {
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
- if (wq_has_sleeper(&wqe->wq->hash->wait))
- wake_up(&wqe->wq->hash->wait);
+ if (wq_has_sleeper(&wq->hash->wait))
+ wake_up(&wq->hash->wait);
}
}
@@ -534,13 +519,10 @@ static void io_assign_current_work(struct io_worker *worker,
raw_spin_unlock(&worker->lock);
}
-static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
-
static void io_worker_handle_work(struct io_worker *worker)
{
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
- struct io_wqe *wqe = worker->wqe;
- struct io_wq *wq = wqe->wq;
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
+ struct io_wq *wq = worker->wq;
bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
do {
@@ -557,7 +539,7 @@ static void io_worker_handle_work(struct io_worker *worker)
work = io_get_next_work(acct, worker);
raw_spin_unlock(&acct->lock);
if (work) {
- __io_worker_busy(wqe, worker);
+ __io_worker_busy(wq, worker);
/*
* Make sure cancelation can find this, even before
@@ -595,7 +577,7 @@ static void io_worker_handle_work(struct io_worker *worker)
}
io_assign_current_work(worker, work);
if (linked)
- io_wqe_enqueue(wqe, linked);
+ io_wq_enqueue(wq, linked);
if (hash != -1U && !next_hashed) {
/* serialize hash clear with wake_up() */
@@ -610,12 +592,11 @@ static void io_worker_handle_work(struct io_worker *worker)
} while (1);
}
-static int io_wqe_worker(void *data)
+static int io_wq_worker(void *data)
{
struct io_worker *worker = data;
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
- struct io_wqe *wqe = worker->wqe;
- struct io_wq *wq = wqe->wq;
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
+ struct io_wq *wq = worker->wq;
bool exit_mask = false, last_timeout = false;
char buf[TASK_COMM_LEN];
@@ -631,20 +612,20 @@ static int io_wqe_worker(void *data)
while (io_acct_run_queue(acct))
io_worker_handle_work(worker);
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&wq->lock);
/*
* Last sleep timed out. Exit if we're not the last worker,
* or if someone modified our affinity.
*/
if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
acct->nr_workers--;
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
__set_current_state(TASK_RUNNING);
break;
}
last_timeout = false;
- __io_worker_idle(wqe, worker);
- raw_spin_unlock(&wqe->lock);
+ __io_worker_idle(wq, worker);
+ raw_spin_unlock(&wq->lock);
if (io_run_task_work())
continue;
ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
@@ -658,7 +639,7 @@ static int io_wqe_worker(void *data)
if (!ret) {
last_timeout = true;
exit_mask = !cpumask_test_cpu(raw_smp_processor_id(),
- wqe->cpu_mask);
+ wq->cpu_mask);
}
}
@@ -683,7 +664,7 @@ void io_wq_worker_running(struct task_struct *tsk)
if (worker->flags & IO_WORKER_F_RUNNING)
return;
worker->flags |= IO_WORKER_F_RUNNING;
- io_wqe_inc_running(worker);
+ io_wq_inc_running(worker);
}
/*
@@ -702,21 +683,21 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
return;
worker->flags &= ~IO_WORKER_F_RUNNING;
- io_wqe_dec_running(worker);
+ io_wq_dec_running(worker);
}
-static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
+static void io_init_new_worker(struct io_wq *wq, struct io_worker *worker,
struct task_struct *tsk)
{
tsk->worker_private = worker;
worker->task = tsk;
- set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
+ set_cpus_allowed_ptr(tsk, wq->cpu_mask);
- raw_spin_lock(&wqe->lock);
- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
- list_add_tail_rcu(&worker->all_list, &wqe->all_list);
+ raw_spin_lock(&wq->lock);
+ hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list);
+ list_add_tail_rcu(&worker->all_list, &wq->all_list);
worker->flags |= IO_WORKER_F_FREE;
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
wake_up_new_task(tsk);
}
@@ -749,21 +730,21 @@ static void create_worker_cont(struct callback_head *cb)
{
struct io_worker *worker;
struct task_struct *tsk;
- struct io_wqe *wqe;
+ struct io_wq *wq;
worker = container_of(cb, struct io_worker, create_work);
clear_bit_unlock(0, &worker->create_state);
- wqe = worker->wqe;
- tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+ wq = worker->wq;
+ tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
- io_init_new_worker(wqe, worker, tsk);
+ io_init_new_worker(wq, worker, tsk);
io_worker_release(worker);
return;
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
atomic_dec(&acct->nr_running);
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&wq->lock);
acct->nr_workers--;
if (!acct->nr_workers) {
struct io_cb_cancel_data match = {
@@ -771,13 +752,13 @@ static void create_worker_cont(struct callback_head *cb)
.cancel_all = true,
};
- raw_spin_unlock(&wqe->lock);
- while (io_acct_cancel_pending_work(wqe, acct, &match))
+ raw_spin_unlock(&wq->lock);
+ while (io_acct_cancel_pending_work(wq, acct, &match))
;
} else {
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
}
- io_worker_ref_put(wqe->wq);
+ io_worker_ref_put(wq);
kfree(worker);
return;
}
@@ -790,42 +771,42 @@ static void create_worker_cont(struct callback_head *cb)
static void io_workqueue_create(struct work_struct *work)
{
struct io_worker *worker = container_of(work, struct io_worker, work);
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ struct io_wq_acct *acct = io_wq_get_acct(worker);
if (!io_queue_worker_create(worker, acct, create_worker_cont))
kfree(worker);
}
-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static bool create_io_worker(struct io_wq *wq, int index)
{
- struct io_wqe_acct *acct = &wqe->acct[index];
+ struct io_wq_acct *acct = &wq->acct[index];
struct io_worker *worker;
struct task_struct *tsk;
__set_current_state(TASK_RUNNING);
- worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
+ worker = kzalloc(sizeof(*worker), GFP_KERNEL);
if (!worker) {
fail:
atomic_dec(&acct->nr_running);
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&wq->lock);
acct->nr_workers--;
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&wq->lock);
io_worker_ref_put(wq);
return false;
}
refcount_set(&worker->ref, 1);
- worker->wqe = wqe;
+ worker->wq = wq;
raw_spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
- tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+ tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
- io_init_new_worker(wqe, worker, tsk);
+ io_init_new_worker(wq, worker, tsk);
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
kfree(worker);
goto fail;
@@ -841,14 +822,14 @@ fail:
* Iterate the passed in list and call the specific function for each
* worker that isn't exiting
*/
-static bool io_wq_for_each_worker(struct io_wqe *wqe,
+static bool io_wq_for_each_worker(struct io_wq *wq,
bool (*func)(struct io_worker *, void *),
void *data)
{
struct io_worker *worker;
bool ret = false;
- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
+ list_for_each_entry_rcu(worker, &wq->all_list, all_list) {
if (io_worker_get(worker)) {
/* no task if node is/was offline */
if (worker->task)
@@ -869,10 +850,8 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
return false;
}
-static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
+static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
- struct io_wq *wq = wqe->wq;
-
do {
work->flags |= IO_WQ_WORK_CANCEL;
wq->do_work(work);
@@ -880,9 +859,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
} while (work);
}
-static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+static void io_wq_insert_work(struct io_wq *wq, struct io_wq_work *work)
{
- struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+ struct io_wq_acct *acct = io_work_get_acct(wq, work);
unsigned int hash;
struct io_wq_work *tail;
@@ -893,8 +872,8 @@ append:
}
hash = io_get_work_hash(work);
- tail = wqe->hash_tail[hash];
- wqe->hash_tail[hash] = work;
+ tail = wq->hash_tail[hash];
+ wq->hash_tail[hash] = work;
if (!tail)
goto append;
@@ -906,9 +885,9 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
return work == data;
}
-static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
{
- struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+ struct io_wq_acct *acct = io_work_get_acct(wq, work);
struct io_cb_cancel_data match;
unsigned work_flags = work->flags;
bool do_create;
@@ -917,55 +896,48 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
* If io-wq is exiting for this task, or if the request has exp