-rw-r--r-- | fs/ext4/file.c                  |   3
-rw-r--r-- | fs/xfs/xfs_file.c               |   3
-rw-r--r-- | include/linux/fs.h              |   3
-rw-r--r-- | include/linux/io_uring_types.h  |  24
-rw-r--r-- | include/trace/events/io_uring.h |  15
-rw-r--r-- | include/uapi/linux/io_uring.h   |  33
-rw-r--r-- | io_uring/alloc_cache.h          |  39
-rw-r--r-- | io_uring/filetable.c            |  21
-rw-r--r-- | io_uring/io-wq.c                | 524
-rw-r--r-- | io_uring/io_uring.c             | 348
-rw-r--r-- | io_uring/io_uring.h             |  49
-rw-r--r-- | io_uring/kbuf.c                 | 160
-rw-r--r-- | io_uring/kbuf.h                 |   7
-rw-r--r-- | io_uring/net.h                  |   5
-rw-r--r-- | io_uring/notif.c                |   8
-rw-r--r-- | io_uring/notif.h                |   3
-rw-r--r-- | io_uring/poll.c                 |  32
-rw-r--r-- | io_uring/rsrc.c                 | 350
-rw-r--r-- | io_uring/rsrc.h                 |  72
-rw-r--r-- | io_uring/rw.c                   |   8
-rw-r--r-- | io_uring/timeout.c              |  71
-rw-r--r-- | io_uring/uring_cmd.c            |  18
22 files changed, 949 insertions, 847 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0b8b4499e5ca..d101b3b0c7da 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -899,7 +899,8 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
 			return ret;
 	}
 
-	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC |
+			FMODE_DIO_PARALLEL_WRITE;
 	return dquot_file_open(inode, filp);
 }
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 705250f9f90a..863289aaa441 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1171,7 +1171,8 @@ xfs_file_open(
 {
 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
-	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC;
+	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
+			FMODE_DIO_PARALLEL_WRITE;
 	return generic_file_open(inode, file);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ef2281a2acce..67495ef79bb2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -168,6 +168,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 
 #define FMODE_NOREUSE		((__force fmode_t)0x800000)
 
+/* File supports non-exclusive O_DIRECT writes from multiple threads */
+#define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 00689c12f6ab..1b2a20a42413 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -188,8 +188,10 @@ struct io_ev_fd {
 };
 
 struct io_alloc_cache {
-	struct hlist_head	list;
+	struct io_wq_work_node	list;
 	unsigned int		nr_cached;
+	unsigned int		max_cached;
+	size_t			elem_size;
 };
 
 struct io_ring_ctx {
@@ -239,7 +241,6 @@ struct io_ring_ctx {
 		 * uring_lock, and updated through io_uring_register(2)
 		 */
 		struct io_rsrc_node	*rsrc_node;
-		int			rsrc_cached_refs;
 		atomic_t		cancel_seq;
 		struct io_file_table	file_table;
 		unsigned		nr_user_files;
@@ -295,7 +296,7 @@ struct io_ring_ctx {
 	spinlock_t		completion_lock;
 
 	bool			poll_multi_queue;
-	bool			cq_waiting;
+	atomic_t		cq_wait_nr;
 
 	/*
	 * ->iopoll_list is protected by the ctx->uring_lock for
@@ -325,16 +326,15 @@ struct io_ring_ctx {
 	struct io_restriction		restrictions;
 
 	/* slow path rsrc auxilary data, used by update/register */
-	struct io_rsrc_node		*rsrc_backup_node;
 	struct io_mapped_ubuf		*dummy_ubuf;
 	struct io_rsrc_data		*file_data;
 	struct io_rsrc_data		*buf_data;
-	struct delayed_work		rsrc_put_work;
-	struct callback_head		rsrc_put_tw;
-	struct llist_head		rsrc_put_llist;
+	/* protected by ->uring_lock */
 	struct list_head		rsrc_ref_list;
-	spinlock_t			rsrc_ref_lock;
+	struct io_alloc_cache		rsrc_node_cache;
+	struct wait_queue_head		rsrc_quiesce_wq;
+	unsigned			rsrc_quiesce;
 
 	struct list_head		io_buffers_pages;
 
@@ -366,6 +366,11 @@ struct io_ring_ctx {
 	unsigned			evfd_last_cq_tail;
 };
 
+struct io_tw_state {
+	/* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */
+	bool locked;
+};
+
 enum {
 	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
 	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
@@ -472,7 +477,7 @@ enum {
 	REQ_F_HASH_LOCKED	= BIT(REQ_F_HASH_LOCKED_BIT),
 };
 
-typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
+typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
 
 struct io_task_work {
 	struct llist_node		node;
@@ -562,6 +567,7 @@ struct io_kiocb {
 	atomic_t			refs;
 	atomic_t			poll_refs;
 	struct io_task_work		io_task_work;
+	unsigned			nr_tw;
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
 	union {
 		struct hlist_node	hash_node;
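Note: the io_uring_types.h hunks above change the task_work callback convention from a bare "bool *locked" to the new struct io_tw_state. As a purely illustrative sketch (not part of the patch), a converted callback looks roughly like the following; the name io_foo_tw() is made up, and io_tw_lock() is the existing helper, assumed here to have been updated to take the state struct as the rest of this series does.

	/* illustrative only: a callback under the new io_req_tw_func_t signature */
	static void io_foo_tw(struct io_kiocb *req, struct io_tw_state *ts)
	{
		/* takes ->uring_lock if not already held and records that in ts->locked */
		io_tw_lock(req->ctx, ts);

		/* ... complete or retry the request under ->uring_lock ... */
	}

The io_tw_state lives on the stack of the code that runs the task_work batch, so whether the lock was taken is tracked once per batch rather than threaded through every callback as a raw bool.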
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 936fd41bf147..69454f1f98b0 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -360,19 +360,18 @@ TRACE_EVENT(io_uring_complete,
 );
 
 /**
- * io_uring_submit_sqe - called before submitting one SQE
+ * io_uring_submit_req - called before submitting a request
  *
  * @req:		pointer to a submitted request
- * @force_nonblock:	whether a context blocking or not
  *
  * Allows to track SQE submitting, to understand what was the source of it, SQ
  * thread or io_uring_enter call.
  */
-TRACE_EVENT(io_uring_submit_sqe,
+TRACE_EVENT(io_uring_submit_req,
 
-	TP_PROTO(struct io_kiocb *req, bool force_nonblock),
+	TP_PROTO(struct io_kiocb *req),
 
-	TP_ARGS(req, force_nonblock),
+	TP_ARGS(req),
 
 	TP_STRUCT__entry (
 		__field(  void *,		ctx		)
@@ -380,7 +379,6 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__field(  unsigned long long,	user_data	)
 		__field(  u8,			opcode		)
 		__field(  u32,			flags		)
-		__field(  bool,			force_nonblock	)
 		__field(  bool,			sq_thread	)
 
 		__string( op_str, io_uring_get_opcode(req->opcode) )
@@ -392,16 +390,15 @@
 		__entry->user_data	= req->cqe.user_data;
 		__entry->opcode		= req->opcode;
 		__entry->flags		= req->flags;
-		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= req->ctx->flags & IORING_SETUP_SQPOLL;
 
 		__assign_str(op_str, io_uring_get_opcode(req->opcode));
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
-		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+		  "sq_thread %d", __entry->ctx, __entry->req,
 		  __entry->user_data, __get_str(op_str),
-		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
+		  __entry->flags, __entry->sq_thread)
 );
 
 /*
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 709de6d4feb2..0716cb17e436 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -250,6 +250,7 @@ enum io_uring_op {
 #define IORING_TIMEOUT_REALTIME		(1U << 3)
 #define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
 #define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
+#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
 #define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
 #define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
@@ -389,6 +390,9 @@ enum {
 #define IORING_OFF_SQ_RING		0ULL
 #define IORING_OFF_CQ_RING		0x8000000ULL
 #define IORING_OFF_SQES			0x10000000ULL
+#define IORING_OFF_PBUF_RING		0x80000000ULL
+#define IORING_OFF_PBUF_SHIFT		16
+#define IORING_OFF_MMAP_MASK		0xf8000000ULL
 
 /*
  * Filled with the offset for mmap(2)
@@ -568,19 +572,6 @@ struct io_uring_rsrc_update2 {
 	__u32 resv2;
 };
 
-struct io_uring_notification_slot {
-	__u64 tag;
-	__u64 resv[3];
-};
-
-struct io_uring_notification_register {
-	__u32 nr_slots;
-	__u32 resv;
-	__u64 resv2;
-	__u64 data;
-	__u64 resv3;
-};
-
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
 
@@ -635,12 +626,26 @@ struct io_uring_buf_ring {
 	};
 };
 
+/*
+ * Flags for IORING_REGISTER_PBUF_RING.
+ *
+ * IOU_PBUF_RING_MMAP:	If set, kernel will allocate the memory for the ring.
+ *			The application must not set a ring_addr in struct
+ *			io_uring_buf_reg, instead it must subsequently call
+ *			mmap(2) with the offset set as:
+ *			IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
+ *			to get a virtual mapping for the ring.
+ */
+enum {
+	IOU_PBUF_RING_MMAP	= 1,
+};
+
 /* argument for IORING_(UN)REGISTER_PBUF_RING */
 struct io_uring_buf_reg {
 	__u64	ring_addr;
 	__u32	ring_entries;
 	__u16	bgid;
-	__u16	pad;
+	__u16	flags;
 	__u64	resv[3];
 };
 
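Note: an illustrative userspace sketch of the new uapi above, not part of the patch. It registers a provided-buffer ring whose memory is allocated by the kernel (IOU_PBUF_RING_MMAP) and then maps it through the IORING_OFF_PBUF_RING offset encoding. ring_fd is assumed to come from io_uring_setup(2), bgid and entries are arbitrary example values, and error unwinding (unregistering on mmap failure) is omitted.

	#include <linux/io_uring.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>

	static struct io_uring_buf_ring *map_kernel_pbuf_ring(int ring_fd,
							      unsigned short bgid,
							      unsigned int entries)
	{
		struct io_uring_buf_reg reg;
		void *ring;
		off_t off;

		memset(&reg, 0, sizeof(reg));
		reg.ring_entries = entries;	/* must be a power of two */
		reg.bgid = bgid;
		reg.flags = IOU_PBUF_RING_MMAP;	/* ring_addr stays 0, kernel allocates */

		if (syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_PBUF_RING,
			    &reg, 1) < 0)
			return NULL;

		/* encode "pbuf ring of this buffer group" into the mmap offset */
		off = IORING_OFF_PBUF_RING | ((off_t)bgid << IORING_OFF_PBUF_SHIFT);
		ring = mmap(NULL, entries * sizeof(struct io_uring_buf),
			    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			    ring_fd, off);
		return ring == MAP_FAILED ? NULL : ring;
	}

Compared with the pre-existing mode where the application supplies ring_addr, letting the kernel own the allocation is what the new IORING_OFF_PBUF_RING/IORING_OFF_PBUF_SHIFT/IORING_OFF_MMAP_MASK encoding exists to support.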
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index c2cde88aeed5..241245cb54a6 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -7,47 +7,60 @@
 #define IO_ALLOC_CACHE_MAX	512
 
 struct io_cache_entry {
-	struct hlist_node	node;
+	struct io_wq_work_node node;
 };
 
 static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
 				      struct io_cache_entry *entry)
 {
-	if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
+	if (cache->nr_cached < cache->max_cached) {
 		cache->nr_cached++;
-		hlist_add_head(&entry->node, &cache->list);
+		wq_stack_add_head(&entry->node, &cache->list);
+		/* KASAN poisons object */
+		kasan_slab_free_mempool(entry);
 		return true;
 	}
 	return false;
 }
 
+static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache)
+{
+	return !cache->list.next;
+}
+
 static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
 {
-	if (!hlist_empty(&cache->list)) {
-		struct hlist_node *node = cache->list.first;
+	if (cache->list.next) {
+		struct io_cache_entry *entry;
 
-		hlist_del(node);
+		entry = container_of(cache->list.next, struct io_cache_entry, node);
+		kasan_unpoison_range(entry, cache->elem_size);
+		cache->list.next = cache->list.next->next;
 		cache->nr_cached--;
-		return container_of(node, struct io_cache_entry, node);
+		return entry;
 	}
 
 	return NULL;
 }
 
-static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
+static inline void io_alloc_cache_init(struct io_alloc_cache *cache,
+				       unsigned max_nr, size_t size)
 {
-	INIT_HLIST_HEAD(&cache->list);
+	cache->list.next = NULL;
 	cache->nr_cached = 0;
+	cache->max_cached = max_nr;
+	cache->elem_size = size;
 }
 
 static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
 					void (*free)(struct io_cache_entry *))
 {
-	while (!hlist_empty(&cache->list)) {
-		struct hlist_node *node = cache->list.first;
+	while (1) {
+		struct io_cache_entry *entry = io_alloc_cache_get(cache);
 
-		hlist_del(node);
-		free(container_of(node, struct io_cache_entry, node));
+		if (!entry)
+			break;
+		free(entry);
 	}
 	cache->nr_cached = 0;
 }
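Note: a consumer of the reworked cache embeds struct io_cache_entry in its object, sizes the cache at init time, and recycles objects through get/put while KASAN keeps idle cached elements poisoned. The sketch below is illustrative only; my_node, my_cache_setup() and friends are hypothetical names, and the pattern mirrors how this series appears to use the cache for the new rsrc_node_cache in io_ring_ctx.

	struct my_node {
		struct io_cache_entry	cache;	/* embedded so container_of() works */
		/* ... per-object state ... */
	};

	static void my_cache_setup(struct io_alloc_cache *cache)
	{
		/* keep up to 128 nodes, each sizeof(struct my_node) bytes */
		io_alloc_cache_init(cache, 128, sizeof(struct my_node));
	}

	static struct my_node *my_node_alloc(struct io_alloc_cache *cache)
	{
		struct io_cache_entry *entry = io_alloc_cache_get(cache);

		if (entry)	/* already unpoisoned by io_alloc_cache_get() */
			return container_of(entry, struct my_node, cache);
		return kzalloc(sizeof(struct my_node), GFP_KERNEL);
	}

	static void my_node_free(struct io_alloc_cache *cache, struct my_node *node)
	{
		/* io_alloc_cache_put() poisons the object when it is cached */
		if (!io_alloc_cache_put(cache, &node->cache))
			kfree(node);
	}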
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index b80614e7d605..0f6fa791a47d 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -64,7 +64,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 				 u32 slot_index)
 	__must_hold(&req->ctx->uring_lock)
 {
-	bool needs_switch = false;
 	struct io_fixed_file *file_slot;
 	int ret;
 
@@ -81,18 +80,13 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 	if (file_slot->file_ptr) {
 		struct file *old_file;
 
-		ret = io_rsrc_node_switch_start(ctx);
-		if (ret)
-			goto err;
-
 		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
-					    ctx->rsrc_node, old_file);
+		ret = io_queue_rsrc_removal(ctx->file_data, slot_index, old_file);
 		if (ret)
-			goto err;
+			return ret;
+
 		file_slot->file_ptr = 0;
 		io_file_bitmap_clear(&ctx->file_table, slot_index);
-		needs_switch = true;
 	}
 
 	ret = io_scm_file_account(ctx, file);
@@ -101,9 +95,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 		io_fixed_file_set(file_slot, file);
 		io_file_bitmap_set(&ctx->file_table, slot_index);
 	}
-err:
-	if (needs_switch)
-		io_rsrc_node_switch(ctx, ctx->file_data);
 	return ret;
 }
 
@@ -156,9 +147,6 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 		return -ENXIO;
 	if (offset >= ctx->nr_user_files)
 		return -EINVAL;
-	ret = io_rsrc_node_switch_start(ctx);
-	if (ret)
-		return ret;
 
 	offset = array_index_nospec(offset, ctx->nr_user_files);
 	file_slot = io_fixed_file_slot(&ctx->file_table, offset);
@@ -166,13 +154,12 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 		return -EBADF;
 
 	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-	ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+	ret = io_queue_rsrc_removal(ctx->file_data, offset, file);
 	if (ret)
 		return ret;
 
 	file_slot->file_ptr = 0;
 	io_file_bitmap_clear(&ctx->file_table, offset);
-	io_rsrc_node_switch(ctx, ctx->file_data);
 	return 0;
 }
 
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index f81c0a7136a5..b2715988791e 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -15,6 +15,7 @@
 #include <linux/cpu.h>
 #include <linux/task_work.h>
 #include <linux/audit.h>
+#include <linux/mmu_context.h>
 #include <uapi/linux/io_uring.h>
 
 #include "io-wq.h"
@@ -39,7 +40,7 @@ enum {
 };
 
 /*
- * One for each thread in a wqe pool
+ * One for each thread in a wq pool
  */
 struct io_worker {
 	refcount_t ref;
@@ -47,7 +48,7 @@ struct io_worker {
 	struct hlist_nulls_node nulls_node;
 	struct list_head all_list;
 	struct task_struct *task;
-	struct io_wqe *wqe;
+	struct io_wq *wq;
 
 	struct io_wq_work *cur_work;
 	struct io_wq_work *next_work;
@@ -73,7 +74,7 @@ struct io_worker {
 
 #define IO_WQ_NR_HASH_BUCKETS	(1u << IO_WQ_HASH_ORDER)
 
-struct io_wqe_acct {
+struct io_wq_acct {
 	unsigned nr_workers;
 	unsigned max_workers;
 	int index;
@@ -90,26 +91,6 @@ enum {
 };
 
 /*
- * Per-node worker thread pool
- */
-struct io_wqe {
-	raw_spinlock_t lock;
-	struct io_wqe_acct acct[IO_WQ_ACCT_NR];
-
-	int node;
-
-	struct hlist_nulls_head free_list;
-	struct list_head all_list;
-
-	struct wait_queue_entry wait;
-
-	struct io_wq *wq;
-	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
-
-	cpumask_var_t cpu_mask;
-};
-
-/*
  * Per io_wq state
  */
 struct io_wq {
@@ -127,7 +108,19 @@ struct io_wq {
 
 	struct task_struct *task;
 
-	struct io_wqe *wqes[];
+	struct io_wq_acct acct[IO_WQ_ACCT_NR];
+
+	/* lock protects access to elements below */
+	raw_spinlock_t lock;
+
+	struct hlist_nulls_head free_list;
+	struct list_head all_list;
+
+	struct wait_queue_entry wait;
+
+	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+
+	cpumask_var_t cpu_mask;
 };
 
 static enum cpuhp_state io_wq_online;
@@ -140,10 +133,10 @@ struct io_cb_cancel_data {
 	bool cancel_all;
 };
 
-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
-static void io_wqe_dec_running(struct io_worker *worker);
-static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
-					struct io_wqe_acct *acct,
+static bool create_io_worker(struct io_wq *wq, int index);
+static void io_wq_dec_running(struct io_worker *worker);
+static bool io_acct_cancel_pending_work(struct io_wq *wq,
+					struct io_wq_acct *acct,
 					struct io_cb_cancel_data *match);
 static void create_worker_cb(struct callback_head *cb);
 static void io_wq_cancel_tw_create(struct io_wq *wq);
@@ -159,20 +152,20 @@ static void io_worker_release(struct io_worker *worker)
 		complete(&worker->ref_done);
 }
 
-static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound)
+static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
 {
-	return &wqe->acct[bound ?
IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; } -static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, - struct io_wq_work *work) +static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq, + struct io_wq_work *work) { - return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); + return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND)); } -static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) +static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker) { - return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); + return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND); } static void io_worker_ref_put(struct io_wq *wq) @@ -183,14 +176,13 @@ static void io_worker_ref_put(struct io_wq *wq) static void io_worker_cancel_cb(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; atomic_dec(&acct->nr_running); - raw_spin_lock(&worker->wqe->lock); + raw_spin_lock(&wq->lock); acct->nr_workers--; - raw_spin_unlock(&worker->wqe->lock); + raw_spin_unlock(&wq->lock); io_worker_ref_put(wq); clear_bit_unlock(0, &worker->create_state); io_worker_release(worker); @@ -208,8 +200,7 @@ static bool io_task_worker_match(struct callback_head *cb, void *data) static void io_worker_exit(struct io_worker *worker) { - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq *wq = worker->wq; while (1) { struct callback_head *cb = task_work_cancel_match(wq->task, @@ -223,23 +214,23 @@ static void io_worker_exit(struct io_worker *worker) io_worker_release(worker); wait_for_completion(&worker->ref_done); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); - raw_spin_unlock(&wqe->lock); - io_wqe_dec_running(worker); + raw_spin_unlock(&wq->lock); + io_wq_dec_running(worker); worker->flags = 0; preempt_disable(); current->flags &= ~PF_IO_WORKER; preempt_enable(); kfree_rcu(worker, rcu); - io_worker_ref_put(wqe->wq); + io_worker_ref_put(wq); do_exit(0); } -static inline bool io_acct_run_queue(struct io_wqe_acct *acct) +static inline bool io_acct_run_queue(struct io_wq_acct *acct) { bool ret = false; @@ -256,8 +247,8 @@ static inline bool io_acct_run_queue(struct io_wqe_acct *acct) * Check head of free list for an available worker. If one isn't available, * caller must create one. */ -static bool io_wqe_activate_free_worker(struct io_wqe *wqe, - struct io_wqe_acct *acct) +static bool io_wq_activate_free_worker(struct io_wq *wq, + struct io_wq_acct *acct) __must_hold(RCU) { struct hlist_nulls_node *n; @@ -268,10 +259,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe, * activate. If a given worker is on the free_list but in the process * of exiting, keep trying. */ - hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + hlist_nulls_for_each_entry_rcu(worker, n, &wq->free_list, nulls_node) { if (!io_worker_get(worker)) continue; - if (io_wqe_get_acct(worker) != acct) { + if (io_wq_get_acct(worker) != acct) { io_worker_release(worker); continue; } @@ -289,7 +280,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe, * We need a worker. If we find a free one, we're good. If not, and we're * below the max number of workers, create one. 
*/ -static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) +static bool io_wq_create_worker(struct io_wq *wq, struct io_wq_acct *acct) { /* * Most likely an attempt to queue unbounded work on an io_wq that @@ -298,21 +289,21 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) if (unlikely(!acct->max_workers)) pr_warn_once("io-wq is not configured for unbound workers"); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); if (acct->nr_workers >= acct->max_workers) { - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); return true; } acct->nr_workers++; - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - return create_io_worker(wqe->wq, wqe, acct->index); + atomic_inc(&wq->worker_refs); + return create_io_worker(wq, acct->index); } -static void io_wqe_inc_running(struct io_worker *worker) +static void io_wq_inc_running(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wq_acct *acct = io_wq_get_acct(worker); atomic_inc(&acct->nr_running); } @@ -321,22 +312,22 @@ static void create_worker_cb(struct callback_head *cb) { struct io_worker *worker; struct io_wq *wq; - struct io_wqe *wqe; - struct io_wqe_acct *acct; + + struct io_wq_acct *acct; bool do_create = false; worker = container_of(cb, struct io_worker, create_work); - wqe = worker->wqe; - wq = wqe->wq; - acct = &wqe->acct[worker->create_index]; - raw_spin_lock(&wqe->lock); + wq = worker->wq; + acct = &wq->acct[worker->create_index]; + raw_spin_lock(&wq->lock); + if (acct->nr_workers < acct->max_workers) { acct->nr_workers++; do_create = true; } - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); if (do_create) { - create_io_worker(wq, wqe, worker->create_index); + create_io_worker(wq, worker->create_index); } else { atomic_dec(&acct->nr_running); io_worker_ref_put(wq); @@ -346,11 +337,10 @@ static void create_worker_cb(struct callback_head *cb) } static bool io_queue_worker_create(struct io_worker *worker, - struct io_wqe_acct *acct, + struct io_wq_acct *acct, task_work_func_t func) { - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq *wq = worker->wq; /* raced with exit, just ignore create call */ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) @@ -392,10 +382,10 @@ fail: return false; } -static void io_wqe_dec_running(struct io_worker *worker) +static void io_wq_dec_running(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; if (!(worker->flags & IO_WORKER_F_UP)) return; @@ -406,7 +396,7 @@ static void io_wqe_dec_running(struct io_worker *worker) return; atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); + atomic_inc(&wq->worker_refs); io_queue_worker_create(worker, acct, create_worker_cb); } @@ -414,29 +404,25 @@ static void io_wqe_dec_running(struct io_worker *worker) * Worker will start processing some work. 
Move it to the busy list, if * it's currently on the freelist */ -static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker) +static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) { if (worker->flags & IO_WORKER_F_FREE) { worker->flags &= ~IO_WORKER_F_FREE; - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); hlist_nulls_del_init_rcu(&worker->nulls_node); - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); } } /* - * No work, worker going to sleep. Move to freelist, and unuse mm if we - * have one attached. Dropping the mm may potentially sleep, so we drop - * the lock in that case and return success. Since the caller has to - * retry the loop in that case (we changed task state), we don't regrab - * the lock if we return success. + * No work, worker going to sleep. Move to freelist. */ -static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) - __must_hold(wqe->lock) +static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker) + __must_hold(wq->lock) { if (!(worker->flags & IO_WORKER_F_FREE)) { worker->flags |= IO_WORKER_F_FREE; - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); } } @@ -445,17 +431,16 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work) return work->flags >> IO_WQ_HASH_SHIFT; } -static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) +static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash) { - struct io_wq *wq = wqe->wq; bool ret = false; spin_lock_irq(&wq->hash->wait.lock); - if (list_empty(&wqe->wait.entry)) { - __add_wait_queue(&wq->hash->wait, &wqe->wait); + if (list_empty(&wq->wait.entry)) { + __add_wait_queue(&wq->hash->wait, &wq->wait); if (!test_bit(hash, &wq->hash->map)) { __set_current_state(TASK_RUNNING); - list_del_init(&wqe->wait.entry); + list_del_init(&wq->wait.entry); ret = true; } } @@ -463,14 +448,14 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) return ret; } -static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, +static struct io_wq_work *io_get_next_work(struct io_wq_acct *acct, struct io_worker *worker) __must_hold(acct->lock) { struct io_wq_work_node *node, *prev; struct io_wq_work *work, *tail; unsigned int stall_hash = -1U; - struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = worker->wq; wq_list_for_each(node, prev, &acct->work_list) { unsigned int hash; @@ -485,11 +470,11 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, hash = io_get_work_hash(work); /* all items with this hash lie in [work, tail] */ - tail = wqe->hash_tail[hash]; + tail = wq->hash_tail[hash]; /* hashed, can run if not already running */ - if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { - wqe->hash_tail[hash] = NULL; + if (!test_and_set_bit(hash, &wq->hash->map)) { + wq->hash_tail[hash] = NULL; wq_list_cut(&acct->work_list, &tail->list, prev); return work; } @@ -508,12 +493,12 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, */ set_bit(IO_ACCT_STALLED_BIT, &acct->flags); raw_spin_unlock(&acct->lock); - unstalled = io_wait_on_hash(wqe, stall_hash); + unstalled = io_wait_on_hash(wq, stall_hash); raw_spin_lock(&acct->lock); if (unstalled) { clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); - if (wq_has_sleeper(&wqe->wq->hash->wait)) - wake_up(&wqe->wq->hash->wait); + if (wq_has_sleeper(&wq->hash->wait)) + wake_up(&wq->hash->wait); } } @@ -534,13 +519,10 @@ static void io_assign_current_work(struct 
io_worker *worker, raw_spin_unlock(&worker->lock); } -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); - static void io_worker_handle_work(struct io_worker *worker) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); do { @@ -557,7 +539,7 @@ static void io_worker_handle_work(struct io_worker *worker) work = io_get_next_work(acct, worker); raw_spin_unlock(&acct->lock); if (work) { - __io_worker_busy(wqe, worker); + __io_worker_busy(wq, worker); /* * Make sure cancelation can find this, even before @@ -595,7 +577,7 @@ static void io_worker_handle_work(struct io_worker *worker) } io_assign_current_work(worker, work); if (linked) - io_wqe_enqueue(wqe, linked); + io_wq_enqueue(wq, linked); if (hash != -1U && !next_hashed) { /* serialize hash clear with wake_up() */ @@ -610,12 +592,11 @@ static void io_worker_handle_work(struct io_worker *worker) } while (1); } -static int io_wqe_worker(void *data) +static int io_wq_worker(void *data) { struct io_worker *worker = data; - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; bool exit_mask = false, last_timeout = false; char buf[TASK_COMM_LEN]; @@ -631,20 +612,20 @@ static int io_wqe_worker(void *data) while (io_acct_run_queue(acct)) io_worker_handle_work(worker); - raw_spin_lock(&wqe->lock); + raw_spin_lock(&wq->lock); /* * Last sleep timed out. Exit if we're not the last worker, * or if someone modified our affinity. */ if (last_timeout && (exit_mask || acct->nr_workers > 1)) { acct->nr_workers--; - raw_spin_unlock(&wqe->lock); + raw_spin_unlock(&wq->lock); __set_current_state(TASK_RUNNING); break; } last_timeout = false; - __io_worker_idle(wqe, worker); - raw_spin_unlock(&wqe->lock); + __io_worker_idle(wq, worker); + raw_spin_unlock(&wq->lock); if (io_run_task_work()) continue; ret = schedule_timeout(WORKER_IDLE_TIMEOUT); @@ -658,7 +639,7 @@ static int io_wqe_worker(void *data) if (!ret) { la |