diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-28 15:07:04 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-28 15:07:04 -0700 |
| commit | eff5f16bfd87ae48c56751741af41a825d5d4618 (patch) | |
| tree | 9b1e58d1038902a754107b35621d428ba24f5165 /io_uring/io_uring.c | |
| parent | 6df9d086ffcb6b0521872fef5f9f4dd1907abb9a (diff) | |
| parent | 6889ae1b4df1579bcdffef023e2ea9a982565dff (diff) | |
| download | linux-eff5f16bfd87ae48c56751741af41a825d5d4618.tar.gz linux-eff5f16bfd87ae48c56751741af41a825d5d4618.tar.bz2 linux-eff5f16bfd87ae48c56751741af41a825d5d4618.zip | |
Merge tag 'for-6.15/io_uring-reg-vec-20250327' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:
"Final separate updates for io_uring.
This started out as a series of cleanups improvements and improvements
for registered buffers, but as the last series of the io_uring changes
for 6.15, it also collected a few fixes for the other branches on top:
- Add support for vectored fixed/registered buffers.
Previously only single segments have been supported for commands,
now vectored variants are supported as well. This series includes
networking and file read/write support.
- Small series unifying return codes across multi and single shot.
- Small series cleaning up registerd buffer importing.
- Adding support for vectored registered buffers for uring_cmd.
- Fix for io-wq handling of command reissue.
- Various little fixes and tweaks"
* tag 'for-6.15/io_uring-reg-vec-20250327' of git://git.kernel.dk/linux: (25 commits)
io_uring/net: fix io_req_post_cqe abuse by send bundle
io_uring/net: use REQ_F_IMPORT_BUFFER for send_zc
io_uring: move min_events sanitisation
io_uring: rename "min" arg in io_iopoll_check()
io_uring: open code __io_post_aux_cqe()
io_uring: defer iowq cqe overflow via task_work
io_uring: fix retry handling off iowq
io_uring/net: only import send_zc buffer once
io_uring/cmd: introduce io_uring_cmd_import_fixed_vec
io_uring/cmd: add iovec cache for commands
io_uring/cmd: don't expose entire cmd async data
io_uring: rename the data cmd cache
io_uring: rely on io_prep_reg_vec for iovec placement
io_uring: introduce io_prep_reg_iovec()
io_uring: unify STOP_MULTISHOT with IOU_OK
io_uring: return -EAGAIN to continue multishot
io_uring: cap cached iovec/bvec size
io_uring/net: implement vectored reg bufs for zctx
io_uring/net: convert to struct iou_vec
io_uring/net: pull vec alloc out of msghdr import
...
Diffstat (limited to 'io_uring/io_uring.c')
| -rw-r--r-- | io_uring/io_uring.c | 65 |
1 files changed, 27 insertions, 38 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f743581cc81b..3ba49c628337 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -289,7 +289,7 @@ static void io_free_alloc_caches(struct io_ring_ctx *ctx) io_alloc_cache_free(&ctx->apoll_cache, kfree); io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free); io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free); - io_alloc_cache_free(&ctx->uring_cache, kfree); + io_alloc_cache_free(&ctx->cmd_cache, io_cmd_cache_free); io_alloc_cache_free(&ctx->msg_cache, kfree); io_futex_cache_free(ctx); io_rsrc_cache_free(ctx); @@ -334,8 +334,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_async_rw), offsetof(struct io_async_rw, clear)); - ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_uring_cmd_data), 0); + ret |= io_alloc_cache_init(&ctx->cmd_cache, IO_ALLOC_CACHE_MAX, + sizeof(struct io_async_cmd), + sizeof(struct io_async_cmd)); spin_lock_init(&ctx->msg_lock); ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_kiocb), 0); @@ -833,24 +834,14 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, return false; } -static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, - u32 cflags) +bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) { bool filled; + io_cq_lock(ctx); filled = io_fill_cqe_aux(ctx, user_data, res, cflags); if (!filled) filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); - - return filled; -} - -bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) -{ - bool filled; - - io_cq_lock(ctx); - filled = __io_post_aux_cqe(ctx, user_data, res, cflags); io_cq_unlock_post(ctx); return filled; } @@ -891,6 +882,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; + bool completed = true; /* * All execution paths but io-wq use the deferred completions by @@ -903,19 +895,21 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags) * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires * the submitter task context, IOPOLL protects with uring_lock. */ - if (ctx->lockless_cq) { + if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) { +defer_complete: req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); return; } io_cq_lock(ctx); - if (!(req->flags & REQ_F_CQE_SKIP)) { - if (!io_fill_cqe_req(ctx, req)) - io_req_cqe_overflow(req); - } + if (!(req->flags & REQ_F_CQE_SKIP)) + completed = io_fill_cqe_req(ctx, req); io_cq_unlock_post(ctx); + if (!completed) + goto defer_complete; + /* * We don't free the request here because we know it's called from * io-wq only, which holds a reference, so it cannot be the last put. @@ -1511,11 +1505,13 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int io_iopoll_check(struct io_ring_ctx *ctx, long min) +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) { unsigned int nr_events = 0; unsigned long check_cq; + min_events = min(min_events, ctx->cq_entries); + lockdep_assert_held(&ctx->uring_lock); if (!io_allowed_run_tw(ctx)) @@ -1557,7 +1553,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) io_task_work_pending(ctx)) { u32 tail = ctx->cached_cq_tail; - (void) io_run_local_work_locked(ctx, min); + (void) io_run_local_work_locked(ctx, min_events); if (task_work_pending(current) || wq_list_empty(&ctx->iopoll_list)) { @@ -1570,7 +1566,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) wq_list_empty(&ctx->iopoll_list)) break; } - ret = io_do_iopoll(ctx, !min); + ret = io_do_iopoll(ctx, !min_events); if (unlikely(ret < 0)) return ret; @@ -1580,7 +1576,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) break; nr_events += ret; - } while (nr_events < min); + } while (nr_events < min_events); return 0; } @@ -1791,10 +1787,7 @@ int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw) ret = __io_issue_sqe(req, issue_flags, &io_issue_defs[req->opcode]); - WARN_ON_ONCE(ret == IOU_OK); - - if (ret == IOU_ISSUE_SKIP_COMPLETE) - ret = 0; + WARN_ON_ONCE(ret == IOU_ISSUE_SKIP_COMPLETE); return ret; } @@ -1847,7 +1840,7 @@ fail: * Don't allow any multishot execution from io-wq. It's more restrictive * than necessary and also cleaner. */ - if (req->flags & REQ_F_APOLL_MULTISHOT) { + if (req->flags & (REQ_F_MULTISHOT|REQ_F_APOLL_MULTISHOT)) { err = -EBADFD; if (!io_file_can_poll(req)) goto fail; @@ -1858,7 +1851,7 @@ fail: goto fail; return; } else { - req->flags &= ~REQ_F_APOLL_MULTISHOT; + req->flags &= ~(REQ_F_APOLL_MULTISHOT|REQ_F_MULTISHOT); } } @@ -2549,6 +2542,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, ktime_t start_time; int ret; + min_events = min_t(int, min_events, ctx->cq_entries); + if (!io_allowed_run_tw(ctx)) return -EEXIST; if (io_local_work_pending(ctx)) @@ -3435,22 +3430,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, mutex_lock(&ctx->uring_lock); iopoll_locked: ret2 = io_validate_ext_arg(ctx, flags, argp, argsz); - if (likely(!ret2)) { - min_complete = min(min_complete, - ctx->cq_entries); + if (likely(!ret2)) ret2 = io_iopoll_check(ctx, min_complete); - } mutex_unlock(&ctx->uring_lock); } else { struct ext_arg ext_arg = { .argsz = argsz }; ret2 = io_get_ext_arg(ctx, flags, argp, &ext_arg); - if (likely(!ret2)) { - min_complete = min(min_complete, - ctx->cq_entries); + if (likely(!ret2)) ret2 = io_cqring_wait(ctx, min_complete, flags, &ext_arg); - } } if (!ret) { |
