path: root/io_uring/io_uring.c
author    Jens Axboe <axboe@kernel.dk>    2025-05-16 12:31:19 -0600
committer Jens Axboe <axboe@kernel.dk>    2025-05-16 12:31:19 -0600
commit    3de7361f7cd9c7b79acba5a3c72588ee1d83f031 (patch)
tree      4bec6d1271fa15c10e26144278f83a5140d1ffb7 /io_uring/io_uring.c
parent    2b61bb1d9aa601ec393054a61be0a707a5bea928 (diff)
parent    d871198ee431d90f5308d53998c1ba1d5db5619a (diff)
Merge branch 'io_uring-6.15' into for-6.16/io_uring
Merge in 6.15 io_uring fixes, mostly so that the fdinfo changes can get
easily extended without causing merge conflicts.

* io_uring-6.15:
  io_uring/fdinfo: grab ctx->uring_lock around io_uring_show_fdinfo()
  io_uring/memmap: don't use page_address() on a highmem page
  io_uring/uring_cmd: fix hybrid polling initialization issue
  io_uring/sqpoll: Increase task_work submission batch size
  io_uring: ensure deferred completions are flushed for multishot
  io_uring: always arm linked timeouts prior to issue
  io_uring/fdinfo: annotate racy sq/cq head/tail reads
  io_uring: fix 'sync' handling of io_fallback_tw()
  io_uring: don't duplicate flushing in io_req_post_cqe
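The multishot flush fix enforces a simple ordering rule: completions batched
on the deferred list must reach the CQ ring before any directly posted CQE,
or userspace observes results out of order. A minimal standalone C model of
that rule follows; the names (defer_completion, flush_completions, post_cqe)
are toy stand-ins, not io_uring internals.

/*
 * Toy model of the CQE ordering rule (illustrative only; these are
 * not io_uring internals). Deferred completions are batched, and a
 * direct post must flush the batch first so the ring stays FIFO.
 */
#include <stdio.h>

#define CQ_ENTRIES 16

static int cq[CQ_ENTRIES];
static unsigned int cq_tail;

static int deferred[CQ_ENTRIES];
static unsigned int ndeferred;

/* Normal completion path: park the result for a batched flush. */
static void defer_completion(int res)
{
	deferred[ndeferred++] = res;
}

/* Move the whole batch to the CQ ring in submission order. */
static void flush_completions(void)
{
	unsigned int i;

	for (i = 0; i < ndeferred; i++)
		cq[cq_tail++ % CQ_ENTRIES] = deferred[i];
	ndeferred = 0;
}

/* Direct post (e.g. a multishot CQE): flush the batch first. */
static void post_cqe(int res)
{
	if (ndeferred)
		flush_completions();
	cq[cq_tail++ % CQ_ENTRIES] = res;
}

int main(void)
{
	unsigned int i;

	defer_completion(1);
	defer_completion(2);
	post_cqe(3);	/* without the flush, 3 would overtake 1 and 2 */

	for (i = 0; i < cq_tail; i++)
		printf("cqe %u: res=%d\n", i, cq[i]);
	return 0;
}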
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r--    io_uring/io_uring.c    82
1 file changed, 38 insertions(+), 44 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9a9b8d35349b..43c285cd2294 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -430,24 +430,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
return req->link;
}
-static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
-{
- if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
- return NULL;
- return __io_prep_linked_timeout(req);
-}
-
-static noinline void __io_arm_ltimeout(struct io_kiocb *req)
-{
- io_queue_linked_timeout(__io_prep_linked_timeout(req));
-}
-
-static inline void io_arm_ltimeout(struct io_kiocb *req)
-{
- if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
- __io_arm_ltimeout(req);
-}
-
static void io_prep_async_work(struct io_kiocb *req)
{
const struct io_issue_def *def = &io_issue_defs[req->opcode];
@@ -500,7 +482,6 @@ static void io_prep_async_link(struct io_kiocb *req)
static void io_queue_iowq(struct io_kiocb *req)
{
- struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->tctx;
BUG_ON(!tctx);
@@ -525,8 +506,6 @@ static void io_queue_iowq(struct io_kiocb *req)
trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
io_wq_enqueue(tctx->io_wq, &req->work);
- if (link)
- io_queue_linked_timeout(link);
}
static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw)
@@ -864,13 +843,26 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
struct io_ring_ctx *ctx = req->ctx;
bool posted;
+ /*
+ * If multishot has already posted deferred completions, ensure that
+ * those are flushed first before posting this one. If not, CQEs
+ * could get reordered.
+ */
+ if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+ __io_submit_flush_completions(ctx);
+
lockdep_assert(!io_wq_current_is_worker());
lockdep_assert_held(&ctx->uring_lock);
- __io_cq_lock(ctx);
- posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+ if (!ctx->lockless_cq) {
+ spin_lock(&ctx->completion_lock);
+ posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+ spin_unlock(&ctx->completion_lock);
+ } else {
+ posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+ }
+
ctx->submit_state.cq_flush = true;
- __io_cq_unlock_post(ctx);
return posted;
}
@@ -1058,21 +1050,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync)
while (node) {
req = container_of(node, struct io_kiocb, io_task_work.node);
node = node->next;
- if (sync && last_ctx != req->ctx) {
+ if (last_ctx != req->ctx) {
if (last_ctx) {
- flush_delayed_work(&last_ctx->fallback_work);
+ if (sync)
+ flush_delayed_work(&last_ctx->fallback_work);
percpu_ref_put(&last_ctx->refs);
}
last_ctx = req->ctx;
percpu_ref_get(&last_ctx->refs);
}
- if (llist_add(&req->io_task_work.node,
- &req->ctx->fallback_llist))
- schedule_delayed_work(&req->ctx->fallback_work, 1);
+ if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist))
+ schedule_delayed_work(&last_ctx->fallback_work, 1);
}
if (last_ctx) {
- flush_delayed_work(&last_ctx->fallback_work);
+ if (sync)
+ flush_delayed_work(&last_ctx->fallback_work);
percpu_ref_put(&last_ctx->refs);
}
}
@@ -1684,15 +1677,22 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
return !!req->file;
}
+#define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT)
+
static inline int __io_issue_sqe(struct io_kiocb *req,
unsigned int issue_flags,
const struct io_issue_def *def)
{
const struct cred *creds = NULL;
+ struct io_kiocb *link = NULL;
int ret;
- if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
- creds = override_creds(req->creds);
+ if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) {
+ if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
+ creds = override_creds(req->creds);
+ if (req->flags & REQ_F_ARM_LTIMEOUT)
+ link = __io_prep_linked_timeout(req);
+ }
if (!def->audit_skip)
audit_uring_entry(req->opcode);
@@ -1702,8 +1702,12 @@ static inline int __io_issue_sqe(struct io_kiocb *req,
if (!def->audit_skip)
audit_uring_exit(!ret, ret);
- if (creds)
- revert_creds(creds);
+ if (unlikely(creds || link)) {
+ if (creds)
+ revert_creds(creds);
+ if (link)
+ io_queue_linked_timeout(link);
+ }
return ret;
}
@@ -1729,7 +1733,6 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
if (ret == IOU_ISSUE_SKIP_COMPLETE) {
ret = 0;
- io_arm_ltimeout(req);
/* If the op doesn't have a file, we're not polling for it */
if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
@@ -1784,8 +1787,6 @@ void io_wq_submit_work(struct io_wq_work *work)
else
req_ref_get(req);
- io_arm_ltimeout(req);
-
/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
fail:
@@ -1902,15 +1903,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd)
static void io_queue_async(struct io_kiocb *req, int ret)
__must_hold(&req->ctx->uring_lock)
{
- struct io_kiocb *linked_timeout;
-
if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
io_req_defer_failed(req, ret);
return;
}
- linked_timeout = io_prep_linked_timeout(req);
-
switch (io_arm_poll_handler(req, 0)) {
case IO_APOLL_READY:
io_kbuf_recycle(req, 0);
@@ -1923,9 +1920,6 @@ static void io_queue_async(struct io_kiocb *req, int ret)
case IO_APOLL_OK:
break;
}
-
- if (linked_timeout)
- io_queue_linked_timeout(linked_timeout);
}
static inline void io_queue_sqe(struct io_kiocb *req)
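
One way to read the __io_issue_sqe() change above: the two rare per-request
conditions (credential override, linked timeout) are folded into
REQ_ISSUE_SLOW_FLAGS so the common path pays a single flag test, and the
timeout link is picked up before issue but only queued once issue returns.
A rough standalone sketch of that shape, with simplified types and names
that are not the kernel code:

/*
 * Sketch of the slow-path flag pattern (simplified, not kernel code).
 * Rarely-set flags share one mask; the link is grabbed before the
 * request is issued and the timeout queued only afterwards.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define REQ_F_CREDS		(1u << 0)
#define REQ_F_ARM_LTIMEOUT	(1u << 1)
#define REQ_ISSUE_SLOW_FLAGS	(REQ_F_CREDS | REQ_F_ARM_LTIMEOUT)

struct req {
	unsigned int flags;
	const char *name;
	struct req *link;	/* linked timeout request, if any */
};

static void queue_linked_timeout(struct req *timeout)
{
	printf("  timeout '%s' queued after issue\n", timeout->name);
}

static int issue(struct req *req)
{
	struct req *link = NULL;
	bool creds = false;

	/* One test covers every rare condition on the hot path. */
	if (req->flags & REQ_ISSUE_SLOW_FLAGS) {
		creds = req->flags & REQ_F_CREDS;
		if (req->flags & REQ_F_ARM_LTIMEOUT)
			link = req->link;	/* prep before issue */
	}

	printf("issue '%s'%s\n", req->name, creds ? " (creds swapped)" : "");

	if (link)
		queue_linked_timeout(link);	/* arm once issued */
	return 0;
}

int main(void)
{
	struct req timeout = { .name = "ltimeout" };
	struct req fast = { .name = "fast-path" };
	struct req slow = {
		.flags = REQ_F_ARM_LTIMEOUT,
		.name = "with-timeout",
		.link = &timeout,
	};

	issue(&fast);
	issue(&slow);
	return 0;
}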