From c1c03ee7957ec178756cae09c39d77194e8cddb7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Jan 2025 09:44:21 -0700 Subject: io_uring/rsrc: fixup io_clone_buffers() error handling Jann reports he can trigger a UAF if the target ring unregisters buffers before the clone operation is fully done. And additionally also an issue related to node allocation failures. Both of those stemp from the fact that the cleanup logic puts the buffers manually, rather than just relying on io_rsrc_data_free() doing it. Hence kill the manual cleanup code and just let io_rsrc_data_free() handle it, it'll put the nodes appropriately. Reported-by: Jann Horn Fixes: 3597f2786b68 ("io_uring/rsrc: unify file and buffer resource tables") Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'io_uring') diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 077f84684c18..69937d0c94f9 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -997,7 +997,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); if (!dst_node) { ret = -ENOMEM; - goto out_put_free; + goto out_unlock; } refcount_inc(&src_node->buf->refs); @@ -1033,14 +1033,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx mutex_lock(&src_ctx->uring_lock); /* someone raced setting up buffers, dump ours */ ret = -EBUSY; -out_put_free: - i = data.nr; - while (i--) { - if (data.nodes[i]) { - io_buffer_unmap(src_ctx, data.nodes[i]); - kfree(data.nodes[i]); - } - } out_unlock: io_rsrc_data_free(ctx, &data); mutex_unlock(&src_ctx->uring_lock); -- cgit v1.2.3 From 8911798d3e8a9624b1acf2882c7a0183694d714d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Jan 2025 07:39:12 -0700 Subject: io_uring/register: use stable SQ/CQ ring data during resize Normally the kernel would not expect an application to modify any of the data shared with the kernel during a resize operation, but of course the kernel cannot always assume good intent on behalf of the application. As part of resizing the rings, existing SQEs and CQEs are copied over to the new storage. Resizing uses the masks in the newly allocated shared storage to index the arrays, however it's possible that malicious userspace could modify these after they have been sanity checked. Use the validated and locally stored CQ and SQ ring sizing for masking to ensure the values are both stable and valid. Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS") Reported-by: Jann Horn Signed-off-by: Jens Axboe --- io_uring/register.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'io_uring') diff --git a/io_uring/register.c b/io_uring/register.c index fdd44914c39c..5880eb75ae44 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -514,7 +514,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) goto overflow; for (i = o.rings->sq.head; i < tail; i++) { unsigned src_head = i & (ctx->sq_entries - 1); - unsigned dst_head = i & n.rings->sq_ring_mask; + unsigned dst_head = i & (p.sq_entries - 1); n.sq_sqes[dst_head] = o.sq_sqes[src_head]; } @@ -533,7 +533,7 @@ overflow: } for (i = o.rings->cq.head; i < tail; i++) { unsigned src_head = i & (ctx->cq_entries - 1); - unsigned dst_head = i & n.rings->cq_ring_mask; + unsigned dst_head = i & (p.cq_entries - 1); n.rings->cqes[dst_head] = o.rings->cqes[src_head]; } -- cgit v1.2.3 From 2c5aae129f427f83eeba5efbfb4e60a777cd073c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Jan 2025 08:23:55 -0700 Subject: io_uring/register: document io_register_resize_rings() shared mem usage It can be a bit hard to tell which parts of io_register_resize_rings() are operating on shared memory, and which ones are not. And anything reading or writing to those regions should really use the read/write once primitives. Hence add those, ensuring sanity in how this memory is accessed, and helping document the shared nature of it. Signed-off-by: Jens Axboe --- io_uring/register.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'io_uring') diff --git a/io_uring/register.c b/io_uring/register.c index 5880eb75ae44..ffcbc840032e 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -449,10 +449,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) if (IS_ERR(n.rings)) return PTR_ERR(n.rings); - n.rings->sq_ring_mask = p.sq_entries - 1; - n.rings->cq_ring_mask = p.cq_entries - 1; - n.rings->sq_ring_entries = p.sq_entries; - n.rings->cq_ring_entries = p.cq_entries; + /* + * At this point n.rings is shared with userspace, just like o.rings + * is as well. While we don't expect userspace to modify it while + * a resize is in progress, and it's most likely that userspace will + * shoot itself in the foot if it does, we can't always assume good + * intent... Use read/write once helpers from here on to indicate the + * shared nature of it. + */ + WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1); + WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1); + WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries); + WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries); if (copy_to_user(arg, &p, sizeof(p))) { io_register_free_rings(&p, &n); @@ -509,20 +517,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) * rings can't hold what is already there, then fail the operation. */ n.sq_sqes = ptr; - tail = o.rings->sq.tail; - if (tail - o.rings->sq.head > p.sq_entries) + tail = READ_ONCE(o.rings->sq.tail); + if (tail - READ_ONCE(o.rings->sq.head) > p.sq_entries) goto overflow; - for (i = o.rings->sq.head; i < tail; i++) { + for (i = READ_ONCE(o.rings->sq.head); i < tail; i++) { unsigned src_head = i & (ctx->sq_entries - 1); unsigned dst_head = i & (p.sq_entries - 1); n.sq_sqes[dst_head] = o.sq_sqes[src_head]; } - n.rings->sq.head = o.rings->sq.head; - n.rings->sq.tail = o.rings->sq.tail; + WRITE_ONCE(n.rings->sq.head, READ_ONCE(o.rings->sq.head)); + WRITE_ONCE(n.rings->sq.tail, READ_ONCE(o.rings->sq.tail)); - tail = o.rings->cq.tail; - if (tail - o.rings->cq.head > p.cq_entries) { + tail = READ_ONCE(o.rings->cq.tail); + if (tail - READ_ONCE(o.rings->cq.head) > p.cq_entries) { overflow: /* restore old rings, and return -EOVERFLOW via cleanup path */ ctx->rings = o.rings; @@ -531,21 +539,21 @@ overflow: ret = -EOVERFLOW; goto out; } - for (i = o.rings->cq.head; i < tail; i++) { + for (i = READ_ONCE(o.rings->cq.head); i < tail; i++) { unsigned src_head = i & (ctx->cq_entries - 1); unsigned dst_head = i & (p.cq_entries - 1); n.rings->cqes[dst_head] = o.rings->cqes[src_head]; } - n.rings->cq.head = o.rings->cq.head; - n.rings->cq.tail = o.rings->cq.tail; + WRITE_ONCE(n.rings->cq.head, READ_ONCE(o.rings->cq.head)); + WRITE_ONCE(n.rings->cq.tail, READ_ONCE(o.rings->cq.tail)); /* invalidate cached cqe refill */ ctx->cqe_cached = ctx->cqe_sentinel = NULL; - n.rings->sq_dropped = o.rings->sq_dropped; - n.rings->sq_flags = o.rings->sq_flags; - n.rings->cq_flags = o.rings->cq_flags; - n.rings->cq_overflow = o.rings->cq_overflow; + WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); + WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags)); + WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); + WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); /* all done, store old pointers and assign new ones */ if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) -- cgit v1.2.3 From 6f7a644eb7db10f9993039bab7740f7982d4edf4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Jan 2025 08:39:15 -0700 Subject: io_uring/register: cache old SQ/CQ head reading for copies The SQ and CQ ring heads are read twice - once for verifying that it's within bounds, and once inside the loops copying SQE and CQE entries. This is technically incorrect, in case the values could get modified in between verifying them and using them in the copy loop. While this won't lead to anything truly nefarious, it may cause longer loop times for the copies than expected. Read the ring head values once, and use the verified value in the copy loops. Signed-off-by: Jens Axboe --- io_uring/register.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'io_uring') diff --git a/io_uring/register.c b/io_uring/register.c index ffcbc840032e..371aec87e078 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -405,8 +405,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) { struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL; size_t size, sq_array_offset; + unsigned i, tail, old_head; struct io_uring_params p; - unsigned i, tail; void *ptr; int ret; @@ -518,9 +518,10 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) */ n.sq_sqes = ptr; tail = READ_ONCE(o.rings->sq.tail); - if (tail - READ_ONCE(o.rings->sq.head) > p.sq_entries) + old_head = READ_ONCE(o.rings->sq.head); + if (tail - old_head > p.sq_entries) goto overflow; - for (i = READ_ONCE(o.rings->sq.head); i < tail; i++) { + for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->sq_entries - 1); unsigned dst_head = i & (p.sq_entries - 1); @@ -530,7 +531,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) WRITE_ONCE(n.rings->sq.tail, READ_ONCE(o.rings->sq.tail)); tail = READ_ONCE(o.rings->cq.tail); - if (tail - READ_ONCE(o.rings->cq.head) > p.cq_entries) { + old_head = READ_ONCE(o.rings->cq.head); + if (tail - old_head > p.cq_entries) { overflow: /* restore old rings, and return -EOVERFLOW via cleanup path */ ctx->rings = o.rings; @@ -539,7 +541,7 @@ overflow: ret = -EOVERFLOW; goto out; } - for (i = READ_ONCE(o.rings->cq.head); i < tail; i++) { + for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->cq_entries - 1); unsigned dst_head = i & (p.cq_entries - 1); -- cgit v1.2.3