path: root/io_uring/notif.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-08-02 13:37:55 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-08-02 13:37:55 -0700
commit	42df1cbf6a4726934cc5dac12bf263aa73c49fa3 (patch)
tree	129e32104dccc660c3c8fca03b8bba418e572d09 /io_uring/notif.c
parent	98e247464088a11ce2328a214fdb87d4c06f8db6 (diff)
parent	14b146b688ad9593f5eee93d51a34d09a47e50b5 (diff)
Merge tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk/linux-block
Pull io_uring zerocopy support from Jens Axboe:
 "This adds support for efficient zerocopy sends through io_uring. Both
  ipv4 and ipv6 are supported, as well as both TCP and UDP. The core
  network changes to support this are in a stable branch from Jakub that
  both io_uring and net-next have pulled in, and the io_uring changes are
  layered on top of that.

  All of the work has been done by Pavel"

* tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk/linux-block: (34 commits)
  io_uring: notification completion optimisation
  io_uring: export req alloc from core
  io_uring/net: use unsigned for flags
  io_uring/net: make page accounting more consistent
  io_uring/net: checks errors of zc mem accounting
  io_uring/net: improve io_get_notif_slot types
  selftests/io_uring: test zerocopy send
  io_uring: enable managed frags with register buffers
  io_uring: add zc notification flush requests
  io_uring: rename IORING_OP_FILES_UPDATE
  io_uring: flush notifiers after sendzc
  io_uring: sendzc with fixed buffers
  io_uring: allow to pass addr into sendzc
  io_uring: account locked pages for non-fixed zc
  io_uring: wire send zc request type
  io_uring: add notification slot registration
  io_uring: add rsrc referencing for notifiers
  io_uring: complete notifiers in tw
  io_uring: cache struct io_notif
  io_uring: add zc notification infrastructure
  ...
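The new file in this diff implements the slot registration that the series'
"io_uring: add notification slot registration" patch exposes to userspace. A
minimal, hedged sketch of that registration call is below: the struct layouts
are transcribed from the fields io_notif_register() reads via copy_from_user()
in the diff, so the exact field names and ordering are assumptions, and the
IORING_REGISTER_NOTIFIERS opcode name and value are likewise assumptions about
uapi added elsewhere in this series (they are not defined in this file).

	/*
	 * Hedged userspace sketch: register two zero-copy notification slots.
	 * Struct layouts are inferred from io_notif_register() below;
	 * IORING_REGISTER_NOTIFIERS and its value are assumptions.
	 */
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef IORING_REGISTER_NOTIFIERS
	#define IORING_REGISTER_NOTIFIERS 26	/* assumed opcode value */
	#endif

	struct io_uring_notification_slot {
		uint64_t tag;		/* echoed back as cqe->user_data */
		uint64_t resv[3];	/* must be zero; checked by io_notif_register() */
	};

	struct io_uring_notification_register {
		uint32_t nr_slots;	/* number of slots in the array at 'data' */
		uint32_t resv;
		uint64_t resv2;
		uint64_t data;		/* user pointer to the slot array */
		uint64_t resv3;
	};

	static int register_notif_slots(int ring_fd)
	{
		struct io_uring_notification_slot slots[2] = {
			{ .tag = 0xdead0001ULL },
			{ .tag = 0xdead0002ULL },
		};
		struct io_uring_notification_register reg = {
			.nr_slots = 2,
			.data = (uint64_t)(uintptr_t)slots,
		};

		/*
		 * io_notif_register() rejects any size other than
		 * sizeof(struct io_uring_notification_register), so that is
		 * what gets passed as the last io_uring_register(2) argument.
		 */
		return syscall(__NR_io_uring_register, ring_fd,
			       IORING_REGISTER_NOTIFIERS, &reg, sizeof(reg));
	}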
Diffstat (limited to 'io_uring/notif.c')
-rw-r--r--	io_uring/notif.c	159
1 file changed, 159 insertions(+), 0 deletions(-)
diff --git a/io_uring/notif.c b/io_uring/notif.c
new file mode 100644
index 000000000000..b5f989dff9de
--- /dev/null
+++ b/io_uring/notif.c
@@ -0,0 +1,159 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/io_uring.h>
+
+#include "io_uring.h"
+#include "notif.h"
+#include "rsrc.h"
+
+static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
+{
+ struct io_notif_data *nd = io_notif_to_data(notif);
+ struct io_ring_ctx *ctx = notif->ctx;
+
+ if (nd->account_pages && ctx->user) {
+ __io_unaccount_mem(ctx->user, nd->account_pages);
+ nd->account_pages = 0;
+ }
+ io_req_task_complete(notif, locked);
+}
+
+static inline void io_notif_complete(struct io_kiocb *notif)
+ __must_hold(&notif->ctx->uring_lock)
+{
+ bool locked = true;
+
+ __io_notif_complete_tw(notif, &locked);
+}
+
+static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+ struct ubuf_info *uarg,
+ bool success)
+{
+ struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+ struct io_kiocb *notif = cmd_to_io_kiocb(nd);
+
+ if (refcount_dec_and_test(&uarg->refcnt)) {
+ notif->io_task_work.func = __io_notif_complete_tw;
+ io_req_task_work_add(notif);
+ }
+}
+
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx,
+ struct io_notif_slot *slot)
+ __must_hold(&ctx->uring_lock)
+{
+ struct io_kiocb *notif;
+ struct io_notif_data *nd;
+
+ if (unlikely(!io_alloc_req_refill(ctx)))
+ return NULL;
+ notif = io_alloc_req(ctx);
+ notif->opcode = IORING_OP_NOP;
+ notif->flags = 0;
+ notif->file = NULL;
+ notif->task = current;
+ io_get_task_refs(1);
+ notif->rsrc_node = NULL;
+ io_req_set_rsrc_node(notif, ctx, 0);
+ notif->cqe.user_data = slot->tag;
+ notif->cqe.flags = slot->seq++;
+ notif->cqe.res = 0;
+
+ nd = io_notif_to_data(notif);
+ nd->account_pages = 0;
+ nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+ nd->uarg.callback = io_uring_tx_zerocopy_callback;
+ /* master ref owned by io_notif_slot, will be dropped on flush */
+ refcount_set(&nd->uarg.refcnt, 1);
+ return notif;
+}
+
+void io_notif_slot_flush(struct io_notif_slot *slot)
+ __must_hold(&ctx->uring_lock)
+{
+ struct io_kiocb *notif = slot->notif;
+ struct io_notif_data *nd = io_notif_to_data(notif);
+
+ slot->notif = NULL;
+
+ /* drop slot's master ref */
+ if (refcount_dec_and_test(&nd->uarg.refcnt))
+ io_notif_complete(notif);
+}
+
+__cold int io_notif_unregister(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
+{
+ int i;
+
+ if (!ctx->notif_slots)
+ return -ENXIO;
+
+ for (i = 0; i < ctx->nr_notif_slots; i++) {
+ struct io_notif_slot *slot = &ctx->notif_slots[i];
+ struct io_kiocb *notif = slot->notif;
+ struct io_notif_data *nd;
+
+ if (!notif)
+ continue;
+ nd = io_kiocb_to_cmd(notif);
+ slot->notif = NULL;
+ if (!refcount_dec_and_test(&nd->uarg.refcnt))
+ continue;
+ notif->io_task_work.func = __io_notif_complete_tw;
+ io_req_task_work_add(notif);
+ }
+
+ kvfree(ctx->notif_slots);
+ ctx->notif_slots = NULL;
+ ctx->nr_notif_slots = 0;
+ return 0;
+}
+
+__cold int io_notif_register(struct io_ring_ctx *ctx,
+ void __user *arg, unsigned int size)
+ __must_hold(&ctx->uring_lock)
+{
+ struct io_uring_notification_slot __user *slots;
+ struct io_uring_notification_slot slot;
+ struct io_uring_notification_register reg;
+ unsigned i;
+
+ BUILD_BUG_ON(sizeof(struct io_notif_data) > 64);
+
+ if (ctx->nr_notif_slots)
+ return -EBUSY;
+ if (size != sizeof(reg))
+ return -EINVAL;
+ if (copy_from_user(&reg, arg, sizeof(reg)))
+ return -EFAULT;
+ if (!reg.nr_slots || reg.nr_slots > IORING_MAX_NOTIF_SLOTS)
+ return -EINVAL;
+ if (reg.resv || reg.resv2 || reg.resv3)
+ return -EINVAL;
+
+ slots = u64_to_user_ptr(reg.data);
+ ctx->notif_slots = kvcalloc(reg.nr_slots, sizeof(ctx->notif_slots[0]),
+ GFP_KERNEL_ACCOUNT);
+ if (!ctx->notif_slots)
+ return -ENOMEM;
+
+ for (i = 0; i < reg.nr_slots; i++, ctx->nr_notif_slots++) {
+ struct io_notif_slot *notif_slot = &ctx->notif_slots[i];
+
+ if (copy_from_user(&slot, &slots[i], sizeof(slot))) {
+ io_notif_unregister(ctx);
+ return -EFAULT;
+ }
+ if (slot.resv[0] | slot.resv[1] | slot.resv[2]) {
+ io_notif_unregister(ctx);
+ return -EINVAL;
+ }
+ notif_slot->tag = slot.tag;
+ }
+ return 0;
+}