| | | |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 13:37:55 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 13:37:55 -0700 |
| commit | 42df1cbf6a4726934cc5dac12bf263aa73c49fa3 (patch) | |
| tree | 129e32104dccc660c3c8fca03b8bba418e572d09 /io_uring/notif.c | |
| parent | 98e247464088a11ce2328a214fdb87d4c06f8db6 (diff) | |
| parent | 14b146b688ad9593f5eee93d51a34d09a47e50b5 (diff) | |
Merge tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk/linux-block
Pull io_uring zerocopy support from Jens Axboe:
"This adds support for efficient support for zerocopy sends through
io_uring. Both ipv4 and ipv6 is supported, as well as both TCP and
UDP.
The core network changes to support this is in a stable branch from
Jakub that both io_uring and net-next has pulled in, and the io_uring
changes are layered on top of that.
All of the work has been done by Pavel"
* tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk/linux-block: (34 commits)
io_uring: notification completion optimisation
io_uring: export req alloc from core
io_uring/net: use unsigned for flags
io_uring/net: make page accounting more consistent
io_uring/net: checks errors of zc mem accounting
io_uring/net: improve io_get_notif_slot types
selftests/io_uring: test zerocopy send
io_uring: enable managed frags with register buffers
io_uring: add zc notification flush requests
io_uring: rename IORING_OP_FILES_UPDATE
io_uring: flush notifiers after sendzc
io_uring: sendzc with fixed buffers
io_uring: allow to pass addr into sendzc
io_uring: account locked pages for non-fixed zc
io_uring: wire send zc request type
io_uring: add notification slot registration
io_uring: add rsrc referencing for notifiers
io_uring: complete notifiers in tw
io_uring: cache struct io_notif
io_uring: add zc notification infrastructure
...
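The pull description above outlines the completion model this series introduces: a zerocopy send completes once for the send result and again once the kernel no longer references the user buffer. As an illustration only, here is a minimal userspace sketch. It uses liburing's io_uring_prep_send_zc() helper and the IORING_CQE_F_MORE / IORING_CQE_F_NOTIF flags as they exist in the API that eventually stabilized (liburing 2.3+, kernel 6.0+), not the exact notification-slot interface in this merge, so treat the names as assumptions about the later form of the feature.

```c
/*
 * Hedged sketch of a single zerocopy send, assuming the stabilized
 * io_uring_prep_send_zc() interface rather than this series' slot API.
 * The two-CQE model is the same idea: one CQE for the send result and,
 * if IORING_CQE_F_MORE was set, a later notification CQE once the
 * kernel has released the buffer.
 */
#include <liburing.h>
#include <stdio.h>

static int send_zc_once(struct io_uring *ring, int sockfd,
                        const void *buf, size_t len)
{
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
        struct io_uring_cqe *cqe;
        unsigned more;
        int ret;

        if (!sqe)
                return -1;

        /* Queue the zerocopy send; buf must stay untouched until the
         * notification CQE arrives. */
        io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
        io_uring_submit(ring);

        /* First CQE: the send result. */
        ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0)
                return ret;
        more = cqe->flags & IORING_CQE_F_MORE;
        printf("send res=%d, notification pending=%u\n", cqe->res, !!more);
        io_uring_cqe_seen(ring, cqe);

        if (more) {
                /* Second CQE: the notification (IORING_CQE_F_NOTIF).
                 * After this the buffer may be reused or freed. */
                ret = io_uring_wait_cqe(ring, &cqe);
                if (ret < 0)
                        return ret;
                io_uring_cqe_seen(ring, cqe);
        }
        return 0;
}
```

The point of the second completion is that buffer ownership is decoupled from request completion; the kernel-side machinery that tracks those outstanding buffer references is what io_uring/notif.c below implements.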
Diffstat (limited to 'io_uring/notif.c')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | io_uring/notif.c | 159 |

1 file changed, 159 insertions, 0 deletions
```diff
diff --git a/io_uring/notif.c b/io_uring/notif.c
new file mode 100644
index 000000000000..b5f989dff9de
--- /dev/null
+++ b/io_uring/notif.c
@@ -0,0 +1,159 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/io_uring.h>
+
+#include "io_uring.h"
+#include "notif.h"
+#include "rsrc.h"
+
+static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
+{
+        struct io_notif_data *nd = io_notif_to_data(notif);
+        struct io_ring_ctx *ctx = notif->ctx;
+
+        if (nd->account_pages && ctx->user) {
+                __io_unaccount_mem(ctx->user, nd->account_pages);
+                nd->account_pages = 0;
+        }
+        io_req_task_complete(notif, locked);
+}
+
+static inline void io_notif_complete(struct io_kiocb *notif)
+        __must_hold(&notif->ctx->uring_lock)
+{
+        bool locked = true;
+
+        __io_notif_complete_tw(notif, &locked);
+}
+
+static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+                                          struct ubuf_info *uarg,
+                                          bool success)
+{
+        struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+        struct io_kiocb *notif = cmd_to_io_kiocb(nd);
+
+        if (refcount_dec_and_test(&uarg->refcnt)) {
+                notif->io_task_work.func = __io_notif_complete_tw;
+                io_req_task_work_add(notif);
+        }
+}
+
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx,
+                                struct io_notif_slot *slot)
+        __must_hold(&ctx->uring_lock)
+{
+        struct io_kiocb *notif;
+        struct io_notif_data *nd;
+
+        if (unlikely(!io_alloc_req_refill(ctx)))
+                return NULL;
+        notif = io_alloc_req(ctx);
+        notif->opcode = IORING_OP_NOP;
+        notif->flags = 0;
+        notif->file = NULL;
+        notif->task = current;
+        io_get_task_refs(1);
+        notif->rsrc_node = NULL;
+        io_req_set_rsrc_node(notif, ctx, 0);
+        notif->cqe.user_data = slot->tag;
+        notif->cqe.flags = slot->seq++;
+        notif->cqe.res = 0;
+
+        nd = io_notif_to_data(notif);
+        nd->account_pages = 0;
+        nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+        nd->uarg.callback = io_uring_tx_zerocopy_callback;
+        /* master ref owned by io_notif_slot, will be dropped on flush */
+        refcount_set(&nd->uarg.refcnt, 1);
+        return notif;
+}
+
+void io_notif_slot_flush(struct io_notif_slot *slot)
+        __must_hold(&ctx->uring_lock)
+{
+        struct io_kiocb *notif = slot->notif;
+        struct io_notif_data *nd = io_notif_to_data(notif);
+
+        slot->notif = NULL;
+
+        /* drop slot's master ref */
+        if (refcount_dec_and_test(&nd->uarg.refcnt))
+                io_notif_complete(notif);
+}
+
+__cold int io_notif_unregister(struct io_ring_ctx *ctx)
+        __must_hold(&ctx->uring_lock)
+{
+        int i;
+
+        if (!ctx->notif_slots)
+                return -ENXIO;
+
+        for (i = 0; i < ctx->nr_notif_slots; i++) {
+                struct io_notif_slot *slot = &ctx->notif_slots[i];
+                struct io_kiocb *notif = slot->notif;
+                struct io_notif_data *nd;
+
+                if (!notif)
+                        continue;
+                nd = io_kiocb_to_cmd(notif);
+                slot->notif = NULL;
+                if (!refcount_dec_and_test(&nd->uarg.refcnt))
+                        continue;
+                notif->io_task_work.func = __io_notif_complete_tw;
+                io_req_task_work_add(notif);
+        }
+
+        kvfree(ctx->notif_slots);
+        ctx->notif_slots = NULL;
+        ctx->nr_notif_slots = 0;
+        return 0;
+}
+
+__cold int io_notif_register(struct io_ring_ctx *ctx,
+                             void __user *arg, unsigned int size)
+        __must_hold(&ctx->uring_lock)
+{
+        struct io_uring_notification_slot __user *slots;
+        struct io_uring_notification_slot slot;
+        struct io_uring_notification_register reg;
+        unsigned i;
+
+        BUILD_BUG_ON(sizeof(struct io_notif_data) > 64);
+
+        if (ctx->nr_notif_slots)
+                return -EBUSY;
+        if (size != sizeof(reg))
+                return -EINVAL;
+        if (copy_from_user(&reg, arg, sizeof(reg)))
+                return -EFAULT;
+        if (!reg.nr_slots || reg.nr_slots > IORING_MAX_NOTIF_SLOTS)
+                return -EINVAL;
+        if (reg.resv || reg.resv2 || reg.resv3)
+                return -EINVAL;
+
+        slots = u64_to_user_ptr(reg.data);
+        ctx->notif_slots = kvcalloc(reg.nr_slots, sizeof(ctx->notif_slots[0]),
+                                    GFP_KERNEL_ACCOUNT);
+        if (!ctx->notif_slots)
+                return -ENOMEM;
+
+        for (i = 0; i < reg.nr_slots; i++, ctx->nr_notif_slots++) {
+                struct io_notif_slot *notif_slot = &ctx->notif_slots[i];
+
+                if (copy_from_user(&slot, &slots[i], sizeof(slot))) {
+                        io_notif_unregister(ctx);
+                        return -EFAULT;
+                }
+                if (slot.resv[0] | slot.resv[1] | slot.resv[2]) {
+                        io_notif_unregister(ctx);
+                        return -EINVAL;
+                }
+                notif_slot->tag = slot.tag;
+        }
+        return 0;
+}
```
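For the registration path shown in io_notif_register() above, a hedged userspace counterpart might look as follows. This notification-slot interface was reworked and dropped before the feature shipped, so the sketch only illustrates what the kernel-side checks expect: the io_uring_notification_* structures come from this series' uapi header (not current kernels), the opcode name IORING_REGISTER_NOTIFIERS is an assumption, and passing sizeof(reg) as the io_uring_register(2) nr_args/size argument is likewise an assumption derived from the size check in the code above.

```c
/*
 * Hedged sketch: register notification slots against this series' uapi.
 * Assumptions (not guaranteed by any released kernel): the opcode name
 * IORING_REGISTER_NOTIFIERS, and that the register syscall's last
 * argument carries sizeof(reg) for this opcode.
 */
#include <linux/io_uring.h>   /* io_uring_notification_* as added by this series */
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define NR_SLOTS 4

static int register_notif_slots(int ring_fd)
{
        struct io_uring_notification_slot slots[NR_SLOTS];
        struct io_uring_notification_register reg;

        /* Reserved fields must be zero, or the kernel returns -EINVAL. */
        memset(slots, 0, sizeof(slots));
        memset(&reg, 0, sizeof(reg));

        /* Each slot's tag is echoed back as cqe->user_data on its
         * notification completions. */
        for (unsigned i = 0; i < NR_SLOTS; i++)
                slots[i].tag = i;

        reg.nr_slots = NR_SLOTS;               /* 1..IORING_MAX_NOTIF_SLOTS */
        reg.data = (__u64)(uintptr_t)slots;    /* consumed via u64_to_user_ptr() */

        /* Assumed opcode name and size-argument convention; see lead-in. */
        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_NOTIFIERS, &reg, sizeof(reg));
}
```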
