summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/iou-zcrx.rst2
-rw-r--r--block/ioctl.c2
-rw-r--r--drivers/block/ublk_drv.c6
-rw-r--r--drivers/nvme/host/ioctl.c2
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/fuse/dev_uring.c8
-rw-r--r--include/linux/io_uring/cmd.h69
-rw-r--r--include/linux/io_uring_types.h31
-rw-r--r--include/linux/poison.h3
-rw-r--r--include/trace/events/io_uring.h4
-rw-r--r--include/uapi/linux/io_uring.h38
-rw-r--r--include/uapi/linux/io_uring/query.h41
-rw-r--r--io_uring/Makefile2
-rw-r--r--io_uring/cancel.c1
-rw-r--r--io_uring/cmd_net.c3
-rw-r--r--io_uring/fdinfo.c24
-rw-r--r--io_uring/futex.c13
-rw-r--r--io_uring/io_uring.c145
-rw-r--r--io_uring/io_uring.h120
-rw-r--r--io_uring/kbuf.c67
-rw-r--r--io_uring/kbuf.h39
-rw-r--r--io_uring/net.c160
-rw-r--r--io_uring/nop.c17
-rw-r--r--io_uring/notif.c5
-rw-r--r--io_uring/opdef.c1
-rw-r--r--io_uring/openclose.c1
-rw-r--r--io_uring/poll.c4
-rw-r--r--io_uring/query.c101
-rw-r--r--io_uring/query.h9
-rw-r--r--io_uring/register.c60
-rw-r--r--io_uring/rsrc.c8
-rw-r--r--io_uring/rw.c63
-rw-r--r--io_uring/splice.c1
-rw-r--r--io_uring/uring_cmd.c83
-rw-r--r--io_uring/waitid.c4
-rw-r--r--io_uring/zcrx.c295
-rw-r--r--io_uring/zcrx.h19
37 files changed, 1001 insertions, 452 deletions
diff --git a/Documentation/networking/iou-zcrx.rst b/Documentation/networking/iou-zcrx.rst
index 0127319b30bb..54a72e172bdc 100644
--- a/Documentation/networking/iou-zcrx.rst
+++ b/Documentation/networking/iou-zcrx.rst
@@ -75,7 +75,7 @@ Create an io_uring instance with the following required setup flags::
IORING_SETUP_SINGLE_ISSUER
IORING_SETUP_DEFER_TASKRUN
- IORING_SETUP_CQE32
+ IORING_SETUP_CQE32 or IORING_SETUP_CQE_MIXED
Create memory area
------------------
diff --git a/block/ioctl.c b/block/ioctl.c
index f7b0006ca45d..c9ea8e53871e 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -776,7 +776,7 @@ static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
if (bic->res == -EAGAIN && bic->nowait)
io_uring_cmd_issue_blocking(cmd);
else
- io_uring_cmd_done(cmd, bic->res, 0, issue_flags);
+ io_uring_cmd_done(cmd, bic->res, issue_flags);
}
static void bio_cmd_bio_end_io(struct bio *bio)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 67d4a867aec4..8fdc26a61104 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1189,7 +1189,7 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req);
/* tell ublksrv one io request is coming */
- io_uring_cmd_done(cmd, res, 0, issue_flags);
+ io_uring_cmd_done(cmd, res, issue_flags);
}
#define UBLK_REQUEUE_DELAY_MS 3
@@ -1873,7 +1873,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
spin_unlock(&ubq->cancel_lock);
if (!done)
- io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
+ io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, issue_flags);
}
/*
@@ -2520,7 +2520,7 @@ static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
if (ret != -EIOCBQUEUED)
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
+ io_uring_cmd_done(cmd, ret, issue_flags);
}
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 6b3ac8ae3f34..e28bb9113f64 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -410,7 +410,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
if (pdu->bio)
blk_rq_unmap_user(pdu->bio);
- io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
+ io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, issue_flags);
}
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a454b5ba2097..185bef0df1c2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4695,7 +4695,7 @@ out:
btrfs_unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
+ io_uring_cmd_done(cmd, ret, issue_flags);
add_rchar(current, ret);
for (index = 0; index < priv->nr_pages; index++)
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 249b210becb1..a30c44234a4e 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -351,7 +351,7 @@ static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
spin_unlock(&queue->lock);
if (cmd)
- io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED);
+ io_uring_cmd_done(cmd, -ENOTCONN, IO_URING_F_UNLOCKED);
if (req)
fuse_uring_stop_fuse_req_end(req);
@@ -518,7 +518,7 @@ static void fuse_uring_cancel(struct io_uring_cmd *cmd,
if (need_cmd_done) {
/* no queue lock to avoid lock order issues */
- io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
+ io_uring_cmd_done(cmd, -ENOTCONN, issue_flags);
}
}
@@ -733,7 +733,7 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
list_move_tail(&ent->list, &queue->ent_in_userspace);
spin_unlock(&queue->lock);
- io_uring_cmd_done(cmd, 0, 0, issue_flags);
+ io_uring_cmd_done(cmd, 0, issue_flags);
return 0;
}
@@ -1200,7 +1200,7 @@ static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
ent->cmd = NULL;
spin_unlock(&queue->lock);
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
+ io_uring_cmd_done(cmd, ret, issue_flags);
}
/*
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index cfa6d0c0c322..7509025b4071 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -11,11 +11,14 @@
/* io_uring_cmd is being issued again */
#define IORING_URING_CMD_REISSUE (1U << 31)
+typedef void (*io_uring_cmd_tw_t)(struct io_uring_cmd *cmd,
+ unsigned issue_flags);
+
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
/* callback to defer completions to task context */
- void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
+ io_uring_cmd_tw_t task_work_cb;
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
@@ -53,11 +56,11 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
* Note: the caller should never hard code @issue_flags and is only allowed
* to pass the mask provided by the core io_uring code.
*/
-void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2,
- unsigned issue_flags);
+void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2,
+ unsigned issue_flags, bool is_cqe32);
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned),
+ io_uring_cmd_tw_t task_work_cb,
unsigned flags);
/*
@@ -70,6 +73,21 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
/* Execute the request from a blocking context */
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
+/*
+ * Select a buffer from the provided buffer group for multishot uring_cmd.
+ * Returns the selected buffer address and size.
+ */
+struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd,
+ unsigned buf_group, size_t *len,
+ unsigned int issue_flags);
+
+/*
+ * Complete a multishot uring_cmd event. This will post a CQE to the completion
+ * queue and update the provided buffer.
+ */
+bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+ struct io_br_sel *sel, unsigned int issue_flags);
+
#else
static inline int
io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
@@ -86,13 +104,12 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
{
return -EOPNOTSUPP;
}
-static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
- u64 ret2, unsigned issue_flags)
+static inline void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret,
+ u64 ret2, unsigned issue_flags, bool is_cqe32)
{
}
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned),
- unsigned flags)
+ io_uring_cmd_tw_t task_work_cb, unsigned flags)
{
}
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
@@ -102,28 +119,28 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
{
}
-#endif
-
-/*
- * Polled completions must ensure they are coming from a poll queue, and
- * hence are completed inside the usual poll handling loops.
- */
-static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd,
- ssize_t ret, ssize_t res2)
+static inline struct io_br_sel
+io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group,
+ size_t *len, unsigned int issue_flags)
+{
+ return (struct io_br_sel) { .val = -EOPNOTSUPP };
+}
+static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+ struct io_br_sel *sel, unsigned int issue_flags)
{
- lockdep_assert(in_task());
- io_uring_cmd_done(ioucmd, ret, res2, 0);
+ return true;
}
+#endif
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+ io_uring_cmd_tw_t task_work_cb)
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+ io_uring_cmd_tw_t task_work_cb)
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
}
@@ -142,6 +159,18 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
return cmd_to_io_kiocb(cmd)->ctx;
}
+static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret,
+ unsigned issue_flags)
+{
+ return __io_uring_cmd_done(ioucmd, ret, 0, issue_flags, false);
+}
+
+static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret,
+ u64 res2, unsigned issue_flags)
+{
+ return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, true);
+}
+
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
void (*release)(void *), unsigned int index,
unsigned int issue_flags);
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 12f5ee43850e..c2ea6280901d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -86,6 +86,25 @@ struct io_mapped_region {
};
/*
+ * Return value from io_buffer_list selection, to avoid stashing it in
+ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
+ * across execution contexts is fine. But for ring provided buffers, the
+ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
+ * persists, it's better to just keep the buffer local for those cases.
+ */
+struct io_br_sel {
+ struct io_buffer_list *buf_list;
+ /*
+ * Some selection parts return the user address, others return an error.
+ */
+ union {
+ void __user *addr;
+ ssize_t val;
+ };
+};
+
+
+/*
* Arbitrary limit, can be raised if need be
*/
#define IO_RINGFD_REG_MAX 16
@@ -671,12 +690,6 @@ struct io_kiocb {
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
- /*
- * stores buffer ID for ring provided buffers, valid IFF
- * REQ_F_BUFFER_RING is set.
- */
- struct io_buffer_list *buf_list;
-
struct io_rsrc_node *buf_node;
};
@@ -724,10 +737,4 @@ struct io_overflow_cqe {
struct list_head list;
struct io_uring_cqe cqe;
};
-
-static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx)
-{
- return ctx->flags & IORING_SETUP_CQE32;
-}
-
#endif
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 8ca2235f78d5..299e2dd7da6d 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -90,4 +90,7 @@
/********** lib/stackdepot.c **********/
#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
+/********** io_uring/ **********/
+#define IO_URING_PTR_POISON ((void *)(0x1091UL + POISON_POINTER_DELTA))
+
#endif
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 178ab6f611be..45d15460b495 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -340,8 +340,8 @@ TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe),
__entry->user_data = cqe->user_data;
__entry->res = cqe->res;
__entry->cflags = cqe->flags;
- __entry->extra1 = io_ctx_cqe32(ctx) ? cqe->big_cqe[0] : 0;
- __entry->extra2 = io_ctx_cqe32(ctx) ? cqe->big_cqe[1] : 0;
+ __entry->extra1 = ctx->flags & IORING_SETUP_CQE32 || cqe->flags & IORING_CQE_F_32 ? cqe->big_cqe[0] : 0;
+ __entry->extra2 = ctx->flags & IORING_SETUP_CQE32 || cqe->flags & IORING_CQE_F_32 ? cqe->big_cqe[1] : 0;
),
TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6957dc539d83..a0cc1cc0dd01 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -225,6 +225,12 @@ enum io_uring_sqe_flags_bit {
/* Use hybrid poll in iopoll process */
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+/*
+ * Allow both 16b and 32b CQEs. If a 32b CQE is posted, it will have
+ * IORING_CQE_F_32 set in cqe->flags.
+ */
+#define IORING_SETUP_CQE_MIXED (1U << 18)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
@@ -298,9 +304,13 @@ enum io_uring_op {
* sqe->uring_cmd_flags top 8bits aren't available for userspace
* IORING_URING_CMD_FIXED use registered buffer; pass this flag
* along with setting sqe->buf_index.
+ * IORING_URING_CMD_MULTISHOT must be used with buffer select, like other
+ * multishot commands. Not compatible with
+ * IORING_URING_CMD_FIXED, for now.
*/
#define IORING_URING_CMD_FIXED (1U << 0)
-#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED
+#define IORING_URING_CMD_MULTISHOT (1U << 1)
+#define IORING_URING_CMD_MASK (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
/*
@@ -454,6 +464,7 @@ enum io_uring_msg_ring_flags {
#define IORING_NOP_FIXED_FILE (1U << 2)
#define IORING_NOP_FIXED_BUFFER (1U << 3)
#define IORING_NOP_TW (1U << 4)
+#define IORING_NOP_CQE32 (1U << 5)
/*
* IO completion data structure (Completion Queue Entry)
@@ -487,12 +498,22 @@ struct io_uring_cqe {
* other provided buffer type, all completions with a
* buffer passed back is automatically returned to the
* application.
+ * IORING_CQE_F_SKIP If set, then the application/liburing must ignore this
+ * CQE. Its only purpose is to fill a gap in the ring,
+ * if posting a large CQE is attempted when the ring has
+ * just a single small CQE worth of space left before
+ * wrapping.
+ * IORING_CQE_F_32 If set, this is a 32b/big-cqe posting. Used with rings
+ * set up in mixed CQE mode, where both 16b and 32b
+ * CQEs may be posted to the CQ ring.
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
#define IORING_CQE_F_NOTIF (1U << 3)
#define IORING_CQE_F_BUF_MORE (1U << 4)
+#define IORING_CQE_F_SKIP (1U << 5)
+#define IORING_CQE_F_32 (1U << 15)
#define IORING_CQE_BUFFER_SHIFT 16
@@ -665,6 +686,12 @@ enum io_uring_register_op {
IORING_REGISTER_MEM_REGION = 34,
+ /* query various aspects of io_uring, see linux/io_uring/query.h */
+ IORING_REGISTER_QUERY = 35,
+
+ /* return zcrx buffers back into circulation */
+ IORING_REGISTER_ZCRX_REFILL = 36,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -1046,6 +1073,15 @@ struct io_uring_zcrx_ifq_reg {
__u64 __resv[3];
};
+struct io_uring_zcrx_sync_refill {
+ __u32 zcrx_id;
+ /* the number of entries to return */
+ __u32 nr_entries;
+ /* pointer to an array of struct io_uring_zcrx_rqe */
+ __u64 rqes;
+ __u64 __resv[2];
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
new file mode 100644
index 000000000000..5d754322a27c
--- /dev/null
+++ b/include/uapi/linux/io_uring/query.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring query interface.
+ */
+#ifndef LINUX_IO_URING_QUERY_H
+#define LINUX_IO_URING_QUERY_H
+
+#include <linux/types.h>
+
+struct io_uring_query_hdr {
+ __u64 next_entry;
+ __u64 query_data;
+ __u32 query_op;
+ __u32 size;
+ __s32 result;
+ __u32 __resv[3];
+};
+
+enum {
+ IO_URING_QUERY_OPCODES = 0,
+
+ __IO_URING_QUERY_MAX,
+};
+
+/* Doesn't require a ring */
+struct io_uring_query_opcode {
+ /* The number of supported IORING_OP_* opcodes */
+ __u32 nr_request_opcodes;
+ /* The number of supported IORING_[UN]REGISTER_* opcodes */
+ __u32 nr_register_opcodes;
+ /* Bitmask of all supported IORING_FEAT_* flags */
+ __u64 feature_flags;
+ /* Bitmask of all supported IORING_SETUP_* flags */
+ __u64 ring_setup_flags;
+ /* Bitmask of all supported IORING_ENTER_* flags */
+ __u64 enter_flags;
+ /* Bitmask of all supported IOSQE_* flags */
+ __u64 sqe_flags;
+};
+
+#endif
diff --git a/io_uring/Makefile b/io_uring/Makefile
index b3f1bd492804..bc4e4a3fa0a5 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
sync.o msg_ring.o advise.o openclose.o \
statx.o timeout.o cancel.o \
waitid.o register.o truncate.o \
- memmap.o alloc_cache.o
+ memmap.o alloc_cache.o query.o
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 6d57602304df..64b51e82baa2 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -11,6 +11,7 @@
#include <uapi/linux/io_uring.h>
+#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "poll.h"
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
index 3866fe6ff541..27a09aa4c9d0 100644
--- a/io_uring/cmd_net.c
+++ b/io_uring/cmd_net.c
@@ -4,6 +4,7 @@
#include <net/sock.h>
#include "uring_cmd.h"
+#include "io_uring.h"
static inline int io_uring_cmd_getsockopt(struct socket *sock,
struct io_uring_cmd *cmd,
@@ -73,7 +74,7 @@ static bool io_process_timestamp_skb(struct io_uring_cmd *cmd, struct sock *sk,
cqe->user_data = 0;
cqe->res = tskey;
- cqe->flags = IORING_CQE_F_MORE;
+ cqe->flags = IORING_CQE_F_MORE | ctx_cqe32_flags(cmd_to_io_kiocb(cmd)->ctx);
cqe->flags |= tstype << IORING_TIMESTAMP_TYPE_SHIFT;
if (ret == SOF_TIMESTAMPING_TX_HARDWARE)
cqe->flags |= IORING_CQE_F_TSTAMP_HW;
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 9798d6fb4ec7..ff3364531c77 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -9,7 +9,7 @@
#include <uapi/linux/io_uring.h>
-#include "io_uring.h"
+#include "filetable.h"
#include "sqpoll.h"
#include "fdinfo.h"
#include "cancel.h"
@@ -65,15 +65,12 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
unsigned int sq_tail = READ_ONCE(r->sq.tail);
unsigned int cq_head = READ_ONCE(r->cq.head);
unsigned int cq_tail = READ_ONCE(r->cq.tail);
- unsigned int cq_shift = 0;
unsigned int sq_shift = 0;
- unsigned int sq_entries, cq_entries;
+ unsigned int sq_entries;
int sq_pid = -1, sq_cpu = -1;
u64 sq_total_time = 0, sq_work_time = 0;
unsigned int i;
- if (ctx->flags & IORING_SETUP_CQE32)
- cq_shift = 1;
if (ctx->flags & IORING_SETUP_SQE128)
sq_shift = 1;
@@ -125,18 +122,23 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "\n");
}
seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head);
- cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
- for (i = 0; i < cq_entries; i++) {
- unsigned int entry = i + cq_head;
- struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift];
+ while (cq_head < cq_tail) {
+ struct io_uring_cqe *cqe;
+ bool cqe32 = false;
+ cqe = &r->cqes[(cq_head & cq_mask)];
+ if (cqe->flags & IORING_CQE_F_32 || ctx->flags & IORING_SETUP_CQE32)
+ cqe32 = true;
seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x",
- entry & cq_mask, cqe->user_data, cqe->res,
+ cq_head & cq_mask, cqe->user_data, cqe->res,
cqe->flags);
- if (cq_shift)
+ if (cqe32)
seq_printf(m, ", extra1:%llu, extra2:%llu\n",
cqe->big_cqe[0], cqe->big_cqe[1]);
seq_printf(m, "\n");
+ cq_head++;
+ if (cqe32)
+ cq_head++;
}
if (ctx->flags & IORING_SETUP_SQPOLL) {
diff --git a/io_uring/futex.c b/io_uring/futex.c
index 9113a44984f3..64f3bd51c84c 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -43,7 +43,6 @@ void io_futex_cache_free(struct io_ring_ctx *ctx)
static void __io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
{
- req->async_data = NULL;
hlist_del_init(&req->hash_node);
io_req_task_complete(req, tw);
}
@@ -54,6 +53,7 @@ static void io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
io_tw_lock(ctx, tw);
io_cache_free(&ctx->futex_cache, req->async_data);
+ io_req_async_data_clear(req, 0);
__io_futex_complete(req, tw);
}
@@ -72,8 +72,7 @@ static void io_futexv_complete(struct io_kiocb *req, io_tw_token_t tw)
io_req_set_res(req, res, 0);
}
- kfree(req->async_data);
- req->flags &= ~REQ_F_ASYNC_DATA;
+ io_req_async_data_free(req);
__io_futex_complete(req, tw);
}
@@ -232,9 +231,7 @@ int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
io_ring_submit_unlock(ctx, issue_flags);
req_set_fail(req);
io_req_set_res(req, ret, 0);
- kfree(futexv);
- req->async_data = NULL;
- req->flags &= ~REQ_F_ASYNC_DATA;
+ io_req_async_data_free(req);
return IOU_COMPLETE;
}
@@ -310,9 +307,7 @@ done:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- req->async_data = NULL;
- req->flags &= ~REQ_F_ASYNC_DATA;
- kfree(ifd);
+ io_req_async_data_free(req);
return IOU_COMPLETE;
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 93665cebe9bd..49ebdeb5b2d9 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -79,6 +79,7 @@
#include "io-wq.h"
+#include "filetable.h"
#include "io_uring.h"
#include "opdef.h"
#include "refs.h"
@@ -108,9 +109,6 @@
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
IOSQE_IO_HARDLINK | IOSQE_ASYNC)
-#define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \
- IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
-
#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
@@ -179,6 +177,26 @@ static const struct ctl_table kernel_io_uring_disabled_table[] = {
};
#endif
+static void io_poison_cached_req(struct io_kiocb *req)
+{
+ req->ctx = IO_URING_PTR_POISON;
+ req->tctx = IO_URING_PTR_POISON;
+ req->file = IO_URING_PTR_POISON;
+ req->creds = IO_URING_PTR_POISON;
+ req->io_task_work.func = IO_URING_PTR_POISON;
+ req->apoll = IO_URING_PTR_POISON;
+}
+
+static void io_poison_req(struct io_kiocb *req)
+{
+ io_poison_cached_req(req);
+ req->async_data = IO_URING_PTR_POISON;
+ req->kbuf = IO_URING_PTR_POISON;
+ req->comp_list.next = IO_URING_PTR_POISON;
+ req->file_node = IO_URING_PTR_POISON;
+ req->link = IO_URING_PTR_POISON;
+}
+
static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
{
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
@@ -235,6 +253,8 @@ static inline void req_fail_link_node(struct io_kiocb *req, int res)
static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
+ if (IS_ENABLED(CONFIG_KASAN))
+ io_poison_cached_req(req);
wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
}
@@ -594,27 +614,29 @@ static void io_cq_unlock_post(struct io_ring_ctx *ctx)
static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying)
{
- size_t cqe_size = sizeof(struct io_uring_cqe);
-
lockdep_assert_held(&ctx->uring_lock);
/* don't abort if we're dying, entries must get freed */
if (!dying && __io_cqring_events(ctx) == ctx->cq_entries)
return;
- if (ctx->flags & IORING_SETUP_CQE32)
- cqe_size <<= 1;
-
io_cq_lock(ctx);
while (!list_empty(&ctx->cq_overflow_list)) {
+ size_t cqe_size = sizeof(struct io_uring_cqe);
struct io_uring_cqe *cqe;
struct io_overflow_cqe *ocqe;
+ bool is_cqe32 = false;
ocqe = list_first_entry(&ctx->cq_overflow_list,
struct io_overflow_cqe, list);
+ if (ocqe->cqe.flags & IORING_CQE_F_32 ||
+ ctx->flags & IORING_SETUP_CQE32) {
+ is_cqe32 = true;
+ cqe_size <<= 1;
+ }
if (!dying) {
- if (!io_get_cqe_overflow(ctx, &cqe, true))
+ if (!io_get_cqe_overflow(ctx, &cqe, true, is_cqe32))
break;
memcpy(cqe, &ocqe->cqe, cqe_size);
}
@@ -726,10 +748,12 @@ static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
{
struct io_overflow_cqe *ocqe;
size_t ocq_size = sizeof(struct io_overflow_cqe);
- bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
+ bool is_cqe32 = false;
- if (is_cqe32)
+ if (cqe->flags & IORING_CQE_F_32 || ctx->flags & IORING_SETUP_CQE32) {
+ is_cqe32 = true;
ocq_si