summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/cifs/cifsglob.h 1
-rw-r--r-- fs/cifs/readdir.c 63
-rw-r--r-- fs/cifs/smb2file.c 2
-rw-r--r-- fs/drop_caches.c 2
-rw-r--r-- fs/inode.c 7
-rw-r--r-- fs/io-wq.c 10
-rw-r--r-- fs/io_uring.c 690
-rw-r--r-- fs/locks.c 2
-rw-r--r-- fs/notify/fsnotify.c 4
-rw-r--r-- fs/quota/dquot.c 1
-rw-r--r-- fs/super.c 4
11 files changed, 430 insertions, 356 deletions
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ce9bac756c2a..40705e862451 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1693,6 +1693,7 @@ struct cifs_fattr {
struct timespec64 cf_atime;
struct timespec64 cf_mtime;
struct timespec64 cf_ctime;
+ u32 cf_cifstag;
};
static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 3925a7bfc74d..d17587c2c4ab 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -139,6 +139,28 @@ retry:
dput(dentry);
}
+static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
+{
+ if (!(fattr->cf_cifsattrs & ATTR_REPARSE))
+ return false;
+ /*
+ * The DFS tags should be only interpreted by server side as per
+ * MS-FSCC 2.1.2.1, but let's include them anyway.
+ *
+ * Besides, if cf_cifstag is unset (0), then we still need it to be
+ * revalidated to know exactly what reparse point it is.
+ */
+ switch (fattr->cf_cifstag) {
+ case IO_REPARSE_TAG_DFS:
+ case IO_REPARSE_TAG_DFSR:
+ case IO_REPARSE_TAG_SYMLINK:
+ case IO_REPARSE_TAG_NFS:
+ case 0:
+ return true;
+ }
+ return false;
+}
+
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -158,7 +180,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
* is a symbolic link, DFS referral or a reparse point with a direct
* access like junctions, deduplicated files, NFS symlinks.
*/
- if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ if (reparse_file_needs_reval(fattr))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
/* non-unix readdir doesn't provide nlink */
@@ -194,19 +216,37 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
}
}
+static void __dir_info_to_fattr(struct cifs_fattr *fattr, const void *info)
+{
+ const FILE_DIRECTORY_INFO *fi = info;
+
+ memset(fattr, 0, sizeof(*fattr));
+ fattr->cf_cifsattrs = le32_to_cpu(fi->ExtFileAttributes);
+ fattr->cf_eof = le64_to_cpu(fi->EndOfFile);
+ fattr->cf_bytes = le64_to_cpu(fi->AllocationSize);
+ fattr->cf_createtime = le64_to_cpu(fi->CreationTime);
+ fattr->cf_atime = cifs_NTtimeToUnix(fi->LastAccessTime);
+ fattr->cf_ctime = cifs_NTtimeToUnix(fi->ChangeTime);
+ fattr->cf_mtime = cifs_NTtimeToUnix(fi->LastWriteTime);
+}
+
void
cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
struct cifs_sb_info *cifs_sb)
{
- memset(fattr, 0, sizeof(*fattr));
- fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
- fattr->cf_eof = le64_to_cpu(info->EndOfFile);
- fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
- fattr->cf_createtime = le64_to_cpu(info->CreationTime);
- fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
- fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
- fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+ __dir_info_to_fattr(fattr, info);
+ cifs_fill_common_info(fattr, cifs_sb);
+}
+static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
+ SEARCH_ID_FULL_DIR_INFO *info,
+ struct cifs_sb_info *cifs_sb)
+{
+ __dir_info_to_fattr(fattr, info);
+
+ /* See MS-FSCC 2.4.18 FileIdFullDirectoryInformation */
+ if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ fattr->cf_cifstag = le32_to_cpu(info->EaSize);
cifs_fill_common_info(fattr, cifs_sb);
}
@@ -755,6 +795,11 @@ static int cifs_filldir(char *find_entry, struct file *file,
(FIND_FILE_STANDARD_INFO *)find_entry,
cifs_sb);
break;
+ case SMB_FIND_FILE_ID_FULL_DIR_INFO:
+ cifs_fulldir_info_to_fattr(&fattr,
+ (SEARCH_ID_FULL_DIR_INFO *)find_entry,
+ cifs_sb);
+ break;
default:
cifs_dir_info_to_fattr(&fattr,
(FILE_DIRECTORY_INFO *)find_entry,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 8b0b512c5792..afe1f03aabe3 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -67,7 +67,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
goto out;
- if (oparms->tcon->use_resilient) {
+ if (oparms->tcon->use_resilient) {
/* default timeout is 0, servers pick default (120 seconds) */
nr_ioctl_req.Timeout =
cpu_to_le32(oparms->tcon->handle_timeout);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d31b6c72b476..dc1a1d5d825b 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -35,11 +35,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
- cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/inode.c b/fs/inode.c
index fef457a42882..96d62d97694e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -676,6 +676,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
struct inode *inode, *next;
LIST_HEAD(dispose);
+again:
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
@@ -698,6 +699,12 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
+ if (need_resched()) {
+ spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
+ dispose_list(&dispose);
+ goto again;
+ }
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 11e80b7252a8..541c8a3e0bbb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -92,7 +92,6 @@ struct io_wqe {
struct io_wqe_acct acct[2];
struct hlist_nulls_head free_list;
- struct hlist_nulls_head busy_list;
struct list_head all_list;
struct io_wq *wq;
@@ -327,7 +326,6 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
if (worker->flags & IO_WORKER_F_FREE) {
worker->flags &= ~IO_WORKER_F_FREE;
hlist_nulls_del_init_rcu(&worker->nulls_node);
- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->busy_list);
}
/*
@@ -365,7 +363,6 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
{
if (!(worker->flags & IO_WORKER_F_FREE)) {
worker->flags |= IO_WORKER_F_FREE;
- hlist_nulls_del_init_rcu(&worker->nulls_node);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
}
@@ -432,6 +429,8 @@ next:
if (signal_pending(current))
flush_signals(current);
+ cond_resched();
+
spin_lock_irq(&worker->lock);
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
@@ -798,10 +797,6 @@ void io_wq_cancel_all(struct io_wq *wq)
set_bit(IO_WQ_BIT_CANCEL, &wq->state);
- /*
- * Browse both lists, as there's a gap between handing work off
- * to a worker and the worker putting itself on the busy_list
- */
rcu_read_lock();
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
@@ -1049,7 +1044,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
- INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1);
INIT_LIST_HEAD(&wqe->all_list);
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f084e3cf835..562e3a1a1bf9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -330,6 +330,26 @@ struct io_timeout {
struct file *file;
u64 addr;
int flags;
+ unsigned count;
+};
+
+struct io_rw {
+ /* NOTE: kiocb has the file as the first member, so don't do it here */
+ struct kiocb kiocb;
+ u64 addr;
+ u64 len;
+};
+
+struct io_connect {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int addr_len;
+};
+
+struct io_sr_msg {
+ struct file *file;
+ struct user_msghdr __user *msg;
+ int msg_flags;
};
struct io_async_connect {
@@ -351,7 +371,6 @@ struct io_async_rw {
};
struct io_async_ctx {
- struct io_uring_sqe sqe;
union {
struct io_async_rw rw;
struct io_async_msghdr msg;
@@ -369,15 +388,16 @@ struct io_async_ctx {
struct io_kiocb {
union {
struct file *file;
- struct kiocb rw;
+ struct io_rw rw;
struct io_poll_iocb poll;
struct io_accept accept;
struct io_sync sync;
struct io_cancel cancel;
struct io_timeout timeout;
+ struct io_connect connect;
+ struct io_sr_msg sr_msg;
};
- const struct io_uring_sqe *sqe;
struct io_async_ctx *io;
struct file *ring_file;
int ring_fd;
@@ -411,7 +431,6 @@ struct io_kiocb {
#define REQ_F_INFLIGHT 16384 /* on inflight list */
#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
-#define REQ_F_PREPPED 131072 /* request already opcode prepared */
u64 user_data;
u32 result;
u32 sequence;
@@ -609,33 +628,31 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
{
bool do_hashed = false;
- if (req->sqe) {
- switch (req->opcode) {
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
- /* only regular files should be hashed for writes */
- if (req->flags & REQ_F_ISREG)
- do_hashed = true;
- /* fall-through */
- case IORING_OP_READV:
- case IORING_OP_READ_FIXED:
- case IORING_OP_SENDMSG:
- case IORING_OP_RECVMSG:
- case IORING_OP_ACCEPT:
- case IORING_OP_POLL_ADD:
- case IORING_OP_CONNECT:
- /*
- * We know REQ_F_ISREG is not set on some of these
- * opcodes, but this enables us to keep the check in
- * just one place.
- */
- if (!(req->flags & REQ_F_ISREG))
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- break;
- }
- if (io_req_needs_user(req))
- req->work.flags |= IO_WQ_WORK_NEEDS_USER;
+ switch (req->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ /* only regular files should be hashed for writes */
+ if (req->flags & REQ_F_ISREG)
+ do_hashed = true;
+ /* fall-through */
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_SENDMSG:
+ case IORING_OP_RECVMSG:
+ case IORING_OP_ACCEPT:
+ case IORING_OP_POLL_ADD:
+ case IORING_OP_CONNECT:
+ /*
+ * We know REQ_F_ISREG is not set on some of these
+ * opcodes, but this enables us to keep the check in
+ * just one place.
+ */
+ if (!(req->flags & REQ_F_ISREG))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+ break;
}
+ if (io_req_needs_user(req))
+ req->work.flags |= IO_WQ_WORK_NEEDS_USER;
*link = io_prep_linked_timeout(req);
return do_hashed;
@@ -1180,7 +1197,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
ret = 0;
list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
/*
* Move completed entries to our local list. If we find a
@@ -1335,7 +1352,7 @@ static inline void req_set_fail_links(struct io_kiocb *req)
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -1347,7 +1364,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
io_complete_rw_common(kiocb, res);
io_put_req(req);
@@ -1355,7 +1372,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
struct io_kiocb *nxt = NULL;
io_complete_rw_common(kiocb, res);
@@ -1366,7 +1383,7 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -1400,7 +1417,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
list);
- if (list_req->rw.ki_filp != req->rw.ki_filp)
+ if (list_req->file != req->file)
ctx->poll_multi_file = true;
}
@@ -1471,11 +1488,11 @@ static bool io_file_supports_async(struct file *file)
return false;
}
-static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
unsigned ioprio;
int ret;
@@ -1524,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
+
+ req->rw.addr = READ_ONCE(sqe->addr);
+ req->rw.len = READ_ONCE(sqe->len);
+ /* we own ->private, reuse it for the buffer index */
+ req->rw.kiocb.private = (void *) (unsigned long)
+ READ_ONCE(sqe->buf_index);
return 0;
}
@@ -1557,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
io_rw_done(kiocb, ret);
}
-static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
- const struct io_uring_sqe *sqe,
+static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
struct iov_iter *iter)
{
- size_t len = READ_ONCE(sqe->len);
+ struct io_ring_ctx *ctx = req->ctx;
+ size_t len = req->rw.len;
struct io_mapped_ubuf *imu;
unsigned index, buf_index;
size_t offset;
@@ -1571,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
if (unlikely(!ctx->user_bufs))
return -EFAULT;
- buf_index = READ_ONCE(sqe->buf_index);
+ buf_index = (unsigned long) req->rw.kiocb.private;
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
index = array_index_nospec(buf_index, ctx->nr_user_bufs);
imu = &ctx->user_bufs[index];
- buf_addr = READ_ONCE(sqe->addr);
+ buf_addr = req->rw.addr;
/* overflow */
if (buf_addr + len < buf_addr)
@@ -1634,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
struct iovec **iovec, struct iov_iter *iter)
{
- const struct io_uring_sqe *sqe = req->sqe;
- void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
- size_t sqe_len = READ_ONCE(sqe->len);
+ void __user *buf = u64_to_user_ptr(req->rw.addr);
+ size_t sqe_len = req->rw.len;
u8 opcode;
- /*
- * We're reading ->opcode for the second time, but the first read
- * doesn't care whether it's _FIXED or not, so it doesn't matter
- * whether ->opcode changes concurrently. The first read does care
- * about whether it is a READ or a WRITE, so we don't trust this read
- * for that purpose and instead let the caller pass in the read/write
- * flag.
- */
opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL;
- return io_import_fixed(req->ctx, rw, sqe, iter);
+ return io_import_fixed(req, rw, iter);
}
+ /* buffer index only valid with fixed read/write */
+ if (req->rw.kiocb.private)
+ return -EINVAL;
+
if (req->io) {
struct io_async_rw *iorw = &req->io->rw;
@@ -1750,13 +1768,7 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
static int io_alloc_async_ctx(struct io_kiocb *req)
{
req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
- if (req->io) {
- memcpy(&req->io->sqe, req->sqe, sizeof(req->io->sqe));
- req->sqe = &req->io->sqe;
- return 0;
- }
-
- return 1;
+ return req->io == NULL;
}
static void io_rw_async(struct io_wq_work **workptr)
@@ -1782,46 +1794,52 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
return 0;
}
-static int io_read_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF;
- return io_import_iovec(READ, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t io_size, ret;
- if (!req->io) {
- ret = io_read_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(READ, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(READ, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = req->file;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1830,20 +1848,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(file)) {
+ if (force_nonblock && !io_file_supports_async(req->file)) {
req->flags |= REQ_F_MUST_PUNT;
goto copy_iov;
}
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
- if (file->f_op->read_iter)
- ret2 = call_read_iter(file, kiocb, &iter);
+ if (req->file->f_op->read_iter)
+ ret2 = call_read_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+ ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
/*
* In case of a short read, punt to async. This can happen
@@ -1875,46 +1893,52 @@ out_free:
return ret;
}
-static int io_write_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
- return io_import_iovec(WRITE, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t ret, io_size;
- if (!req->io) {
- ret = io_write_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(WRITE, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(WRITE, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = kiocb->ki_filp;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1934,7 +1958,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
goto copy_iov;
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
@@ -1946,17 +1970,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
- __sb_start_write(file_inode(file)->i_sb,
+ __sb_start_write(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE, true);
- __sb_writers_release(file_inode(file)->i_sb,
+ __sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
kiocb->ki_flags |= IOCB_WRITE;
- if (file->f_op->write_iter)
- ret2 = call_write_iter(file, kiocb, &iter);
+ if (req->file->f_op->write_iter)
+ ret2 = call_write_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
+ ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
kiocb_done(kiocb, ret2, nxt, req->in_async);
} else {
@@ -1989,13 +2013,10 @@ static int io_nop(struct io_kiocb *req)
return 0;
}
-static int io_prep_fsync(struct io_kiocb *req)
+static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (!req->file)
return -EBADF;
@@ -2010,7 +2031,6 @@ static int io_prep_fsync(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2036,7 +2056,7 @@ static void io_fsync_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
- ret = vfs_fsync_range(req->rw.ki_filp, req->sync.off,
+ ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
if (ret < 0)
@@ -2051,11 +2071,6 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct io_wq_work *work, *old_work;
- int ret;
-
- ret = io_prep_fsync(req);
- if (ret)
- return ret;
/* fsync always requires a blocking context */
if (force_nonblock) {
@@ -2071,13 +2086,10 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
return 0;
}
-static int io_prep_sfr(struct io_kiocb *req)
+static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (!req->file)
return -EBADF;
@@ -2089,7 +2101,6 @@ static int io_prep_sfr(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len);
req->sync.flags = READ_ONCE(sqe->sync_range_flags);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2102,7 +2113,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
- ret = sync_file_range(req->rw.ki_filp, req->sync.off, req->sync.len,
+ ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
if (ret < 0)
req_set_fail_links(req);
@@ -2116,11 +2127,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct io_wq_work *work, *old_work;
- int ret;
-
- ret = io_prep_sfr(req);
- if (ret)
- return ret;
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
@@ -2149,19 +2155,23 @@ static void io_sendrecv_async(struct io_wq_work **workptr)
}
#endif
-static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
+
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov;
- return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov);
+ return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2169,7 +2179,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2183,12 +2192,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
struct sockaddr_storage addr;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
if (req->io) {
kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr;
@@ -2197,13 +2200,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
+ struct io_sr_msg *sr = &req->sr_msg;
+
kmsg = &io.msg;
kmsg->msg.msg_name = &addr;
- ret = io_sendmsg_prep(req, &io);
+
+ io.msg.iov = io.msg.fast_iov;
+ ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (force_nonblock && ret == -EAGAIN) {
if (req->io)
@@ -2218,7 +2232,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
-out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
io_cqring_add_event(req, ret);
@@ -2231,20 +2244,24 @@ out:
#endif
}
-static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_recvmsg_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
+
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov;
- return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr,
- &io->msg.iov);
+ return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.uaddr, &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2252,7 +2269,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2262,19 +2278,10 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret);
if (sock) {
- struct user_msghdr __user *msg;
struct io_async_ctx io;
struct sockaddr_storage addr;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
- msg = (struct user_msghdr __user *) (unsigned long)
- READ_ONCE(sqe->addr);
if (req->io) {
kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr;
@@ -2283,14 +2290,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
+ struct io_sr_msg *sr = &req->sr_msg;
+
kmsg = &io.msg;
kmsg->msg.msg_name = &addr;
- ret = io_recvmsg_prep(req, &io);
+
+ io.msg.iov = io.msg.fast_iov;
+ ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.uaddr,
+ &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
- ret = __sys_recvmsg_sock(sock, &kmsg->msg, msg, kmsg->uaddr, flags);
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
+ kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN) {
if (req->io)
return -EAGAIN;
@@ -2304,7 +2324,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
-out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
io_cqring_add_event(req, ret);
@@ -2317,25 +2336,19 @@ out:
#endif
}
-static int io_accept_prep(struct io_kiocb *req)
+static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_accept *accept = &req->accept;
- if (req->flags & REQ_F_PREPPED)
- return 0;
-
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index)
return -EINVAL;
- accept->addr = (struct sockaddr __user *)
- (unsigned long) READ_ONCE(sqe->addr);
- accept->addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2);
+ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
- req->flags |= REQ_F_PREPPED;
return 0;
#else
return -EOPNOTSUPP;
@@ -2383,10 +2396,6 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#if defined(CONFIG_NET)
int ret;
- ret = io_accept_prep(req);
- if (ret)
- return ret;
-
ret = __io_accept(req, nxt, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_ac