// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/hugetlb.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <uapi/linux/io_uring.h>
#include "filetable.h"
#include "io_uring.h"
#include "openclose.h"
#include "rsrc.h"
#include "memmap.h"
#include "register.h"
struct io_rsrc_update {
struct file *file;
u64 arg;
u32 nr_args;
u32 offset;
};
static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
struct iovec *iov, struct page **last_hpage);
/* only define max */
#define IORING_MAX_FIXED_FILES (1U << 20)
#define IORING_MAX_REG_BUFFERS (1U << 14)
#define IO_CACHED_BVECS_SEGS 32
int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
{
unsigned long page_limit, cur_pages, new_pages;
if (!nr_pages)
return 0;
/* Don't allow more pages than we can safely lock */
page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
cur_pages = atomic_long_read(&user->locked_vm);
do {
new_pages = cur_pages + nr_pages;
if (new_pages > page_limit)
return -ENOMEM;
} while (!atomic_long_try_cmpxchg(&user->locked_vm,
&cur_pages, new_pages));
return 0;
}
void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
{
if (ctx->user)
__io_unaccount_mem(ctx->user, nr_pages);
if (ctx->mm_account)
atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
}
int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
{
int ret;
if (ctx->user) {
ret = __io_account_mem(ctx->user, nr_pages);
if (ret)
return ret;
}
if (ctx->mm_account)
atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
return 0;
}
int io_validate_user_buf_range(u64 uaddr, u64 ulen)
{
unsigned long tmp, base = (unsigned long)uaddr;
unsigned long acct_len = (unsigned long)PAGE_ALIGN(ulen);
/* arbitrary limit, but we need something */
if (ulen > SZ_1G || !ulen)
return -EFAULT;
if (check_add_overflow(base, acct_len, &tmp))
return -EOVERFLOW;
return 0;
}
static int io_buffer_validate(struct iovec *iov)
{
/*
* Don't impose further limits on the size and buffer
* constraints here, we'll -EINVAL later when IO is
* submitted if they are wrong.
*/
if (!iov->iov_base)
return iov->iov_len ? -EFAULT : 0;
return io_validate_user_buf_range((unsigned long)iov->iov_base,
iov->iov_len);
}
static void io_release_ubuf(void *priv)
{
struct io_mapped_ubuf *imu = priv;
unsigned int i;
for (i = 0; i < imu->nr_bvecs; i++) {
struct folio *folio = page_folio(imu->bvec[i].bv_page);
unpin_user_folio(folio, 1);
}
}
static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
int nr_bvecs)
{
if (nr_bvecs <= IO_CACHED_BVECS_SEGS)
return io_cache_alloc(&ctx->imu_cache, GFP_KERNEL);
return kvmalloc(struct_size_t(struct io_mapped_ubuf, bvec, nr_bvecs),
GFP_KERNEL);
}
static void io_free_imu(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
{
if (imu->nr_bvecs <= IO_CACHED_BVECS_SEGS)
io_cache_free(&ctx->imu_cache, imu);
else
kvfree(imu);
}
static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
{
if (unlikely(refcount_read(&imu->refs) > 1)) {
if (!refcount_dec_and_test(&imu->refs))
return;
}
if (imu->acct_pages)
io_unaccount_mem(ctx, imu->acct_pages);
imu->release(imu->priv);
io_free_imu(ctx, imu);
}
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
{
struct io_rsrc_node *node;
node = io_cache_alloc(&ctx->node_cache, GFP_KERNEL);
if (node) {
node->type = type;
node->refs = 1;
node->tag = 0;
node->file_ptr = 0;
}
return node;
}
bool io_rsrc_cache_init(struct io_ring_ctx *ctx)
{
const int imu_cache_size = struct_size_t(struct io_mapped_ubuf, bvec,
IO_CACHED_BVECS_SEGS);
const int node_size = sizeof(struct io_rsrc_node);
bool ret;
ret = io_alloc_cache_init(&ctx->node_cache, IO_ALLOC_CACHE_MAX,
node_size, 0);
ret |= io_alloc_cache_init(&ctx->imu_cache, IO_ALLOC_CACHE_MAX,
imu_cache_size, 0);
return ret;
}
void io_rsrc_cache_free(struct io_ring_ctx *ctx)
{
io_alloc_cache_free(&ctx->node_cache, kfree);
io_alloc_cache_free(&ctx->imu_cache, kfree);
}
static void io_clear_table_tags(struct io_rsrc_data *data)
{
int i;
for (i = 0; i < data->nr; i++) {
struct io_rsrc_node *node = data->nodes[i];
if (node)
node->tag = 0;
}
}
__cold void io_rsrc_data_free(struct io_ring_ctx *ctx,
struct io_rsrc_data *data)
{
if (!data->nr)
return;
while (data->nr--) {
if (data->nodes[data->nr])
io_put_rsrc_node(ctx, data->nodes[data->nr]);
}
kvfree(data->nodes);
data->nodes = NULL;
data->nr = 0;
}
__cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr)
{
data->nodes = kvmalloc_array(nr, sizeof(struct io_rsrc_node *),
GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (data->nodes) {
data->nr = nr;
return 0;
}
return -ENOMEM;
}
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned nr_args)
{
u64 __user *tags = u64_to_user_ptr(up->tags);
__s32 __user *fds = u64_to_user_ptr(up->data);
int fd, i, err = 0;
unsigned int done;
if (!ctx->file_table.data.nr)
return -ENXIO;
if (up->offset + nr_args > ctx->file_table.data.nr)
return -EINVAL;
for (done = 0; done < nr_args; done++) {
u64 tag = 0;
if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) ||
copy_from_user(&fd, &fds[done], sizeof(fd))) {
err = -EFAULT;
break;
}
if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) {
err = -EINVAL;
break;
}
if (fd == IORING_REGISTER_FILES_SKIP)
continue;
i = up->offset + done;
if (io_reset_rsrc_node(ctx, &ctx->file_table.data, i))
io_file_bitmap_clear(&ctx->file_table, i);
if (fd != -1) {
struct file *file = fget(fd);
struct io_rsrc_node *node;
if (!file) {
err = -EBADF;
break;
}
/*
* Don't allow io_uring instances to be registered.
*/
if (io_is_uring_fops(file)) {
fput(file);
err = -EBADF;
break;
}
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node) {
err = -ENOMEM
|