// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2011-2014, Intel Corporation.
* Copyright (c) 2017-2021 Christoph Hellwig.
*/
#include <linux/ptrace.h> /* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring.h>
#include "nvme.h"
enum {
NVME_IOCTL_VEC = (1 << 0),
NVME_IOCTL_PARTITION = (1 << 1),
};
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
unsigned int flags, fmode_t mode)
{
u32 effects;
if (capable(CAP_SYS_ADMIN))
return true;
/*
* Do not allow unprivileged passthrough on partitions, as that allows an
* escape from the containment of the partition.
*/
if (flags & NVME_IOCTL_PARTITION)
return false;
/*
* Do not allow unprivileged processes to send vendor specific or fabrics
* commands as we can't be sure about their effects.
*/
if (c->common.opcode >= nvme_cmd_vendor_start ||
c->common.opcode == nvme_fabrics_command)
return false;
/*
* Do not allow unprivileged passthrough of admin commands except
* for a subset of identify commands that contain information required
* to form proper I/O commands in userspace and do not expose any
* potentially sensitive information.
*/
if (!ns) {
if (c->common.opcode == nvme_admin_identify) {
switch (c->identify.cns) {
case NVME_ID_CNS_NS:
case NVME_ID_CNS_CS_NS:
case NVME_ID_CNS_NS_CS_INDEP:
case NVME_ID_CNS_CS_CTRL:
case NVME_ID_CNS_CTRL:
return true;
}
}
return false;
}
/*
* Check if the controller provides a Commands Supported and Effects log
* and marks this command as supported. If not reject unprivileged
* passthrough.
*/
effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
if (!(effects & NVME_CMD_EFFECTS_CSUPP))
return false;
/*
* Don't allow passthrough for command that have intrusive (or unknown)
* effects.
*/
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
NVME_CMD_EFFECTS_UUID_SEL |
NVME_CMD_EFFECTS_SCOPE_MASK))
return false;
/*
* Only allow I/O commands that transfer data to the controller or that
* change the logical block contents if the file descriptor is open for
* writing.
*/
if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
return mode & FMODE_WRITE;
return true;
}
/*
* Convert integer values from ioctl structures to user pointers, silently
* ignoring the upper bits in the compat case to match behaviour of 32-bit
* kernels.
*/
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
if (in_compat_syscall())
ptrval = (compat_uptr_t)ptrval;
return (void __user *)ptrval;
}
static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
unsigned len, u32 seed)
{
struct bio_integrity_payload *bip;
int ret = -ENOMEM;
void *buf;
struct bio *bio = req->bio;
buf = kmalloc(len, GFP_KERNEL);
if (!buf)
goto out;
ret = -EFAULT;
if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len))
goto out_free_meta;
bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
if (IS_ERR(bip)) {
ret = PTR_ERR(bip);
goto out_free_meta;
}
bip->bip_iter.bi_size = len;
bip->bip_iter.bi_sector = seed;
ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf));
if (ret != len) {
ret = -ENOMEM;
goto out_free_meta;
}
req->cmd_flags |= REQ_INTEGRITY;
return buf;
out_free_meta:
kfree(buf);
out:
return ERR_PTR(ret);
}
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
void *meta, unsigned len, int ret)
{
if (!ret && req_op(req) == REQ_OP_DRV_IN &&
copy_to_user(ubuf, meta, len))
ret = -EFAULT;
kfree(meta);
return ret;
}
static struct request *nvme_alloc_user_request(struct request_queue *q,
struct nvme_command *cmd, blk_opf_t rq_flags,
blk_mq_req_flags_t blk_flags)
{
struct request *req;
req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
if (IS_ERR(req))
return req;
nvme_init_request(req, cmd);
nvme_req(req)->flags |= NVME_REQ_USERCMD;
return req;
}
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
struct bio *bio = NULL;
void *meta = NULL;
int ret;
if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
struct iov_iter iter;
/* fixedbufs is only for non-vectored io */
if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))
return -EINVAL;
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
rq_data_dir(req), &iter, ioucmd);
if (ret < 0