diff options
| -rw-r--r-- | Documentation/ioctl/ioctl-number.txt | 1 | ||||
| -rw-r--r-- | Documentation/userspace-api/seccomp_filter.rst | 84 | ||||
| -rw-r--r-- | arch/s390/kernel/compat_wrapper.c | 2 | ||||
| -rw-r--r-- | include/linux/seccomp.h | 9 | ||||
| -rw-r--r-- | include/linux/syscalls.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/seccomp.h | 40 | ||||
| -rw-r--r-- | kernel/seccomp.c | 467 | ||||
| -rw-r--r-- | samples/seccomp/.gitignore | 1 | ||||
| -rw-r--r-- | samples/seccomp/Makefile | 7 | ||||
| -rw-r--r-- | samples/seccomp/user-trap.c | 375 | ||||
| -rw-r--r-- | tools/testing/selftests/seccomp/seccomp_bpf.c | 447 |
11 files changed, 1411 insertions, 24 deletions
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index af6f6ba1fe80..c9558146ac58 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -79,6 +79,7 @@ Code Seq#(hex) Include File Comments 0x1b all InfiniBand Subsystem <http://infiniband.sourceforge.net/> 0x20 all drivers/cdrom/cm206.h 0x22 all scsi/sg.h +'!' 00-1F uapi/linux/seccomp.h '#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem '$' 00-0F linux/perf_counter.h, linux/perf_event.h '%' 00-0F include/uapi/linux/stm.h diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 82a468bc7560..b1b846d8a094 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -122,6 +122,11 @@ In precedence order, they are: Results in the lower 16-bits of the return value being passed to userland as the errno without executing the system call. +``SECCOMP_RET_USER_NOTIF``: + Results in a ``struct seccomp_notif`` message sent on the userspace + notification fd, if it is attached, or ``-ENOSYS`` if it is not. See below + on discussion of how to handle user notifications. + ``SECCOMP_RET_TRACE``: When returned, this value will cause the kernel to attempt to notify a ``ptrace()``-based tracer prior to executing the system @@ -183,6 +188,85 @@ The ``samples/seccomp/`` directory contains both an x86-specific example and a more generic example of a higher level macro interface for BPF program generation. +Userspace Notification +====================== + +The ``SECCOMP_RET_USER_NOTIF`` return code lets seccomp filters pass a +particular syscall to userspace to be handled. This may be useful for +applications like container managers, which wish to intercept particular +syscalls (``mount()``, ``finit_module()``, etc.) and change their behavior. + +To acquire a notification FD, use the ``SECCOMP_FILTER_FLAG_NEW_LISTENER`` +argument to the ``seccomp()`` syscall: + +.. code-block:: c + + fd = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog); + +which (on success) will return a listener fd for the filter, which can then be +passed around via ``SCM_RIGHTS`` or similar. Note that filter fds correspond to +a particular filter, and not a particular task. So if this task then forks, +notifications from both tasks will appear on the same filter fd. Reads and +writes to/from a filter fd are also synchronized, so a filter fd can safely +have many readers. + +The interface for a seccomp notification fd consists of two structures: + +.. code-block:: c + + struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; + }; + + struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; + }; + + struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; + }; + +The ``struct seccomp_notif_sizes`` structure can be used to determine the size +of the various structures used in seccomp notifications. The size of ``struct +seccomp_data`` may change in the future, so code should use: + +.. code-block:: c + + struct seccomp_notif_sizes sizes; + seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes); + +to determine the size of the various structures to allocate. See +samples/seccomp/user-trap.c for an example. + +Users can read via ``ioctl(SECCOMP_IOCTL_NOTIF_RECV)`` (or ``poll()``) on a +seccomp notification fd to receive a ``struct seccomp_notif``, which contains +five members: the input length of the structure, a unique-per-filter ``id``, +the ``pid`` of the task which triggered this request (which may be 0 if the +task is in a pid ns not visible from the listener's pid namespace), a ``flags`` +member which for now only has ``SECCOMP_NOTIF_FLAG_SIGNALED``, representing +whether or not the notification is a result of a non-fatal signal, and the +``data`` passed to seccomp. Userspace can then make a decision based on this +information about what to do, and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a +response, indicating what should be returned to userspace. The ``id`` member of +``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct +seccomp_notif``. + +It is worth noting that ``struct seccomp_data`` contains the values of register +arguments to the syscall, but does not contain pointers to memory. The task's +memory is accessible to suitably privileged traces via ``ptrace()`` or +``/proc/pid/mem``. However, care should be taken to avoid the TOCTOU mentioned +above in this document: all arguments being read from the tracee's memory +should be read into the tracer's memory before any policy decisions are made. +This allows for an atomic decision on syscall arguments. + Sysctls ======= diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 2ce28bf0c5ec..48c4ce668244 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -164,7 +164,7 @@ COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, fla COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags); -COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs) +COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, void __user *, uargs) COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags) COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size); diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index e5320f6c8654..84868d37b35d 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -4,9 +4,10 @@ #include <uapi/linux/seccomp.h> -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ - SECCOMP_FILTER_FLAG_LOG | \ - SECCOMP_FILTER_FLAG_SPEC_ALLOW) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG | \ + SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ + SECCOMP_FILTER_FLAG_NEW_LISTENER) #ifdef CONFIG_SECCOMP @@ -43,7 +44,7 @@ extern void secure_computing_strict(int this_syscall); #endif extern long prctl_get_seccomp(void); -extern long prctl_set_seccomp(unsigned long, char __user *); +extern long prctl_set_seccomp(unsigned long, void __user *); static inline int seccomp_mode(struct seccomp *s) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 251979d2e709..257cccba3062 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -898,7 +898,7 @@ asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, unsigned int flags); asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, - const char __user *uargs); + void __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 9efc0e73d50b..90734aa5aa36 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -15,11 +15,13 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 #define SECCOMP_GET_ACTION_AVAIL 2 +#define SECCOMP_GET_NOTIF_SIZES 3 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) -#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) -#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) /* * All BPF programs must return a 32-bit value. @@ -35,6 +37,7 @@ #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ @@ -60,4 +63,35 @@ struct seccomp_data { __u64 args[6]; }; +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) #endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f2ae2324c232..d7f538847b84 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -33,12 +33,74 @@ #endif #ifdef CONFIG_SECCOMP_FILTER +#include <linux/file.h> #include <linux/filter.h> #include <linux/pid.h> #include <linux/ptrace.h> #include <linux/security.h> #include <linux/tracehook.h> #include <linux/uaccess.h> +#include <linux/anon_inodes.h> + +enum notify_state { + SECCOMP_NOTIFY_INIT, + SECCOMP_NOTIFY_SENT, + SECCOMP_NOTIFY_REPLIED, +}; + +struct seccomp_knotif { + /* The struct pid of the task whose filter triggered the notification */ + struct task_struct *task; + + /* The "cookie" for this request; this is unique for this filter. */ + u64 id; + + /* + * The seccomp data. This pointer is valid the entire time this + * notification is active, since it comes from __seccomp_filter which + * eclipses the entire lifecycle here. + */ + const struct seccomp_data *data; + + /* + * Notification states. When SECCOMP_RET_USER_NOTIF is returned, a + * struct seccomp_knotif is created and starts out in INIT. Once the + * handler reads the notification off of an FD, it transitions to SENT. + * If a signal is received the state transitions back to INIT and + * another message is sent. When the userspace handler replies, state + * transitions to REPLIED. + */ + enum notify_state state; + + /* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */ + int error; + long val; + + /* Signals when this has entered SECCOMP_NOTIFY_REPLIED */ + struct completion ready; + + struct list_head list; +}; + +/** + * struct notification - container for seccomp userspace notifications. Since + * most seccomp filters will not have notification listeners attached and this + * structure is fairly large, we store the notification-specific stuff in a + * separate structure. + * + * @request: A semaphore that users of this notification can wait on for + * changes. Actual reads and writes are still controlled with + * filter->notify_lock. + * @next_id: The id of the next request. + * @notifications: A list of struct seccomp_knotif elements. + * @wqh: A wait queue for poll. + */ +struct notification { + struct semaphore request; + u64 next_id; + struct list_head notifications; + wait_queue_head_t wqh; +}; /** * struct seccomp_filter - container for seccomp BPF programs @@ -50,6 +112,8 @@ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged * @prev: points to a previously installed, or inherited, filter * @prog: the BPF program to evaluate + * @notif: the struct that holds all notification related information + * @notify_lock: A lock for all notification-related accesses. * * seccomp_filter objects are organized in a tree linked via the @prev * pointer. For any task, it appears to be a singly-linked list starting @@ -66,6 +130,8 @@ struct seccomp_filter { bool log; struct seccomp_filter *prev; struct bpf_prog *prog; + struct notification *notif; + struct mutex notify_lock; }; /* Limit any path through the tree to 256KB worth of instructions. */ @@ -188,7 +254,6 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) static u32 seccomp_run_filters(const struct seccomp_data *sd, struct seccomp_filter **match) { - struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; /* Make sure cross-thread synced filter points somewhere sane. */ struct seccomp_filter *f = @@ -198,11 +263,6 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, if (WARN_ON(f == NULL)) return SECCOMP_RET_KILL_PROCESS; - if (!sd) { - populate_seccomp_data(&sd_local); - sd = &sd_local; - } - /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). @@ -392,6 +452,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!sfilter) return ERR_PTR(-ENOMEM); + mutex_init(&sfilter->notify_lock); ret = bpf_prog_create_from_user(&sfilter->prog, fprog, seccomp_check_filter, save_orig); if (ret < 0) { @@ -485,7 +546,6 @@ static long seccomp_attach_filter(unsigned int flags, static void __get_seccomp_filter(struct seccomp_filter *filter) { - /* Reference count is bounded by the number of total processes. */ refcount_inc(&filter->usage); } @@ -556,11 +616,13 @@ static void seccomp_send_sigsys(int syscall, int reason) #define SECCOMP_LOG_TRACE (1 << 4) #define SECCOMP_LOG_LOG (1 << 5) #define SECCOMP_LOG_ALLOW (1 << 6) +#define SECCOMP_LOG_USER_NOTIF (1 << 7) static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS | SECCOMP_LOG_KILL_THREAD | SECCOMP_LOG_TRAP | SECCOMP_LOG_ERRNO | + SECCOMP_LOG_USER_NOTIF | SECCOMP_LOG_TRACE | SECCOMP_LOG_LOG; @@ -581,6 +643,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, case SECCOMP_RET_TRACE: log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; break; + case SECCOMP_RET_USER_NOTIF: + log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF; + break; case SECCOMP_RET_LOG: log = seccomp_actions_logged & SECCOMP_LOG_LOG; break; @@ -652,12 +717,75 @@ void secure_computing_strict(int this_syscall) #else #ifdef CONFIG_SECCOMP_FILTER +static u64 seccomp_next_notify_id(struct seccomp_filter *filter) +{ + /* + * Note: overflow is ok here, the id just needs to be unique per + * filter. + */ + lockdep_assert_held(&filter->notify_lock); + return filter->notif->next_id++; +} + +static void seccomp_do_user_notification(int this_syscall, + struct seccomp_filter *match, + const struct seccomp_data *sd) +{ + int err; + long ret = 0; + struct seccomp_knotif n = {}; + + mutex_lock(&match->notify_lock); + err = -ENOSYS; + if (!match->notif) + goto out; + + n.task = current; + n.state = SECCOMP_NOTIFY_INIT; + n.data = sd; + n.id = seccomp_next_notify_id(match); + init_completion(&n.ready); + list_add(&n.list, &match->notif->notifications); + + up(&match->notif->request); + wake_up_poll(&match->notif->wqh, EPOLLIN | EPOLLRDNORM); + mutex_unlock(&match->notify_lock); + + /* + * This is where we wait for a reply from userspace. + */ + err = wait_for_completion_interruptible(&n.ready); + mutex_lock(&match->notify_lock); + if (err == 0) { + ret = n.val; + err = n.error; + } + + /* + * Note that it's possible the listener died in between the time when + * we were notified of a respons (or a signal) and when we were able to + * re-acquire the lock, so only delete from the list if the + * notification actually exists. + * + * Also note that this test is only valid because there's no way to + * *reattach* to a notifier right now. If one is added, we'll need to + * keep track of the notif itself and make sure they match here. + */ + if (match->notif) + list_del(&n.list); +out: + mutex_unlock(&match->notify_lock); + syscall_set_return_value(current, task_pt_regs(current), + err, ret); +} + static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, const bool recheck_after_trace) { u32 filter_ret, action; struct seccomp_filter *match = NULL; int data; + struct seccomp_data sd_local; /* * Make sure that any changes to mode from another thread have @@ -665,6 +793,11 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, */ rmb(); + if (!sd) { + populate_seccomp_data(&sd_local); + sd = &sd_local; + } + filter_ret = seccomp_run_filters(sd, &match); data = filter_ret & SECCOMP_RET_DATA; action = filter_ret & SECCOMP_RET_ACTION_FULL; @@ -728,6 +861,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; + case SECCOMP_RET_USER_NOTIF: + seccomp_do_user_notification(this_syscall, match, sd); + goto skip; + case SECCOMP_RET_LOG: seccomp_log(this_syscall, 0, action, true); return 0; @@ -834,6 +971,262 @@ out: } #ifdef CONFIG_SECCOMP_FILTER +static int seccomp_notify_release(struct inode *inode, struct file *file) +{ + struct seccomp_filter *filter = file->private_data; + struct seccomp_knotif *knotif; + + mutex_lock(&filter->notify_lock); + + /* + * If this file is being closed because e.g. the task who owned it + * died, let's wake everyone up who was waiting on us. + */ + list_for_each_entry(knotif, &filter->notif->notifications, list) { + if (knotif->state == SECCOMP_NOTIFY_REPLIED) + continue; + + knotif->state = SECCOMP_NOTIFY_REPLIED; + knotif->error = -ENOSYS; + knotif->val = 0; + + complete(&knotif->ready); + } + + kfree(filter->notif); + filter->notif = NULL; + mutex_unlock(&filter->notify_lock); + __put_seccomp_filter(filter); + return 0; +} + +static long seccomp_notify_recv(struct seccomp_filter *filter, + void __user *buf) +{ + struct seccomp_knotif *knotif = NULL, *cur; + struct seccomp_notif unotif; + ssize_t ret; + + memset(&unotif, 0, sizeof(unotif)); + + ret = down_interruptible(&filter->notif->request); + if (ret < 0) + return ret; + + mutex_lock(&filter->notify_lock); + list_for_each_entry(cur, &filter->notif->notifications, list) { + if (cur->state == SECCOMP_NOTIFY_INIT) { + knotif = cur; + break; + } + } + + /* + * If we didn't find a notification, it could be that the task was + * interrupted by a fatal signal between the time we were woken and + * when we were able to acquire the rw lock. + */ + if (!knotif) { + ret = -ENOENT; + goto out; + } + + unotif.id = knotif->id; + unotif.pid = task_pid_vnr(knotif->task); + unotif.data = *(knotif->data); + + knotif->state = SECCOMP_NOTIFY_SENT; + wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM); + ret = 0; +out: + mutex_unlock(&filter->notify_lock); + + if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) { + ret = -EFAULT; + + /* + * Userspace screwed up. To make sure that we keep this + * notification alive, let's reset it back to INIT. It + * may have died when we released the lock, so we need to make + * sure it's still around. + */ + knotif = NULL; + mutex_lock(&filter->notify_lock); + list_for_each_entry(cur, &filter->notif->notifications, list) { + if (cur->id == unotif.id) { + knotif = cur; + break; + } + } + + if (knotif) { + knotif->state = SECCOMP_NOTIFY_INIT; + up(&filter->notif->request); + } + mutex_unlock(&filter->notify_lock); + } + + return ret; +} + +static long seccomp_notify_send(struct seccomp_filter *filter, + void __user *buf) +{ + struct seccomp_notif_resp resp = {}; + struct seccomp_knotif *knotif = NULL, *cur; + long ret; + + if (copy_from_user(&resp, buf, sizeof(resp))) + return -EFAULT; + + if (resp.flags) + return -EINVAL; + + ret = mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + return ret; + + list_for_each_entry(cur, &filter->notif->notifications, list) { + if (cur->id == resp.id) { + knotif = cur; + break; + } + } + + if (!knotif) { + ret = -ENOENT; + goto out; + } + + /* Allow exactly one reply. */ + if (knotif->state != SECCOMP_NOTIFY_SENT) { + ret = -EINPROGRESS; + goto out; + } + + ret = 0; + knotif->state = SECCOMP_NOTIFY_REPLIED; + knotif->error = resp.error; + knotif->val = resp.val; + complete(&knotif->ready); +out: + mutex_unlock(&filter->notify_lock); + return ret; +} + +static long seccomp_notify_id_valid(struct seccomp_filter *filter, + void __user *buf) +{ + struct seccomp_knotif *knotif = NULL; + u64 id; + long ret; + + if (copy_from_user(&id, buf, sizeof(id))) + return -EFAULT; + + ret = mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + return ret; + + ret = -ENOENT; + list_for_each_entry(knotif, &filter->notif->notifications, list) { + if (knotif->id == id) { + if (knotif->state == SECCOMP_NOTIFY_SENT) + ret = 0; + goto out; + } + } + +out: + mutex_unlock(&filter->notify_lock); + return ret; +} + +static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct seccomp_filter *filter = file->private_data; + void __user *buf = (void __user *)arg; + + switch (cmd) { + case SECCOMP_IOCTL_NOTIF_RECV: + return seccomp_notify_recv(filter, buf); + case SECCOMP_IOCTL_NOTIF_SEND: + return seccomp_notify_send(filter, buf); + case SECCOMP_IOCTL_NOTIF_ID_VALID: + return seccomp_notify_id_valid(filter, buf); + default: + return -EINVAL; + } +} + +static __poll_t seccomp_notify_poll(struct file *file, + struct poll_table_struct *poll_tab) +{ + struct seccomp_filter *filter = file->private_data; + __poll_t ret = 0; + struct seccomp_knotif *cur; + + poll_wait(file, &filter->notif->wqh, poll_tab); + + if (mutex_lock_interruptible(&filter->notify_lock) < 0) + return EPOLLERR; + + list_for_each_entry(cur, &filter->notif->notifications, list) { + if (cur->state == SECCOMP_NOTIFY_INIT) + ret |= EPOLLIN | EPOLLRDNORM; + if (cur->state == SECCOMP_NOTIFY_SENT) + ret |= EPOLLOUT | EPOLLWRNORM; + if ((ret & EPOLLIN) && (ret & EPOLLOUT)) + break; + } + + mutex_unlock(&filter->notify_lock); + + return ret; +} + +static const struct file_operations seccomp_notify_ops = { + .poll = seccomp_notify_poll, + .release = seccomp_notify_release, + .unlocked_ioctl = seccomp_notify_ioctl, +}; + +static struct file *init_listener(struct seccomp_filter *filter) +{ + struct file *ret = ERR_PTR(-EBUSY); + struct seccomp_filter *cur; + + for (cur = current->seccomp.filter; cur; cur = cur->prev) { + if (cur->notif) + goto out; + } + + ret = ERR_PTR(-ENOMEM); + filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL); + if (!filter->notif) + goto out; + + sema_init(&filter->notif->request, 0); + filter->notif->next_id = get_random_u64(); + INIT_LIST_HEAD(&filter->notif->notifications); + init_waitqueue_head(&filter->notif->wqh); + + ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops, + filter, O_RDWR); + if (IS_ERR(ret)) + goto out_notif; + + /* The file has a reference to it now */ + __get_seccomp_filter(filter); + +out_notif: + if (IS_ERR(ret)) + kfree(filter->notif); +out: + return ret; +} + /** * seccomp_set_mode_filter: internal function for setting seccomp filter * @flags: flags to change filter behavior @@ -853,6 +1246,8 @@ static long seccomp_set_mode_filter(unsigned int flags, const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; struct seccomp_filter *prepared = NULL; long ret = -EINVAL; + int listener = -1; + struct file *listener_f = NULL; /* Validate flags. */ if (flags & ~SECCOMP_FILTER_FLAG_MASK) @@ -863,13 +1258,28 @@ static long seccomp_set_mode_filter(unsigned int flags, if (IS_ERR(prepared)) return PTR_ERR(prepared); + if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { + listener = get_unused_fd_flags(O_CLOEXEC); + if (listener < 0) { + ret = listener; + goto out_free; + } + + listener_f = init_listener(prepared); + if (IS_ERR(listener_f)) { + put_unused_fd(listener); + ret = PTR_ERR(listener_f); + goto out_free; + } + } + /* * Make sure we cannot change seccomp or nnp state via TSYNC * while another thread is in the middle of calling exec. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC && mutex_lock_killable(¤t->signal->cred_guard_mutex)) - goto out_free; + goto out_put_fd; spin_lock_irq(¤t->sighand->siglock); @@ -887,6 +1297,16 @@ out: spin_unlock_irq(¤t->sighand->siglock); if (flags & SECCOMP_FILTER_FLAG_TSYNC) mutex_unlock(¤t->signal->cred_guard_mutex); +out_put_fd: + if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { + if (ret < 0) { + fput(listener_f); + put_unused_fd(listener); + } else { + fd_install(listener, listener_f); + ret = listener; + } + } out_free: seccomp_filter_free(prepared); return ret; @@ -911,6 +1331,7 @@ static long seccomp_get_action_avail(const char __user *uaction) case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: + case SECCOMP_RET_USER_NOTIF: case SECCOMP_RET_TRACE: case SECCOMP_RET_LOG: case SECCOMP_RET_ALLOW: @@ -922,9 +1343,23 @@ static long seccomp_get_action_avail(const char __user *uaction) return 0; } +static long seccomp_get_notif_sizes(void __user *usizes) +{ + struct seccomp_notif_sizes sizes = { + .seccomp_notif = sizeof(struct seccomp_notif), + .seccomp_notif_resp = sizeof(struct seccomp_notif_resp), + .seccomp_data = sizeof(struct seccomp_data), + }; + + if (copy_to_user(usizes, &sizes, sizeof(sizes))) + return -EFAULT; + + return 0; +} + /* Common entry point for both prctl and syscall. */ static long do_seccomp(unsigned int op, unsigned int flags, - const char __user *uargs) + void __user *uargs) { switch (op) { case SECCOMP_SET_MODE_STRICT: @@ -938,13 +1373,18 @@ static long do_seccomp(unsigned int op, unsigned int flags, return -EINVAL; return seccomp_get_action_avail(uargs); + case SECCOMP_GET_NOTIF_SIZES: + if (flags != 0) + return -EINVAL; + + return seccomp_get_notif_sizes(uargs); default: return -EINVAL; } } SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, - const char __user *, uargs) + void __user *, uargs) { return do_seccomp(op, flags, uargs); } @@ -956,10 +1396,10 @@ SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, * * Returns 0 on success or -EINVAL on failure. */ -long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter) { unsigned int op; - char __user *uargs; + void __user *uargs; switch (seccomp_mode) { case SECCOMP_MODE_STRICT: @@ -1111,6 +1551,7 @@ long seccomp_get_metadata(struct task_struct *task, #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" +#define SECCOMP_RET_USER_NOTIF_NAME "user_notif" #define SECCOMP_RET_TRACE_NAME "trace" #define SECCOMP_RET_LOG_NAME "log" #define SECCOMP_RET_ALLOW_NAME "allow" @@ -1120,6 +1561,7 @@ static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_THREAD_NAME " " SECCOMP_RET_TRAP_NAME " " SECCOMP_RET_ERRNO_NAME " " + SECCOMP_RET_USER_NOTIF_NAME " " SECCOMP_RET_TRACE_NAME " " SECCOMP_RET_LOG_NAME " " SECCOMP_RET_ALLOW_NAME; @@ -1134,6 +1576,7 @@ static const struct seccomp_log_name seccomp_log_names[] = { { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, + { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME }, { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME }, { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore index 78fb78184291..d1e2e817d556 100644 --- a/samples/seccomp/.gitignore +++ b/samples/seccomp/.gitignore @@ -1,3 +1,4 @@ bpf-direct bpf-fancy dropper +user-trap diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile index cf34ff6b4065..4920903c8009 100644 --- a/samples/seccomp/Makefile +++ b/samples/seccomp/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE -hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct +hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct user-trap HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include @@ -16,6 +16,10 @@ HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include bpf-direct-objs := bpf-direct.o +HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include +HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include +user-trap-objs := user-trap.o + # Try to match the kernel target. ifndef CONFIG_64BIT @@ -33,6 +37,7 @@ HOSTCFLAGS_bpf-fancy.o += $(MFLAG) HOSTLDLIBS_bpf-direct += $(MFLAG) HOSTLDLIBS_bpf-fancy += $(MFLAG) HOSTLDLIBS_dropper += $(MFLAG) +HOSTLDLIBS_user-trap += $(MFLAG) endif always := $(hostprogs-m) endif diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c new file mode 100644 index 000000000000..6d0125ca8af7 --- /dev/null +++ b/samples/seccomp/user-trap.c @@ -0,0 +1,375 @@ +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stddef.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/user.h> +#include <sys/ioctl.h> +#include <sys/ptrace.h> +#include <sys/mount.h> +#include <linux/limits.h> +#include <linux/filter.h> +#include <linux/seccomp.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +static int seccomp(unsigned int op, unsigned int flags, void *args) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, args); +} + +static int send_fd(int sock, int fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c'; + struct iovec io = { + .iov_base = &c, + .iov_len = 1, + }; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + *((int *)CMSG_DATA(cmsg)) = fd; |
