diff options
Diffstat (limited to 'kernel')
95 files changed, 10762 insertions, 4012 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index cb41b9547c9f..c0cc67ad764c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,12 +5,12 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ - signal.o sys.o kmod.o workqueue.o pid.o \ + signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files @@ -25,6 +25,9 @@ endif obj-y += sched/ obj-y += power/ +ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) +obj-$(CONFIG_X86) += kcmp.o +endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -43,6 +46,7 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smpboot.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif diff --git a/kernel/auditsc.c b/kernel/auditsc.c index af1de0f34eae..4b96415527b8 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -67,6 +67,7 @@ #include <linux/syscalls.h> #include <linux/capability.h> #include <linux/fs_struct.h> +#include <linux/compat.h> #include "audit.h" @@ -2710,13 +2711,16 @@ void audit_core_dumps(long signr) audit_log_end(ab); } -void __audit_seccomp(unsigned long syscall) +void __audit_seccomp(unsigned long syscall, long signr, int code) { struct audit_buffer *ab; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); - audit_log_abend(ab, "seccomp", SIGKILL); + audit_log_abend(ab, "seccomp", signr); audit_log_format(ab, " syscall=%ld", syscall); + audit_log_format(ab, " compat=%d", is_compat_task()); + audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current)); + audit_log_format(ab, " code=0x%x", code); audit_log_end(ab); } diff --git a/kernel/capability.c b/kernel/capability.c index 3f1adb6c6470..493d97259484 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -419,3 +419,24 @@ bool nsown_capable(int cap) { return ns_capable(current_user_ns(), cap); } + +/** + * inode_capable - Check superior capability over inode + * @inode: The inode in question + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at it's own user namespace and that the given inode is owned + * by the current user namespace or a child namespace. + * + * Currently we check to see if an inode is owned by the current + * user namespace by seeing if the inode's owner maps into the + * current user namespace. + * + */ +bool inode_capable(const struct inode *inode, int cap) +{ + struct user_namespace *ns = current_user_ns(); + + return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid); +} diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ed64ccac67c9..72fcd3069a90 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -60,9 +60,13 @@ #include <linux/eventfd.h> #include <linux/poll.h> #include <linux/flex_array.h> /* used in cgroup_attach_proc */ +#include <linux/kthread.h> #include <linux/atomic.h> +/* css deactivation bias, makes css->refcnt negative to deny new trygets */ +#define CSS_DEACT_BIAS INT_MIN + /* * cgroup_mutex is the master lock. Any modification to cgroup or its * hierarchy must be performed while holding it. @@ -127,6 +131,9 @@ struct cgroupfs_root { /* A list running through the active hierarchies */ struct list_head root_list; + /* All cgroups on this root, cgroup_mutex protected */ + struct list_head allcg_list; + /* Hierarchy-specific flags */ unsigned long flags; @@ -145,6 +152,15 @@ struct cgroupfs_root { static struct cgroupfs_root rootnode; /* + * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. + */ +struct cfent { + struct list_head node; + struct dentry *dentry; + struct cftype *type; +}; + +/* * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when * cgroup_subsys->use_id != 0. */ @@ -239,6 +255,14 @@ int cgroup_lock_is_held(void) EXPORT_SYMBOL_GPL(cgroup_lock_is_held); +/* the current nr of refs, always >= 0 whether @css is deactivated or not */ +static int css_refcnt(struct cgroup_subsys_state *css) +{ + int v = atomic_read(&css->refcnt); + + return v >= 0 ? v : v - CSS_DEACT_BIAS; +} + /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) { @@ -279,6 +303,21 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling) #define for_each_active_root(_root) \ list_for_each_entry(_root, &roots, root_list) +static inline struct cgroup *__d_cgrp(struct dentry *dentry) +{ + return dentry->d_fsdata; +} + +static inline struct cfent *__d_cfe(struct dentry *dentry) +{ + return dentry->d_fsdata; +} + +static inline struct cftype *__d_cft(struct dentry *dentry) +{ + return __d_cfe(dentry)->type; +} + /* the list of cgroups eligible for automatic release. Protected by * release_list_lock */ static LIST_HEAD(release_list); @@ -816,12 +855,17 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) struct cgroup_subsys *ss; int ret = 0; - for_each_subsys(cgrp->root, ss) - if (ss->pre_destroy) { - ret = ss->pre_destroy(cgrp); - if (ret) - break; + for_each_subsys(cgrp->root, ss) { + if (!ss->pre_destroy) + continue; + + ret = ss->pre_destroy(cgrp); + if (ret) { + /* ->pre_destroy() failure is being deprecated */ + WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs); + break; } + } return ret; } @@ -852,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) mutex_unlock(&cgroup_mutex); /* - * Drop the active superblock reference that we took when we - * created the cgroup + * We want to drop the active superblock reference from the + * cgroup creation after all the dentry refs are gone - + * kill_sb gets mighty unhappy otherwise. Mark + * dentry->d_fsdata with cgroup_diput() to tell + * cgroup_d_release() to call deactivate_super(). */ - deactivate_super(cgrp->root->sb); + dentry->d_fsdata = cgroup_diput; /* * if we're getting rid of the cgroup, refcount should ensure @@ -864,6 +911,14 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) BUG_ON(!list_empty(&cgrp->pidlists)); kfree_rcu(cgrp, rcu_head); + } else { + struct cfent *cfe = __d_cfe(dentry); + struct cgroup *cgrp = dentry->d_parent->d_fsdata; + + WARN_ONCE(!list_empty(&cfe->node) && + cgrp != &cgrp->root->top_cgroup, + "cfe still linked for %s\n", cfe->type->name); + kfree(cfe); } iput(inode); } @@ -873,6 +928,13 @@ static int cgroup_delete(const struct dentry *d) return 1; } +static void cgroup_d_release(struct dentry *dentry) +{ + /* did cgroup_diput() tell me to deactivate super? */ + if (dentry->d_fsdata == cgroup_diput) + deactivate_super(dentry->d_sb); +} + static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -882,34 +944,36 @@ static void remove_dir(struct dentry *d) dput(parent); } -static void cgroup_clear_directory(struct dentry *dentry) -{ - struct list_head *node; - - BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dentry->d_lock); - node = dentry->d_subdirs.next; - while (node != &dentry->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_u.d_child); - - spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); - list_del_init(node); - if (d->d_inode) { - /* This should never be called on a cgroup - * directory with child cgroups */ - BUG_ON(d->d_inode->i_mode & S_IFDIR); - dget_dlock(d); - spin_unlock(&d->d_lock); - spin_unlock(&dentry->d_lock); - d_delete(d); - simple_unlink(dentry->d_inode, d); - dput(d); - spin_lock(&dentry->d_lock); - } else - spin_unlock(&d->d_lock); - node = dentry->d_subdirs.next; +static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) +{ + struct cfent *cfe; + + lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); + lockdep_assert_held(&cgroup_mutex); + + list_for_each_entry(cfe, &cgrp->files, node) { + struct dentry *d = cfe->dentry; + + if (cft && cfe->type != cft) + continue; + + dget(d); + d_delete(d); + simple_unlink(d->d_inode, d); + list_del_init(&cfe->node); + dput(d); + + return 0; } - spin_unlock(&dentry->d_lock); + return -ENOENT; +} + +static void cgroup_clear_directory(struct dentry *dir) +{ + struct cgroup *cgrp = __d_cgrp(dir); + + while (!list_empty(&cgrp->files)) + cgroup_rm_file(cgrp, NULL); } /* @@ -1294,6 +1358,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) if (ret) goto out_unlock; + /* See feature-removal-schedule.txt */ + if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) + pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", + task_tgid_nr(current), current->comm); + /* Don't allow flags or name to change at remount */ if (opts.flags != root->flags || (opts.name && strcmp(opts.name, root->name))) { @@ -1308,7 +1377,8 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) goto out_unlock; } - /* (re)populate subsystem files */ + /* clear out any existing files and repopulate subsystem files */ + cgroup_clear_directory(cgrp->dentry); cgroup_populate_dir(cgrp); if (opts.release_agent) @@ -1333,6 +1403,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) { INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); + INIT_LIST_HEAD(&cgrp->files); INIT_LIST_HEAD(&cgrp->css_sets); INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); @@ -1344,11 +1415,14 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) static void init_cgroup_root(struct cgroupfs_root *root) { struct cgroup *cgrp = &root->top_cgroup; + INIT_LIST_HEAD(&root->subsys_list); INIT_LIST_HEAD(&root->root_list); + INIT_LIST_HEAD(&root->allcg_list); root->number_of_cgroups = 1; cgrp->root = root; cgrp->top_cgroup = cgrp; + list_add_tail(&cgrp->allcg_node, &root->allcg_list); init_cgroup_housekeeping(cgrp); } @@ -1468,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb) static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, .d_delete = cgroup_delete, + .d_release = cgroup_d_release, }; struct inode *inode = @@ -1692,16 +1767,6 @@ static struct file_system_type cgroup_fs_type = { static struct kobject *cgroup_kobj; -static inline struct cgroup *__d_cgrp(struct dentry *dentry) -{ - return dentry->d_fsdata; -} - -static inline struct cftype *__d_cft(struct dentry *dentry) -{ - return dentry->d_fsdata; -} - /** * cgroup_path - generate the path of a cgroup * @cgrp: the cgroup in question @@ -2160,9 +2225,9 @@ retry_find_task: * only need to check permissions on one of them. */ tcred = __task_cred(tsk); - if (cred->euid && - cred->euid != tcred->uid && - cred->euid != tcred->suid) { + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) { rcu_read_unlock(); ret = -EACCES; goto out_unlock_cgroup; @@ -2172,6 +2237,18 @@ retry_find_task: if (threadgroup) tsk = tsk->group_leader; + + /* + * Workqueue threads may acquire PF_THREAD_ |
