diff options
Diffstat (limited to 'kernel')
71 files changed, 4989 insertions, 867 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index f0c40bf49d9f..e2ec54e2b952 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,9 +91,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o -obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o -obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o +obj-$(CONFIG_ELFCORE) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_TRACE_CLOCK) += trace/ diff --git a/kernel/audit.c b/kernel/audit.c index 678c3f000191..22bb4f24f071 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -64,7 +64,6 @@ #include <linux/security.h> #endif #include <linux/freezer.h> -#include <linux/tty.h> #include <linux/pid_namespace.h> #include <net/netns/generic.h> @@ -430,7 +429,6 @@ restart: attempts, audit_pid); set_current_state(TASK_INTERRUPTIBLE); schedule(); - __set_current_state(TASK_RUNNING); goto restart; } } @@ -1341,15 +1339,14 @@ static inline void audit_get_stamp(struct audit_context *ctx, static long wait_for_auditd(long sleep_time) { DECLARE_WAITQUEUE(wait, current); - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue_exclusive(&audit_backlog_wait, &wait); if (audit_backlog_limit && - skb_queue_len(&audit_skb_queue) > audit_backlog_limit) + skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { + add_wait_queue_exclusive(&audit_backlog_wait, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); sleep_time = schedule_timeout(sleep_time); - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&audit_backlog_wait, &wait); + remove_wait_queue(&audit_backlog_wait, &wait); + } return sleep_time; } @@ -1890,21 +1887,14 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; char comm[sizeof(tsk->comm)]; - char *tty; + struct tty_struct *tty; if (!ab) return; /* tsk == current */ cred = current_cred(); - - spin_lock_irq(&tsk->sighand->siglock); - if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) - tty = tsk->signal->tty->name; - else - tty = "(none)"; - spin_unlock_irq(&tsk->sighand->siglock); - + tty = audit_get_tty(tsk); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" @@ -1920,11 +1910,11 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), - tty, audit_get_sessionid(tsk)); - + tty ? tty_name(tty) : "(none)", + audit_get_sessionid(tsk)); + audit_put_tty(tty); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - audit_log_d_path_exe(ab, tsk->mm); audit_log_task_context(ab); } diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 5efe9b299a12..25772476fa4a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -661,10 +661,10 @@ static int tag_mount(struct vfsmount *mnt, void *arg) static int prune_tree_thread(void *unused) { for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&prune_list)) + if (list_empty(&prune_list)) { + set_current_state(TASK_INTERRUPTIBLE); schedule(); - __set_current_state(TASK_RUNNING); + } mutex_lock(&audit_cmd_mutex); mutex_lock(&audit_filter_mutex); @@ -693,16 +693,14 @@ static int audit_launch_prune(void) { if (prune_thread) return 0; - prune_thread = kthread_create(prune_tree_thread, NULL, + prune_thread = kthread_run(prune_tree_thread, NULL, "audit_prune_tree"); if (IS_ERR(prune_thread)) { pr_err("cannot start thread audit_prune_tree"); prune_thread = NULL; return -ENOMEM; - } else { - wake_up_process(prune_thread); - return 0; } + return 0; } /* called with audit_filter_mutex */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7d0e3cf8abe1..62ab53d7619c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1980,6 +1980,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, { struct audit_buffer *ab; uid_t uid, oldloginuid, loginuid; + struct tty_struct *tty; if (!audit_enabled) return; @@ -1987,14 +1988,17 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid), + tty = audit_get_tty(current); ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid); audit_log_task_context(ab); - audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d", - oldloginuid, loginuid, oldsessionid, sessionid, !rc); + audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", + oldsessionid, sessionid, !rc); + audit_put_tty(tty); audit_log_end(ab); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f1e8a0def99b..b94a36550591 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -231,7 +231,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, hdr->pages = size / PAGE_SIZE; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); - start = (prandom_u32() % hole) & ~(alignment - 1); + start = (get_random_int() % hole) & ~(alignment - 1); /* Leave a random number of instructions before BPF code. */ *image_ptr = &hdr->image[start]; @@ -251,7 +251,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, struct bpf_insn *to_buff) { struct bpf_insn *to = to_buff; - u32 imm_rnd = prandom_u32(); + u32 imm_rnd = get_random_int(); s16 off; BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 8f94ca1860cf..318858edb1cd 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -119,18 +119,10 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) return 0; } -static bool bpf_dname_reserved(const struct dentry *dentry) -{ - return strchr(dentry->d_name.name, '.'); -} - static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; - if (bpf_dname_reserved(dentry)) - return -EPERM; - inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -152,9 +144,6 @@ static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, { struct inode *inode; - if (bpf_dname_reserved(dentry)) - return -EPERM; - inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -187,31 +176,21 @@ static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, } } -static int bpf_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry) +static struct dentry * +bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { - if (bpf_dname_reserved(new_dentry)) - return -EPERM; - - return simple_link(old_dentry, dir, new_dentry); -} - -static int bpf_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - if (bpf_dname_reserved(new_dentry)) - return -EPERM; - - return simple_rename(old_dir, old_dentry, new_dir, new_dentry); + if (strchr(dentry->d_name.name, '.')) + return ERR_PTR(-EPERM); + return simple_lookup(dir, dentry, flags); } static const struct inode_operations bpf_dir_iops = { - .lookup = simple_lookup, + .lookup = bpf_lookup, .mknod = bpf_mkobj, .mkdir = bpf_mkdir, .rmdir = simple_rmdir, - .rename = bpf_rename, - .link = bpf_link, + .rename = simple_rename, + .link = simple_link, .unlink = simple_unlink, }; @@ -378,7 +357,7 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *bpf_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { - return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); + return mount_nodev(type, flags, data, bpf_fill_super); } static struct file_system_type bpf_fs_type = { @@ -386,7 +365,6 @@ static struct file_system_type bpf_fs_type = { .name = "bpf", .mount = bpf_mount, .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT, }; MODULE_ALIAS_FS("bpf"); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index c8ee35287bfe..080a2dfb5800 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -136,7 +136,8 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; - trace = get_perf_callchain(regs, init_nr, kernel, user, false, false); + trace = get_perf_callchain(regs, init_nr, kernel, user, + sysctl_perf_event_max_stack, false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a08d66215245..668e07903c8f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -683,15 +683,11 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, { struct reg_state *regs = env->cur_state.regs; struct reg_state *reg = ®s[regno]; - int linear_size = (int) reg->range - (int) reg->off; - if (linear_size < 0 || linear_size >= MAX_PACKET_OFF) { - verbose("verifier bug\n"); - return -EFAULT; - } - if (off < 0 || off + size > linear_size) { - verbose("invalid access to packet, off=%d size=%d, allowed=%d\n", - off, size, linear_size); + off += reg->off; + if (off < 0 || off + size > reg->range) { + verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", + off, size, regno, reg->id, reg->off, reg->range); return -EACCES; } return 0; @@ -1249,6 +1245,7 @@ static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) struct reg_state *regs = env->cur_state.regs; struct reg_state *dst_reg = ®s[insn->dst_reg]; struct reg_state *src_reg = ®s[insn->src_reg]; + struct reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1271,6 +1268,19 @@ add_imm: */ dst_reg->off += imm; } else { + if (src_reg->type == PTR_TO_PACKET) { + /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ + tmp_reg = *dst_reg; /* save r7 state */ + *dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */ + src_reg = &tmp_reg; /* pretend it's src_reg state */ + /* if the checks below reject it, the copy won't matter, + * since we're rejecting the whole program. If all ok, + * then imm22 state will be added to r7 + * and r7 will be pkt(id=0,off=22,r=62) while + * r6 will stay as pkt(id=0,off=0,r=62) + */ + } + if (src_reg->type == CONST_IMM) { /* pkt_ptr += reg where reg is known constant */ imm = src_reg->imm; @@ -1569,7 +1579,9 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && - dst_reg->type == PTR_TO_PACKET) { + (dst_reg->type == PTR_TO_PACKET || + (BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == PTR_TO_PACKET))) { /* ptr_to_packet += K|X */ return check_packet_ptr_add(env, insn); } else if (BPF_CLASS(insn->code) == BPF_ALU64 && diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1902956baba1..73e93e53884d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -61,7 +61,7 @@ #include <linux/cgroup.h> #include <linux/wait.h> -struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; +DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* See "Frequency meter" comments, below. */ @@ -2528,27 +2528,27 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) * GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. */ -int __cpuset_node_allowed(int node, gfp_t gfp_mask) +bool __cpuset_node_allowed(int node, gfp_t gfp_mask) { struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ unsigned long flags; if (in_interrupt()) - return 1; + return true; if (node_isset(node, current->mems_allowed)) - return 1; + return true; /* * Allow tasks that have access to memory reserves because they have * been OOM killed to get memory anywhere. */ if (unlikely(test_thread_flag(TIF_MEMDIE))) - return 1; + return true; if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ - return 0; + return false; if (current->flags & PF_EXITING) /* Let dying task have memory */ - return 1; + return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ spin_lock_irqsave(&callback_lock, flags); @@ -2591,13 +2591,7 @@ int __cpuset_node_allowed(int node, gfp_t gfp_mask) static int cpuset_spread_node(int *rotor) { - int node; - - node = next_node(*rotor, current->mems_allowed); - if (node == MAX_NUMNODES) - node = first_node(current->mems_allowed); - *rotor = node; - return node; + return *rotor = next_node_in(*rotor, current->mems_allowed); } int cpuset_mem_spread_node(void) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index b9325e7dcba1..179ef4640964 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -19,11 +19,13 @@ struct callchain_cpus_entries { }; int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK; static inline size_t perf_callchain_entry__sizeof(void) { return (sizeof(struct perf_callchain_entry) + - sizeof(__u64) * sysctl_perf_event_max_stack); + sizeof(__u64) * (sysctl_perf_event_max_stack + + sysctl_perf_event_max_contexts_per_stack)); } static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); @@ -32,12 +34,12 @@ static DEFINE_MUTEX(callchain_mutex); static struct callchain_cpus_entries *callchain_cpus_entries; -__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, +__weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { } -__weak void perf_callchain_user(struct perf_callchain_entry *entry, +__weak void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { } @@ -176,14 +178,15 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) if (!kernel && !user) return NULL; - return get_perf_callchain(regs, 0, kernel, user, crosstask, true); + return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true); } struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, - bool crosstask, bool add_mark) + u32 max_stack, bool crosstask, bool add_mark) { struct perf_callchain_entry *entry; + struct perf_callchain_entry_ctx ctx; int rctx; entry = get_callchain_entry(&rctx); @@ -193,12 +196,16 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, if (!entry) goto exit_put; - entry->nr = init_nr; + ctx.entry = entry; + ctx.max_stack = max_stack; + ctx.nr = entry->nr = init_nr; + ctx.contexts = 0; + ctx.contexts_maxed = false; if (kernel && !user_mode(regs)) { if (add_mark) - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); - perf_callchain_kernel(entry, regs); + perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(&ctx, regs); } if (user) { @@ -214,8 +221,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, goto exit_put; if (add_mark) - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); + perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); + perf_callchain_user(&ctx, regs); } } @@ -225,10 +232,15 @@ exit_put: return entry; } +/* + * Used for sysctl_perf_event_max_stack and + * sysctl_perf_event_max_contexts_per_stack. + */ int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int new_value = sysctl_perf_event_max_stack, ret; + int *value = table->data; + int new_value = *value, ret; struct ctl_table new_table = *table; new_table.data = &new_value; @@ -240,7 +252,7 @@ int perf_event_max_stack_handler(struct ctl_table *table, int write, if (atomic_read(&nr_callchain_events)) ret = -EBUSY; else - sysctl_perf_event_max_stack = new_value; + *value = new_value; mutex_unlock(&callchain_mutex); diff --git a/kernel/events/core.c b/kernel/events/core.c index 274450efea90..9c51ec3f0f44 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event) if (event->ctx) put_ctx(event->ctx); - if (event->pmu) { - exclusive_event_destroy(even |
