diff options
| author | Ingo Molnar <mingo@kernel.org> | 2016-04-13 08:57:03 +0200 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2016-04-13 08:57:03 +0200 |
| commit | 889fac6d67d46a5e781c08fb26fec9016db1c307 (patch) | |
| tree | 7e01d04928f7ce343afff5b77ba13c2f3d083326 /kernel | |
| parent | dad38ca64a252144b4ccdfe9730a3fe2b7c61957 (diff) | |
| parent | bf16200689118d19de1b8d2a3c314fc21f5dc7bb (diff) | |
| download | linux-889fac6d67d46a5e781c08fb26fec9016db1c307.tar.gz linux-889fac6d67d46a5e781c08fb26fec9016db1c307.tar.bz2 linux-889fac6d67d46a5e781c08fb26fec9016db1c307.zip | |
Merge tag 'v4.6-rc3' into perf/core, to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
89 files changed, 3485 insertions, 1299 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 53abf008ecb3..f0c40bf49d9f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -14,11 +14,21 @@ obj-y = fork.o exec_domain.o panic.o \ obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER -# Do not trace debug files and internal ftrace files -CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) +# Do not trace internal ftrace files CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) endif +# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip() +# in coverage traces. +KCOV_INSTRUMENT_softirq.o := n +# These are called from save_stack_trace() on slub debug path, +# and produce insane amounts of uninteresting coverage. +KCOV_INSTRUMENT_module.o := n +KCOV_INSTRUMENT_extable.o := n +# Don't self-instrument. +KCOV_INSTRUMENT_kcov.o := n +KASAN_SANITIZE_kcov.o := n + # cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO) @@ -69,6 +79,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o obj-$(CONFIG_AUDIT_TREE) += audit_tree.o obj-$(CONFIG_GCOV_KERNEL) += gcov/ +obj-$(CONFIG_KCOV) += kcov.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o diff --git a/kernel/audit.c b/kernel/audit.c index 3a3e5deeda8d..678c3f000191 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -809,6 +809,16 @@ static int audit_set_feature(struct sk_buff *skb) return 0; } +static int audit_replace(pid_t pid) +{ + struct sk_buff *skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, + &pid, sizeof(pid)); + + if (!skb) + return -ENOMEM; + return netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); +} + static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; @@ -870,9 +880,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } if (s.mask & AUDIT_STATUS_PID) { int new_pid = s.pid; + pid_t requesting_pid = task_tgid_vnr(current); - if ((!new_pid) && (task_tgid_vnr(current) != audit_pid)) + if ((!new_pid) && (requesting_pid != audit_pid)) { + audit_log_config_change("audit_pid", new_pid, audit_pid, 0); return -EACCES; + } + if (audit_pid && new_pid && + audit_replace(requesting_pid) != -ECONNREFUSED) { + audit_log_config_change("audit_pid", new_pid, audit_pid, 0); + return -EEXIST; + } if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, audit_pid, 1); audit_pid = new_pid; @@ -920,7 +938,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err == 1) { /* match or error */ err = 0; if (msg_type == AUDIT_USER_TTY) { - err = tty_audit_push_current(); + err = tty_audit_push(); if (err) break; } @@ -1030,20 +1048,19 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; case AUDIT_TTY_GET: { struct audit_tty_status s; - struct task_struct *tsk = current; + unsigned int t; - spin_lock(&tsk->sighand->siglock); - s.enabled = tsk->signal->audit_tty; - s.log_passwd = tsk->signal->audit_tty_log_passwd; - spin_unlock(&tsk->sighand->siglock); + t = READ_ONCE(current->signal->audit_tty); + s.enabled = t & AUDIT_TTY_ENABLE; + s.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { struct audit_tty_status s, old; - struct task_struct *tsk = current; struct audit_buffer *ab; + unsigned int t; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ @@ -1053,14 +1070,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) (s.log_passwd != 0 && s.log_passwd != 1)) err = -EINVAL; - spin_lock(&tsk->sighand->siglock); - old.enabled = tsk->signal->audit_tty; - old.log_passwd = tsk->signal->audit_tty_log_passwd; - if (!err) { - tsk->signal->audit_tty = s.enabled; - tsk->signal->audit_tty_log_passwd = s.log_passwd; + if (err) + t = READ_ONCE(current->signal->audit_tty); + else { + t = s.enabled | (-s.log_passwd & AUDIT_TTY_LOG_PASSWD); + t = xchg(¤t->signal->audit_tty, t); } - spin_unlock(&tsk->sighand->siglock); + old.enabled = t & AUDIT_TTY_ENABLE; + old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d" diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 9f194aad0adc..3cf1c5978d39 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -185,7 +185,7 @@ static struct audit_watch *audit_init_watch(char *path) return watch; } -/* Translate a watch string to kernel respresentation. */ +/* Translate a watch string to kernel representation. */ int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op) { struct audit_watch *watch; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b8ff9e193753..94ca7b1e5e7e 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -158,7 +158,7 @@ char *audit_unpack_string(void **bufp, size_t *remain, size_t len) return str; } -/* Translate an inode field to kernel respresentation. */ +/* Translate an inode field to kernel representation. */ static inline int audit_to_inode(struct audit_krule *krule, struct audit_field *f) { @@ -415,7 +415,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) return 0; } -/* Translate struct audit_rule_data to kernel's rule respresentation. */ +/* Translate struct audit_rule_data to kernel's rule representation. */ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, size_t datasz) { @@ -593,7 +593,7 @@ static inline size_t audit_pack_string(void **bufp, const char *str) return len; } -/* Translate kernel rule respresentation to struct audit_rule_data. */ +/* Translate kernel rule representation to struct audit_rule_data. */ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) { struct audit_rule_data *data; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 195ffaee50b9..7d0e3cf8abe1 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2412,8 +2412,8 @@ void __audit_seccomp(unsigned long syscall, long signr, int code) return; audit_log_task(ab); audit_log_format(ab, " sig=%ld arch=%x syscall=%ld compat=%d ip=0x%lx code=0x%x", - signr, syscall_get_arch(), syscall, is_compat_task(), - KSTK_EIP(current), code); + signr, syscall_get_arch(), syscall, + in_compat_syscall(), KSTK_EIP(current), code); audit_log_end(ab); } diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 13272582eee0..eed911d091da 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,4 +1,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o +ifeq ($(CONFIG_PERF_EVENTS),y) +obj-$(CONFIG_BPF_SYSCALL) += stackmap.o +endif diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 89ebbc4d1164..76d5a794e426 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -17,15 +17,43 @@ #include <linux/filter.h> #include <linux/perf_event.h> +static void bpf_array_free_percpu(struct bpf_array *array) +{ + int i; + + for (i = 0; i < array->map.max_entries; i++) + free_percpu(array->pptrs[i]); +} + +static int bpf_array_alloc_percpu(struct bpf_array *array) +{ + void __percpu *ptr; + int i; + + for (i = 0; i < array->map.max_entries; i++) { + ptr = __alloc_percpu_gfp(array->elem_size, 8, + GFP_USER | __GFP_NOWARN); + if (!ptr) { + bpf_array_free_percpu(array); + return -ENOMEM; + } + array->pptrs[i] = ptr; + } + + return 0; +} + /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) { + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; struct bpf_array *array; - u32 elem_size, array_size; + u64 array_size; + u32 elem_size; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size == 0) + attr->value_size == 0 || attr->map_flags) return ERR_PTR(-EINVAL); if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) @@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); - /* check round_up into zero and u32 overflow */ - if (elem_size == 0 || - attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) + array_size = sizeof(*array); + if (percpu) + array_size += (u64) attr->max_entries * sizeof(void *); + else + array_size += (u64) attr->max_entries * elem_size; + + /* make sure there is no u32 overflow later in round_up() */ + if (array_size >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); - array_size = sizeof(*array) + attr->max_entries * elem_size; /* allocate all map elements and zero-initialize them */ array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); @@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } /* copy mandatory map attributes */ + array->map.map_type = attr->map_type; array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; array->elem_size = elem_size; + if (!percpu) + goto out; + + array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); + + if (array_size >= U32_MAX - PAGE_SIZE || + elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { + kvfree(array); + return ERR_PTR(-ENOMEM); + } +out: + array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; + return &array->map; } @@ -67,12 +112,50 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; - if (index >= array->map.max_entries) + if (unlikely(index >= array->map.max_entries)) return NULL; return array->value + array->elem_size * index; } +/* Called from eBPF program */ +static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return this_cpu_ptr(array->pptrs[index]); +} + +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + void __percpu *pptr; + int cpu, off = 0; + u32 size; + + if (unlikely(index >= array->map.max_entries)) + return -ENOENT; + + /* per_cpu areas are zero-filled and bpf programs can only + * access 'value_size' of them, so copying rounded areas + * will not leak any kernel data + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + pptr = array->pptrs[index]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); + off += size; + } + rcu_read_unlock(); + return 0; +} + /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { @@ -99,19 +182,62 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; - if (map_flags > BPF_EXIST) + if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; - if (index >= array->map.max_entries) + if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; - if (map_flags == BPF_NOEXIST) + if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_ |
