diff options
| author | Ingo Molnar <mingo@kernel.org> | 2016-06-08 09:26:46 +0200 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2016-06-08 09:26:46 +0200 |
| commit | 616d1c1b98ac79f30216a57a170dd7cea19b3df3 (patch) | |
| tree | 6f244c2e5a7160190e73bc82b4cd7fa7bb22ee31 /kernel | |
| parent | a4f144ebbdf6f7807c477bce8e136047ed27321f (diff) | |
| parent | c8ae067f2635be0f8c7e5db1bb74b757d623e05b (diff) | |
| download | linux-616d1c1b98ac79f30216a57a170dd7cea19b3df3.tar.gz linux-616d1c1b98ac79f30216a57a170dd7cea19b3df3.tar.bz2 linux-616d1c1b98ac79f30216a57a170dd7cea19b3df3.zip | |
Merge branch 'linus' into perf/core, to refresh the branch
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
74 files changed, 6742 insertions, 1104 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index f0c40bf49d9f..e2ec54e2b952 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,9 +91,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o -obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o -obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o +obj-$(CONFIG_ELFCORE) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_TRACE_CLOCK) += trace/ diff --git a/kernel/audit.c b/kernel/audit.c index 678c3f000191..22bb4f24f071 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -64,7 +64,6 @@ #include <linux/security.h> #endif #include <linux/freezer.h> -#include <linux/tty.h> #include <linux/pid_namespace.h> #include <net/netns/generic.h> @@ -430,7 +429,6 @@ restart: attempts, audit_pid); set_current_state(TASK_INTERRUPTIBLE); schedule(); - __set_current_state(TASK_RUNNING); goto restart; } } @@ -1341,15 +1339,14 @@ static inline void audit_get_stamp(struct audit_context *ctx, static long wait_for_auditd(long sleep_time) { DECLARE_WAITQUEUE(wait, current); - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue_exclusive(&audit_backlog_wait, &wait); if (audit_backlog_limit && - skb_queue_len(&audit_skb_queue) > audit_backlog_limit) + skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { + add_wait_queue_exclusive(&audit_backlog_wait, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); sleep_time = schedule_timeout(sleep_time); - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&audit_backlog_wait, &wait); + remove_wait_queue(&audit_backlog_wait, &wait); + } return sleep_time; } @@ -1890,21 +1887,14 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; char comm[sizeof(tsk->comm)]; - char *tty; + struct tty_struct *tty; if (!ab) return; /* tsk == current */ cred = current_cred(); - - spin_lock_irq(&tsk->sighand->siglock); - if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) - tty = tsk->signal->tty->name; - else - tty = "(none)"; - spin_unlock_irq(&tsk->sighand->siglock); - + tty = audit_get_tty(tsk); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" @@ -1920,11 +1910,11 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), - tty, audit_get_sessionid(tsk)); - + tty ? tty_name(tty) : "(none)", + audit_get_sessionid(tsk)); + audit_put_tty(tty); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - audit_log_d_path_exe(ab, tsk->mm); audit_log_task_context(ab); } diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 5efe9b299a12..25772476fa4a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -661,10 +661,10 @@ static int tag_mount(struct vfsmount *mnt, void *arg) static int prune_tree_thread(void *unused) { for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&prune_list)) + if (list_empty(&prune_list)) { + set_current_state(TASK_INTERRUPTIBLE); schedule(); - __set_current_state(TASK_RUNNING); + } mutex_lock(&audit_cmd_mutex); mutex_lock(&audit_filter_mutex); @@ -693,16 +693,14 @@ static int audit_launch_prune(void) { if (prune_thread) return 0; - prune_thread = kthread_create(prune_tree_thread, NULL, + prune_thread = kthread_run(prune_tree_thread, NULL, "audit_prune_tree"); if (IS_ERR(prune_thread)) { pr_err("cannot start thread audit_prune_tree"); prune_thread = NULL; return -ENOMEM; - } else { - wake_up_process(prune_thread); - return 0; } + return 0; } /* called with audit_filter_mutex */ diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3cf1c5978d39..d6709eb70970 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -367,7 +367,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ - watch->dev = d_backing_inode(d)->i_sb->s_dev; + watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } dput(d); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7d0e3cf8abe1..62ab53d7619c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1980,6 +1980,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, { struct audit_buffer *ab; uid_t uid, oldloginuid, loginuid; + struct tty_struct *tty; if (!audit_enabled) return; @@ -1987,14 +1988,17 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid), + tty = audit_get_tty(current); ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid); audit_log_task_context(ab); - audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d", - oldloginuid, loginuid, oldsessionid, sessionid, !rc); + audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", + oldsessionid, sessionid, !rc); + audit_put_tty(tty); audit_log_end(ab); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index be0abf669ced..b94a36550591 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -129,14 +129,83 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, return fp; } -EXPORT_SYMBOL_GPL(bpf_prog_realloc); void __bpf_prog_free(struct bpf_prog *fp) { kfree(fp->aux); vfree(fp); } -EXPORT_SYMBOL_GPL(__bpf_prog_free); + +static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_JMP && + /* Call and Exit are both special jumps with no + * target inside the BPF instruction image. + */ + BPF_OP(insn->code) != BPF_CALL && + BPF_OP(insn->code) != BPF_EXIT; +} + +static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) +{ + struct bpf_insn *insn = prog->insnsi; + u32 i, insn_cnt = prog->len; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (!bpf_is_jmp_and_has_target(insn)) + continue; + + /* Adjust offset of jmps if we cross boundaries. */ + if (i < pos && i + insn->off + 1 > pos) + insn->off += delta; + else if (i > pos + delta && i + insn->off + 1 <= pos + delta) + insn->off -= delta; + } +} + +struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, + const struct bpf_insn *patch, u32 len) +{ + u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; + struct bpf_prog *prog_adj; + + /* Since our patchlet doesn't expand the image, we're done. */ + if (insn_delta == 0) { + memcpy(prog->insnsi + off, patch, sizeof(*patch)); + return prog; + } + + insn_adj_cnt = prog->len + insn_delta; + + /* Several new instructions need to be inserted. Make room + * for them. Likely, there's no need for a new allocation as + * last page could have large enough tailroom. + */ + prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), + GFP_USER); + if (!prog_adj) + return NULL; + + prog_adj->len = insn_adj_cnt; + + /* Patching happens in 3 steps: + * + * 1) Move over tail of insnsi from next instruction onwards, + * so we can patch the single target insn with one or more + * new ones (patching is always from 1 to n insns, n > 0). + * 2) Inject new instructions at the target location. + * 3) Adjust branch offsets if necessary. + */ + insn_rest = insn_adj_cnt - off - len; + + memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, + sizeof(*patch) * insn_rest); + memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); + + bpf_adj_branches(prog_adj, off, insn_delta); + + return prog_adj; +} #ifdef CONFIG_BPF_JIT struct bpf_binary_header * @@ -162,7 +231,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, hdr->pages = size / PAGE_SIZE; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); - start = (prandom_u32() % hole) & ~(alignment - 1); + start = (get_random_int() % hole) & ~(alignment - 1); /* Leave a random number of instructions before BPF code. */ *image_ptr = &hdr->image[start]; @@ -174,6 +243,209 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) { module_memfree(hdr); } + +int bpf_jit_harden __read_mostly; + +static int bpf_jit_blind_insn(const struct bpf_insn *from, + const struct bpf_insn *aux, + struct bpf_insn *to_buff) +{ + struct bpf_insn *to = to_buff; + u32 imm_rnd = get_random_int(); + s16 off; + + BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); + BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + + if (from->imm == 0 && + (from->code == (BPF_ALU | BPF_MOV | BPF_K) || + from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { + *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg); + goto out; + } + + switch (from->code) { + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); + break; + + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX); + break; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. */ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); + break; + + case BPF_LD | BPF_ABS | BPF_W: + case BPF_LD | BPF_ABS | BPF_H: + case BPF_LD | BPF_ABS | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); + break; + + case BPF_LD | BPF_IND | BPF_W: + case BPF_LD | BPF_IND | BPF_H: + case BPF_LD | BPF_IND | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg); + *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); + break; + + case BPF_LD | BPF_IMM | BPF_DW: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); + *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX); + break; + case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */ + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); + break; + + case BPF_ST | BPF_MEM | BPF_DW: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); + break; + } +out: + return to - to_buff; +} + +static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, + gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_prog *fp; + + fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); + if (fp != NULL) { + kmemcheck_annotate_bitfield(fp, meta); + + /* aux->prog still points to the fp_other one, so + * when promoting the clone to the real program, + * this still needs to be adapted. + */ + memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE); + } + + return fp; +} + +static void bpf_prog_clone_free(struct bpf_prog *fp) +{ + /* aux was stolen by the other clone, so we cannot free + * it from this path! It will be freed eventually by the + * other program on release. + * + * At this point, we don't need a deferred release since + * clone is guaranteed to not be locked. + */ + fp->aux = NULL; + __bpf_prog_free(fp); +} + +void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) +{ + /* We have to repoint aux->prog to self, as we don't + * know whether fp here is the clone or the original. + */ + fp->aux->prog = fp; + bpf_prog_clone_free(fp_other); +} + +struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) +{ + struct bpf_insn insn_buff[16], aux[2]; + struct bpf_prog *clone, *tmp; + int insn_delta, insn_cnt; + struct bpf_insn *insn; + int i, rewritten; + + if (!bpf_jit_blinding_enabled()) + return prog; + + clone = bpf_prog_clone_create(prog, GFP_USER); + if (!clone) + return ERR_PTR(-ENOMEM); + + insn_cnt = clone->len; + insn = clone->insnsi; + + for (i = 0; i < insn_cnt; i++, insn++) { + /* We temporarily need to hold the original ld64 insn + * so that we can still access the first part in the + * second blinding run. + */ + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) && + insn[1].code == 0) + memcpy(aux, insn, sizeof(aux)); + + rewritten = bpf_jit_blind_insn(insn, aux, insn_buff); + if (!rewritten) + continue; + + tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); + if (!tmp) { + /* Patching may have repointed aux->prog during + * realloc from the original one, so we need to + * fix it up here on error. + */ + bpf_jit_prog_release_other(prog, clone); + return ERR_PTR(-ENOMEM); + } + + clone = tmp; + insn_delta = rewritten - 1; + + /* Walk new program and skip insns we just inserted. */ + insn = clone->insnsi + i + insn_delta; + insn_cnt += insn_delta; + i += insn_delta; + } + + return clone; +} #endif /* CONFIG_BPF_JIT */ /* Base function for offset calculation. Needs to go into .text section, @@ -692,15 +964,22 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) /** * bpf_prog_select_runtime - select exec runtime for BPF program * @fp: bpf_prog populated with internal BPF program + * @err: pointer to error variable * * Try to JIT eBPF program, if JIT is not available, use interpreter. * The BPF program will be executed via BPF_PROG_RUN() macro. */ -int bpf_prog_sele |
