diff options
| -rw-r--r-- | arch/arm/net/bpf_jit_32.c | 2 | ||||
| -rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 70 | ||||
| -rw-r--r-- | arch/mips/net/ebpf_jit.c | 2 | ||||
| -rw-r--r-- | arch/powerpc/net/bpf_jit_comp64.c | 2 | ||||
| -rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 2 | ||||
| -rw-r--r-- | arch/sparc/net/bpf_jit_comp_64.c | 2 | ||||
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 49 | ||||
| -rw-r--r-- | include/linux/bpf.h | 4 | ||||
| -rw-r--r-- | include/linux/bpf_verifier.h | 45 | ||||
| -rw-r--r-- | include/linux/filter.h | 13 | ||||
| -rw-r--r-- | include/uapi/linux/bpf.h | 6 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 104 | ||||
| -rw-r--r-- | kernel/bpf/disasm.c | 8 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 1120 | ||||
| -rw-r--r-- | tools/include/uapi/linux/bpf.h | 6 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf.h | 2 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 170 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/Makefile | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/test_l4lb_noinline.c | 473 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/test_progs.c | 95 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/test_verifier.c | 1624 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/test_xdp_noinline.c | 833 |
23 files changed, 4378 insertions, 269 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c199990e12b6..4425189bb24c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1824,7 +1824,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* If BPF JIT was not enabled then we must fall back to * the interpreter. */ - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; /* If constant blinding was enabled and we failed during blinding diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ba38d403abb2..396490cf7316 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -99,6 +99,20 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } } +static inline void emit_addr_mov_i64(const int reg, const u64 val, + struct jit_ctx *ctx) +{ + u64 tmp = val; + int shift = 0; + + emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx); + for (;shift < 48;) { + tmp >>= 16; + shift += 16; + emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx); + } +} + static inline void emit_a64_mov_i(const int is64, const int reg, const s32 val, struct jit_ctx *ctx) { @@ -603,7 +617,10 @@ emit_cond_jmp: const u8 r0 = bpf2a64[BPF_REG_0]; const u64 func = (u64)__bpf_call_base + imm; - emit_a64_mov_i64(tmp, func, ctx); + if (ctx->prog->is_func) + emit_addr_mov_i64(tmp, func, ctx); + else + emit_a64_mov_i64(tmp, func, ctx); emit(A64_BLR(tmp), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; @@ -835,16 +852,24 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } +struct arm64_jit_data { + struct bpf_binary_header *header; + u8 *image; + struct jit_ctx ctx; +}; + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; + struct arm64_jit_data *jit_data; bool tmp_blinded = false; + bool extra_pass = false; struct jit_ctx ctx; int image_size; u8 *image_ptr; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); @@ -858,13 +883,29 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = tmp; } + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + goto skip_init_ctx; + } memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; - goto out; + goto out_off; } /* 1. Initial fake pass to compute ctx->idx. */ @@ -895,6 +936,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2. Now, the actual pass. */ ctx.image = (__le32 *)image_ptr; +skip_init_ctx: ctx.idx = 0; build_prologue(&ctx); @@ -920,13 +962,31 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, ctx.image + ctx.idx); - bpf_jit_binary_lock_ro(header); + if (!prog->is_func || extra_pass) { + if (extra_pass && ctx.idx != jit_data->ctx.idx) { + pr_err_once("multi-func JIT bug %d != %d\n", + ctx.idx, jit_data->ctx.idx); + bpf_jit_binary_free(header); + prog->bpf_func = NULL; + prog->jited = 0; + goto out_off; + } + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } prog->bpf_func = (void *)ctx.image; prog->jited = 1; prog->jited_len = image_size; + if (!prog->is_func || extra_pass) { out_off: - kfree(ctx.offset); + kfree(ctx.offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } out: if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 962b0259b4b6..97069a1b6f43 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1869,7 +1869,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!bpf_jit_enable || !cpu_has_mips64r2) + if (!prog->jit_requested || !cpu_has_mips64r2) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1..d5a5bc43cf8f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -993,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *tmp_fp; bool bpf_blinded = false; - if (!bpf_jit_enable) + if (!fp->jit_requested) return org_fp; tmp_fp = bpf_jit_blind_constants(org_fp); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e81c16838b90..f4baa8c514d3 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1300,7 +1300,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (!bpf_jit_enable) + if (!fp->jit_requested) return orig_fp; tmp = bpf_jit_blind_constants(fp); diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 5765e7e711f7..a2f1b5e774a7 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1517,7 +1517,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) u8 *image_ptr; int pass; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0554e8aef4d5..87f214fbe66e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1109,19 +1109,29 @@ common_load: return proglen; } +struct x64_jit_data { + struct bpf_binary_header *header; + int *addrs; + u8 *image; + int proglen; + struct jit_context ctx; +}; + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; struct bpf_prog *tmp, *orig_prog = prog; + struct x64_jit_data *jit_data; int proglen, oldproglen = 0; struct jit_context ctx = {}; bool tmp_blinded = false; + bool extra_pass = false; u8 *image = NULL; int *addrs; int pass; int i; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); @@ -1135,10 +1145,28 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = tmp; } + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + addrs = jit_data->addrs; + if (addrs) { + ctx = jit_data->ctx; + oldproglen = jit_data->proglen; + image = jit_data->image; + header = jit_data->header; + extra_pass = true; + goto skip_init_addrs; + } addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); if (!addrs) { prog = orig_prog; - goto out; + goto out_addrs; } /* Before first pass, make a rough estimation of addrs[] @@ -1149,6 +1177,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) addrs[i] = proglen; } ctx.cleanup_addr = proglen; +skip_init_addrs: /* JITed image shrinks with every pass and the loop iterates * until the image stops shrinking. Very large bpf programs @@ -1189,7 +1218,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (image) { bpf_flush_icache(header, image + proglen); - bpf_jit_binary_lock_ro(header); + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(header); + } else { + jit_data->addrs = addrs; + jit_data->ctx = ctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = header; + } prog->bpf_func = (void *)image; prog->jited = 1; prog->jited_len = proglen; @@ -1197,8 +1234,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; } + if (!prog->is_func || extra_pass) { out_addrs: - kfree(addrs); + kfree(addrs); + kfree(jit_data); + prog->aux->jit_data = NULL; + } out: if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 54dc7cae2949..da54ef644fcd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -200,6 +200,9 @@ struct bpf_prog_aux { u32 max_ctx_offset; u32 stack_depth; u32 id; + u32 func_cnt; + struct bpf_prog **func; + void *jit_data; /* JIT specific data. arch dependent */ struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_prog_ops *ops; @@ -402,6 +405,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); +void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c561b986bab0..aaac589e490c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -76,6 +76,14 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* Inside the callee two registers can be both PTR_TO_STACK like + * R1=fp-8 and R2=fp-8, but one of them points to this function stack + * while another to the caller's stack. To differentiate them 'frameno' + * is used which is an index in bpf_verifier_state->frame[] array + * pointing to bpf_func_state. + * This field must be second to last, for states_equal() reasons. + */ + u32 frameno; /* This field must be last, for states_equal() reasons. */ enum bpf_reg_liveness live; }; @@ -83,7 +91,8 @@ struct bpf_reg_state { enum bpf_stack_slot_type { STACK_INVALID, /* nothing was stored in this stack slot */ STACK_SPILL, /* register spilled into stack */ - STACK_MISC /* BPF program wrote some data into this slot */ + STACK_MISC, /* BPF program wrote some data into this slot */ + STACK_ZERO, /* BPF program wrote constant zero */ }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ @@ -96,13 +105,34 @@ struct bpf_stack_state { /* state of the program: * type of all registers and stack info */ -struct bpf_verifier_state { +struct bpf_func_state { struct bpf_reg_state regs[MAX_BPF_REG]; struct bpf_verifier_state *parent; + /* index of call instruction that called into this func */ + int callsite; + /* stack frame number of this function state from pov of + * enclosing bpf_verifier_state. + * 0 = main function, 1 = first callee. + */ + u32 frameno; + /* subprog number == index within subprog_stack_depth + * zero == main subprog + */ + u32 subprogno; + + /* should be second to last. See copy_func_state() */ int allocated_stack; struct bpf_stack_state *stack; }; +#define MAX_CALL_FRAMES 8 +struct bpf_verifier_state { + /* call stack tracking */ + struct bpf_func_state *frame[MAX_CALL_FRAMES]; + struct bpf_verifier_state *parent; + u32 curframe; +}; + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -113,6 +143,7 @@ struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + s32 call_imm; /* saved imm field of call insn */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ bool seen; /* this insn was processed by the verifier */ @@ -141,6 +172,8 @@ struct bpf_ext_analyzer_ops { int insn_idx, int prev_insn_idx); }; +#define BPF_MAX_SUBPROGS 256 + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -159,13 +192,17 @@ struct bpf_verifier_env { bool allow_ptr_leaks; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ - struct bpf_verifer_log log; + u32 subprog_starts[BPF_MAX_SUBPROGS]; + u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; + u32 subprog_cnt; }; static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) { - return env->cur_state->regs; + struct bpf_verifier_state *cur = env->cur_state; + + return cur->frame[cur->curframe]->regs; } #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/linux/filter.h b/include/linux/filter.h index 5feb441d3dd9..e872b4ebaa57 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -58,6 +58,9 @@ struct bpf_prog_aux; /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 +/* unused opcode to mark call to interpreter with arguments */ +#define BPF_CALL_ARGS 0xe0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. @@ -455,10 +458,13 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ + jit_requested:1,/* archs need to JIT the prog */ locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ + blinded:1, /* Was blinded */ + is_func:1, /* program is a bpf function */ kprobe_override:1; /* Do we override a kprobe? */ enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ @@ -710,6 +716,9 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +#define __bpf_call_base_args \ + ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ + __bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); @@ -798,7 +807,7 @@ static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) return fp->jited && bpf_jit_is_ebpf(); } -static inline bool bpf_jit_blinding_enabled(void) +static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) { /* These are the prerequisites, should someone ever have the * idea to call blinding outside of them, we make sure to @@ -806,7 +815,7 @@ static inline bool bpf_jit_blinding_enabled(void) */ if (!bpf_jit_is_ebpf()) return false; - if (!bpf_jit_enable) + if (!prog->jit_requested) return false; if (!bpf_jit_harden) return false; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 595bda120cfb..d01f1cb3cfc0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d32bebf4f2de..768e0a02d8c8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -94,6 +94,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) fp->pages = size / PAGE_SIZE; fp->aux = aux; fp->aux->prog = fp; + fp->jit_requested = ebpf_jit_enabled(); INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); @@ -217,30 +218,40 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) return 0; } -static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) -{ - return BPF_CLASS(insn->code) == BPF_JMP && - /* Call and Exit are both special jumps with no - * target inside the BPF instruction image. - */ - BPF_OP(insn->code) != BPF_CALL && - BPF_OP(insn->code) != BPF_EXIT; -} - static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) { struct bpf_insn *insn = prog->insnsi; u32 i, insn_cnt = prog->len; + bool pseudo_call; + u8 code; + int off; for (i = 0; i < insn_cnt; i++, insn++) { - if (!bpf_is_jmp_and_has_target(insn)) + code = insn->code; + if (BPF_CLASS(code) != BPF_JMP) continue; + if (BPF_OP(code) == BPF_EXIT) + continue; + if (BPF_OP(code) == BPF_CALL) { + if (insn->src_reg == BPF_PSEUDO_CALL) + pseudo_call = true; + else + continue; + } else { + pseudo_call = false; + } + off = pseudo_call ? insn->imm : insn->off; /* Adjust offset of jmps if we cross boundaries. */ - if (i < pos && i + insn->off + 1 > pos) - insn->off += delta; - else if (i > pos + delta && i + insn->off + 1 <= pos + delta) - insn->off -= delta; + if (i < pos && i + off + 1 > pos) + off += delta; + else if (i > pos + delta && i + off + 1 <= pos + delta) + off -= delta; + + if (pseudo_call) + insn->imm = off; + else + insn->off = off; } } @@ -711,7 +722,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) struct bpf_insn *insn; int i, rewritten; - if (!bpf_jit_blinding_enabled()) + if (!bpf_jit_blinding_enabled(prog) || prog->blinded) return prog; clone = bpf_prog_clone_create(prog, GFP_USER); @@ -753,6 +764,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) i += insn_delta; } + clone->blinded = 1; return clone; } #endif /* CONFIG_BPF_JIT */ @@ -774,8 +786,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); * * Decode and execute eBPF instructions. */ -static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, - u64 *stack) +static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { u64 tmp; static const void *jumptable[256] = { @@ -835,6 +846,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, + [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, @@ -1025,6 +1037,13 @@ select_insn: BPF_R4, BPF_R5); CONT; + JMP_CALL_ARGS: + BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2, + BPF_R3, BPF_R4, + BPF_R5, + insn + insn->off + 1); + CONT; + JMP_TAIL_CALL: { struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -1297,6 +1316,23 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn return ___bpf_prog_run(regs, insn, stack); \ } +#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size +#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \ +static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ + const struct bpf_insn *insn) \ +{ \ + u64 stack[stack_size / sizeof(u64)]; \ + u64 regs[MAX_BPF_REG]; \ +\ + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ + BPF_R1 = r1; \ + BPF_R2 = r2; \ + BPF_R3 = r3; \ + BPF_R4 = r4; \ + BPF_R5 = r5; \ + return ___bpf_prog_run(regs, insn, stack); \ +} + #define EVAL1(FN, X) FN(X) #define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y) #define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y) @@ -1308,6 +1344,10 @@ EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192); EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384); EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512); +EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192); +EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384); +EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512); + #define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size), static unsigned int (*interpreters[])(const void *ctx, @@ -1316,6 +1356,24 @@ EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) }; +#undef PROG_NAME_LIST +#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size), +static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, + const struct bpf_insn *insn) = { +EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) +EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) +EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) +}; +#undef PROG_NAME_LIST + +void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) +{ + stack_depth = max_t(u32, stack_depth, 1); + insn->off = (s16) insn->imm; + insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - + __bpf_call_base_args; + insn->code = BPF_JMP | BPF_CALL_ARGS; +} bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) @@ -1572,11 +1630,19 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; + int i; aux = container_of(work, struct bpf_prog_aux, work); if (bpf_prog_is_dev_bound(aux)) bpf_prog_offload_destroy(aux->prog); - bpf_jit_free(aux->prog); + for (i = 0; i < aux->func_cnt; i++) + bpf_jit_free(aux->func[i]); + if (aux->func_cnt) { + kfree(aux->func); + bpf_prog_unlock_free(aux->prog); + } else { + bpf_jit_free(aux->prog); + } } /* Free internal BPF program */ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index e682850c9715..883f88fa5bfc 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -189,8 +189,12 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { - verbose(env, "(%02x) call %s#%d\n", insn->code, - func_id_name(insn->imm), insn->imm); + if (insn->src_reg == BPF_PSEUDO_CALL) + verbose(env, "(%02x) call pc%+d\n", insn->code, + insn->imm); + else + verbose(env, "(%02x) call %s#%d\n", insn->code, + func_id_name(insn->imm), insn->imm); } else if (insn->code == (BPF_JMP | BPF_JA)) { verbose(env, "(%02x) goto pc%+d\n", insn->code, insn->off); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2c4cfeaa8d5e..e2e1c78ce1dc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1194,7 +1194,8 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_used_maps; /* eBPF program is ready to be JITed */ - prog = bpf_prog_select_runtime(prog, &err); + if (!prog->bpf_func) + prog = bpf_prog_select_runtime(prog, &err); if (err < 0) goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e807bda7fe29..48b2901cf483 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20,6 +20,8 @@ #include <linux/file.h> #include <linux/vmalloc.h> #include <linux/stringify.h> +#include <linux/bsearch.h> +#include <linux/sort.h> #include "disasm.h" @@ -227,13 +229,23 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "w"); } +static struct bpf_func_state *func(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg) +{ + struct bpf_verifier_state *cur = env->cur_state; + + return cur->frame[reg->frameno]; +} + static void print_verifier_state(struct bpf_verifier_env *env, - struct bpf_verifier_state *state) + const struct bpf_func_state *state) { - struct bpf_reg_state *reg; + const struct bpf_reg_state *reg; enum bpf_reg_type t; int i; + if (state->frameno) + verbose(env, " frame%d:", state->frameno); for (i = 0; i < MAX_BPF_REG; i++) { reg = &state->regs[i]; t = reg->type; @@ -246,6 +258,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); + if (t == PTR_TO_STACK) + verbose(env, ",call_%d", func(env, reg)->callsite); } else { verbose(env, "(id=%d", reg->id); if (t != SCALAR_VALUE) @@ -297,12 +311,14 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "=%s", reg_type_str[state->stack[i].spilled_ptr.type]); } + if (state->stack[i].slot_type[0] == STACK_ZERO) + verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE); } verbose(env, "\n"); } -static int copy_stack_state(struct bpf_verifier_state *dst, - const struct bpf_verifier_state *src) +static int copy_stack_state(struct bpf_func_state *dst, + const struct bpf_func_state *src) { if (!src->stack) return 0; @@ -318,13 +334,13 @@ static int copy_stack_state(struct bpf_verifier_state *dst, /* do_check() starts with zero-sized stack in struct bpf_verifier_state to * make it consume minimal amount of memory. check_stack_write() access from - * the program calls into realloc_verifier_state() to grow the stack size. + * the program calls into realloc_func_state() to grow the stack size. * Note there is a non-zero 'parent' pointer inside bpf_verifier_state * which this function copies over. It points to previous bpf_verifier_state * which is never reallocated */ -static int realloc_verifier_state(struct bpf_verifier_state *state, int size, - bool copy_old) +static int realloc_func_state(struct bpf_func_state *state, int size, + bool copy_old) { u32 old_size = state->allocated_stack; struct bpf_stack_state *new_stack; @@ -357,10 +373,21 @@ static int realloc_verifier_state(struct bpf_verifier_state *state, int size, return 0; } +static void free_func_state(struct bpf_func_state *state) +{ + kfree(state->stack); + kfree(state); +} + static void free_verifier_state(struct bpf_verifier_state *state, bool free_self) { - kfree(state->stack); + int i; + + for (i = 0; i <= state->curframe; i++) { + free_func_state(state->frame[i]); + state->frame[i] = NULL; + } if (free_self) kfree(state); } @@ -368,18 +395,46 @@ static void free_verifier_state(struct bpf_verifier_state *state, /* copy verifier state from src to dst growing dst stack space * when necessary to accommodate larger src stack */ -static int copy_verifier_state(struct bpf_verifier_state *dst, - const struct bpf_verifier_state *src) +static int copy_func_state(struct bpf_func_state *dst, + const struct bpf_func_state *src) { int err; - err = realloc_verifier_state(dst, src->allocated_stack, false); + err = realloc_func_state(dst, src->allocated_stack, false); if (err) return err; - memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack)); + memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack)); return copy_stack_state(dst, src); } +static int copy_verifier_state(struct bpf_verifier_state *dst_state, + const struct bpf_verifier_state *src) +{ + struct bpf_func_state *dst; + int i, err; + + /* if dst has more stack frames then src frame, free them */ + for (i = src->curframe + 1; i <= dst_state->curframe; i++) { + free_func_state(dst_state->frame[i]); + dst_state->frame[i] = NULL; + } + dst_state->curframe = src->curframe; + dst_state->parent = src->parent; + for (i = 0; i <= src->curframe; i++) { + dst = dst_state->frame[i]; + if (!dst) { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return -ENOMEM; + dst_state->frame[i] = dst; + } + err = copy_func_state(dst, src->frame[i]); + if (err) + return err; + } + retur |
