summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/btf.rst43
-rw-r--r--Documentation/bpf/instruction-set.rst2
-rw-r--r--arch/arm/net/bpf_jit_32.c16
-rw-r--r--arch/riscv/net/bpf_jit.h1
-rw-r--r--arch/riscv/net/bpf_jit_core.c8
-rw-r--r--include/linux/bpf.h105
-rw-r--r--include/linux/bpf_verifier.h2
-rw-r--r--include/linux/btf.h28
-rw-r--r--include/linux/filter.h34
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/net/tcp.h1
-rw-r--r--include/uapi/linux/bpf.h88
-rw-r--r--include/uapi/linux/btf.h17
-rw-r--r--kernel/bpf/btf.c142
-rw-r--r--kernel/bpf/cgroup.c70
-rw-r--r--kernel/bpf/core.c17
-rw-r--r--kernel/bpf/helpers.c12
-rw-r--r--kernel/bpf/percpu_freelist.c20
-rw-r--r--kernel/bpf/syscall.c7
-rw-r--r--kernel/bpf/verifier.c49
-rw-r--r--kernel/events/core.c16
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--kernel/trace/trace_uprobe.c5
-rw-r--r--net/bpf/test_run.c6
-rw-r--r--net/core/filter.c130
-rw-r--r--net/core/skmsg.c1
-rw-r--r--net/core/sock_map.c23
-rw-r--r--net/ipv4/tcp_bpf.c1
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/xdp/xdp_umem.c6
-rw-r--r--samples/bpf/xdp_fwd_user.c55
-rw-r--r--samples/bpf/xdp_router_ipv4.bpf.c9
-rwxr-xr-xscripts/bpf_doc.py4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst5
-rw-r--r--tools/bpf/bpftool/Makefile2
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool18
-rw-r--r--tools/bpf/bpftool/btf.c57
-rw-r--r--tools/bpf/bpftool/btf_dumper.c29
-rw-r--r--tools/bpf/bpftool/cgroup.c53
-rw-r--r--tools/bpf/bpftool/common.c90
-rw-r--r--tools/bpf/bpftool/feature.c89
-rw-r--r--tools/bpf/bpftool/gen.c4
-rw-r--r--tools/bpf/bpftool/link.c61
-rw-r--r--tools/bpf/bpftool/main.c2
-rw-r--r--tools/bpf/bpftool/main.h22
-rw-r--r--tools/bpf/bpftool/map.c84
-rw-r--r--tools/bpf/bpftool/pids.c1
-rw-r--r--tools/bpf/bpftool/prog.c79
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/include/uapi/linux/bpf.h88
-rw-r--r--tools/include/uapi/linux/btf.h17
-rw-r--r--tools/lib/bpf/btf.c229
-rw-r--r--tools/lib/bpf/btf.h32
-rw-r--r--tools/lib/bpf/btf_dump.c137
-rw-r--r--tools/lib/bpf/libbpf.c296
-rw-r--r--tools/lib/bpf/libbpf.h38
-rw-r--r--tools/lib/bpf/libbpf.map8
-rw-r--r--tools/lib/bpf/libbpf_internal.h7
-rw-r--r--tools/lib/bpf/linker.c7
-rw-r--r--tools/lib/bpf/relo_core.c113
-rw-r--r--tools/lib/bpf/relo_core.h4
-rw-r--r--tools/lib/bpf/usdt.c123
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile28
-rw-r--r--tools/testing/selftests/bpf/bench.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c96
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh11
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c153
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c126
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c207
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c183
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c40
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h78
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c60
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_dtime.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c8
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c833
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py166
-rw-r--r--tools/testing/selftests/bpf/test_btf.h1
-rwxr-xr-xtools/testing/selftests/bpf/test_xdping.sh4
-rw-r--r--tools/testing/selftests/bpf/xdp_synproxy.c466
92 files changed, 4582 insertions, 834 deletions
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 7940da9bc6c1..f49aeef62d0c 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -74,7 +74,7 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_ARRAY 3 /* Array */
#define BTF_KIND_STRUCT 4 /* Struct */
#define BTF_KIND_UNION 5 /* Union */
- #define BTF_KIND_ENUM 6 /* Enumeration */
+ #define BTF_KIND_ENUM 6 /* Enumeration up to 32-bit values */
#define BTF_KIND_FWD 7 /* Forward */
#define BTF_KIND_TYPEDEF 8 /* Typedef */
#define BTF_KIND_VOLATILE 9 /* Volatile */
@@ -87,6 +87,7 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_FLOAT 16 /* Floating point */
#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+ #define BTF_KIND_ENUM64 19 /* Enumeration up to 64-bit values */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -101,10 +102,10 @@ Each type contains the following common data::
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union and fwd
+ * struct, union, fwd, enum and enum64.
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
@@ -281,10 +282,10 @@ modes exist:
``struct btf_type`` encoding requirement:
* ``name_off``: 0 or offset to a valid C identifier
- * ``info.kind_flag``: 0
+ * ``info.kind_flag``: 0 for unsigned, 1 for signed
* ``info.kind``: BTF_KIND_ENUM
* ``info.vlen``: number of enum values
- * ``size``: 4
+ * ``size``: 1/2/4/8
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
@@ -297,6 +298,10 @@ The ``btf_enum`` encoding:
* ``name_off``: offset to a valid C identifier
* ``val``: any value
+If the original enum value is signed and the size is less than 4,
+that value will be sign extended into 4 bytes. If the size is 8,
+the value will be truncated into 4 bytes.
+
2.2.7 BTF_KIND_FWD
~~~~~~~~~~~~~~~~~~
@@ -493,7 +498,7 @@ the attribute is applied to a ``struct``/``union`` member or
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
valid index (starting from 0) pointing to a member or an argument.
-2.2.17 BTF_KIND_TYPE_TAG
+2.2.18 BTF_KIND_TYPE_TAG
~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
@@ -516,6 +521,32 @@ type_tag, then zero or more const/volatile/restrict/typedef
and finally the base type. The base type is one of
int, ptr, array, struct, union, enum, func_proto and float types.
+2.2.19 BTF_KIND_ENUM64
+~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0 or offset to a valid C identifier
+ * ``info.kind_flag``: 0 for unsigned, 1 for signed
+ * ``info.kind``: BTF_KIND_ENUM64
+ * ``info.vlen``: number of enum values
+ * ``size``: 1/2/4/8
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum64``.::
+
+ struct btf_enum64 {
+ __u32 name_off;
+ __u32 val_lo32;
+ __u32 val_hi32;
+ };
+
+The ``btf_enum64`` encoding:
+ * ``name_off``: offset to a valid C identifier
+ * ``val_lo32``: lower 32-bit value for a 64-bit value
+ * ``val_hi32``: high 32-bit value for a 64-bit value
+
+If the original enum value is signed and the size is less than 8,
+that value will be sign extended into 8 bytes.
+
3. BTF Kernel API
=================
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 1de6a57c7e1e..9e27fbdb2206 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -127,7 +127,7 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
Byte swap instructions
----------------------
-The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit
+The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
code field of ``BPF_END``.
The byte swap instructions operate on the destination register
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 9e457156ad4d..6a1c9fca5260 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -712,22 +712,6 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
}
}
-/* ALU operation (32 bit)
- * dst = dst (op) src
- */
-static inline void emit_a32_alu_r(const s8 dst, const s8 src,
- struct jit_ctx *ctx, const bool is64,
- const bool hi, const u8 op) {
- const s8 *tmp = bpf2a32[TMP_REG_1];
- s8 rn, rd;
-
- rn = arm_bpf_get_reg32(src, tmp[1], ctx);
- rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
- /* ALU operation */
- emit_alu_r(rd, rn, is64, hi, op, ctx);
- arm_bpf_put_reg32(dst, rd, ctx);
-}
-
/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
const s8 src[], struct jit_ctx *ctx,
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 2a3715bf29fe..d926e0f7ef57 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -69,6 +69,7 @@ struct rv_jit_context {
struct bpf_prog *prog;
u16 *insns; /* RV insns */
int ninsns;
+ int body_len;
int epilogue_offset;
int *offset; /* BPF to RV */
int nexentries;
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index be743d700aa7..737baf8715da 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
unsigned int prog_size = 0, extable_size = 0;
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
- int pass = 0, prev_ninsns = 0, i;
+ int pass = 0, prev_ninsns = 0, prologue_len, i;
struct rv_jit_data *jit_data;
struct rv_jit_context *ctx;
@@ -95,6 +95,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = orig_prog;
goto out_offset;
}
+ ctx->body_len = ctx->ninsns;
bpf_jit_build_prologue(ctx);
ctx->epilogue_offset = ctx->ninsns;
bpf_jit_build_epilogue(ctx);
@@ -161,6 +162,11 @@ skip_init_ctx:
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(jit_data->header);
+ prologue_len = ctx->epilogue_offset - ctx->body_len;
+ for (i = 0; i < prog->len; i++)
+ ctx->offset[i] = ninsns_rvoff(prologue_len +
+ ctx->offset[i]);
+ bpf_prog_fill_jited_linfo(prog, ctx->offset);
out_offset:
kfree(ctx->offset);
kfree(jit_data);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b914a56a2c5..0edd7d2c0064 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -5,6 +5,7 @@
#define _LINUX_BPF_H 1
#include <uapi/linux/bpf.h>
+#include <uapi/linux/filter.h>
#include <linux/workqueue.h>
#include <linux/file.h>
@@ -22,8 +23,10 @@
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
+#include <linux/stddef.h>
#include <linux/bpfptr.h>
#include <linux/btf.h>
+#include <linux/rcupdate_trace.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -398,6 +401,9 @@ enum bpf_type_flag {
/* DYNPTR points to a ringbuf record. */
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
+ /* Size is known at compile time. */
+ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@@ -461,6 +467,8 @@ enum bpf_arg_type {
* all bytes or clear them in error case.
*/
ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
+ /* Pointer to valid memory of size known at compile time. */
+ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
@@ -526,6 +534,14 @@ struct bpf_func_proto {
u32 *arg5_btf_id;
};
u32 *arg_btf_id[5];
+ struct {
+ size_t arg1_size;
+ size_t arg2_size;
+ size_t arg3_size;
+ size_t arg4_size;
+ size_t arg5_size;
+ };
+ size_t arg_size[5];
};
int *ret_btf_id; /* return value btf_id */
bool (*allowed)(const struct bpf_prog *prog);
@@ -1084,6 +1100,40 @@ struct bpf_prog_aux {
};
};
+struct bpf_prog {
+ u16 pages; /* Number of allocated pages */
+ u16 jited:1, /* Is our filter JIT'ed? */
+ jit_requested:1,/* archs need to JIT the prog */
+ gpl_compatible:1, /* Is filter GPL compatible? */
+ cb_access:1, /* Is control block accessed? */
+ dst_needed:1, /* Do we need dst entry? */
+ blinding_requested:1, /* needs constant blinding */
+ blinded:1, /* Was blinded */
+ is_func:1, /* program is a bpf function */
+ kprobe_override:1, /* Do we override a kprobe? */
+ has_callchain_buf:1, /* callchain buffer allocated? */
+ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
+ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
+ call_get_func_ip:1, /* Do we call get_func_ip() */
+ tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
+ enum bpf_prog_type type; /* Type of BPF program */
+ enum bpf_attach_type expected_attach_type; /* For some prog types */
+ u32 len; /* Number of filter blocks */
+ u32 jited_len; /* Size of jited insns in bytes */
+ u8 tag[BPF_TAG_SIZE];
+ struct bpf_prog_stats __percpu *stats;
+ int __percpu *active;
+ unsigned int (*bpf_func)(const void *ctx,
+ const struct bpf_insn *insn);
+ struct bpf_prog_aux *aux; /* Auxiliary fields */
+ struct sock_fprog_kern *orig_prog; /* Original BPF program */
+ /* Instructions for interpreter */
+ union {
+ DECLARE_FLEX_ARRAY(struct sock_filter, insns);
+ DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi);
+ };
+};
+
struct bpf_array_aux {
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
@@ -1336,6 +1386,8 @@ extern struct bpf_empty_prog_array bpf_empty_prog_array;
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array *progs);
+/* Use when traversal over the bpf_prog_array uses tasks_trace rcu */
+void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs);
int bpf_prog_array_length(struct bpf_prog_array *progs);
bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
@@ -1427,6 +1479,55 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
return ret;
}
+/* Notes on RCU design for bpf_prog_arrays containing sleepable programs:
+ *
+ * We use the tasks_trace rcu flavor read section to protect the bpf_prog_array
+ * overall. As a result, we must use the bpf_prog_array_free_sleepable
+ * in order to use the tasks_trace rcu grace period.
+ *
+ * When a non-sleepable program is inside the array, we take the rcu read
+ * section and disable preemption for that program alone, so it can access
+ * rcu-protected dynamically sized maps.
+ */
+static __always_inline u32
+bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+ u32 ret = 1;
+
+ might_fault();
+
+ rcu_read_lock_trace();
+ migrate_disable();
+
+ array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
+ if (unlikely(!array))
+ goto out;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ if (!prog->aux->sleepable)
+ rcu_read_lock()