summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2021-11-30 10:56:28 -0800
committerAlexei Starovoitov <ast@kernel.org>2021-11-30 10:56:28 -0800
commitb98057ef730ad91b35d96658d9d8a306b618b8e5 (patch)
tree348a28f660d90113801e47f6685a6ee2afcfa8cf /kernel
parent88691e9e1ef59fa917b2bc2df47d550e7635e73c (diff)
parentec151037af4f56065d5b258af82f13dbbf279ebd (diff)
downloadlinux-b98057ef730ad91b35d96658d9d8a306b618b8e5.tar.gz
linux-b98057ef730ad91b35d96658d9d8a306b618b8e5.tar.bz2
linux-b98057ef730ad91b35d96658d9d8a306b618b8e5.zip
Merge branch 'Add bpf_loop helper'
Joanne Koong says: ==================== This patchset add a new helper, bpf_loop. One of the complexities of using for loops in bpf programs is that the verifier needs to ensure that in every possibility of the loop logic, the loop will always terminate. As such, there is a limit on how many iterations the loop can do. The bpf_loop helper moves the loop logic into the kernel and can thereby guarantee that the loop will always terminate. The bpf_loop helper simplifies a lot of the complexity the verifier needs to check, as well as removes the constraint on the number of loops able to be run. From the test results, we see that using bpf_loop in place of the traditional for loop led to a decrease in verification time and number of bpf instructions by ~99%. The benchmark results show that as the number of iterations increases, the overhead per iteration decreases. The high-level overview of the patches - Patch 1 - kernel-side + API changes for adding bpf_loop Patch 2 - tests Patch 3 - use bpf_loop in strobemeta + pyperf600 and measure verifier performance Patch 4 - benchmark for throughput + latency of bpf_loop call v3 -> v4: ~ Address nits: use usleep for triggering bpf programs, fix copyright style v2 -> v3: ~ Rerun benchmarks on physical machine, update results ~ Propagate original error codes in the verifier v1 -> v2: ~ Change helper name to bpf_loop (instead of bpf_for_each) ~ Set max nr_loops (~8 million loops) for bpf_loop call ~ Split tests + strobemeta/pyperf600 changes into two patches ~ Add new ops_report_final helper for outputting throughput and latency ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bpf_iter.c35
-rw-r--r--kernel/bpf/helpers.c2
-rw-r--r--kernel/bpf/verifier.c88
3 files changed, 91 insertions, 34 deletions
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b2ee45064e06..b7aef5b3416d 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
+
+/* maximum number of loops */
+#define MAX_LOOPS BIT(23)
+
+BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
+ u64, flags)
+{
+ bpf_callback_t callback = (bpf_callback_t)callback_fn;
+ u64 ret;
+ u32 i;
+
+ if (flags)
+ return -EINVAL;
+ if (nr_loops > MAX_LOOPS)
+ return -E2BIG;
+
+ for (i = 0; i < nr_loops; i++) {
+ ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
+ /* return value: 0 - continue, 1 - stop and return */
+ if (ret)
+ return i + 1;
+ }
+
+ return i;
+}
+
+const struct bpf_func_proto bpf_loop_proto = {
+ .func = bpf_loop,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_PTR_TO_FUNC,
+ .arg3_type = ARG_PTR_TO_STACK_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ffd469c217f..52188004a9c3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
+ case BPF_FUNC_loop:
+ return &bpf_loop_proto;
default:
break;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0763cca139a7..d7678d8a925c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_loop_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
+ * u64 flags);
+ * callback_fn(u32 index, void *callback_ctx);
+ */
+ callee->regs[BPF_REG_1].type = SCALAR_VALUE;
+ callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
+
+ /* unused */
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+ callee->in_callback_fn = true;
+ return 0;
+}
+
static int set_timer_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
@@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
- if (func_id == BPF_FUNC_tail_call) {
- err = check_reference_leak(env);
- if (err) {
- verbose(env, "tail_call would lead to reference leak\n");
- return err;
- }
- } else if (is_release_function(func_id)) {
+ if (is_release_function(func_id)) {
err = release_reference(env, meta.ref_obj_id);
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
@@ -6475,42 +6490,47 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env);
- /* check that flags argument in get_local_storage(map, flags) is 0,
- * this is required because get_local_storage() can't return an error.
- */
- if (func_id == BPF_FUNC_get_local_storage &&
- !register_is_null(&regs[BPF_REG_2])) {
- verbose(env, "get_local_storage() doesn't support non-zero flags\n");
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_for_each_map_elem) {
+ switch (func_id) {
+ case BPF_FUNC_tail_call:
+ err = check_reference_leak(env);
+ if (err) {
+ verbose(env, "tail_call would lead to reference leak\n");
+ return err;
+ }
+ break;
+ case BPF_FUNC_get_local_storage:
+ /* check that flags argument in get_local_storage(map, flags) is 0,
+ * this is required because get_local_storage() can't return an error.
+ */
+ if (!register_is_null(&regs[BPF_REG_2])) {
+ verbose(env, "get_local_storage() doesn't support non-zero flags\n");
+ return -EINVAL;
+ }
+ break;
+ case BPF_FUNC_for_each_map_elem:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_map_elem_callback_state);
- if (err < 0)
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_timer_set_callback) {
+ break;
+ case BPF_FUNC_timer_set_callback:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_timer_callback_state);
- if (err < 0)
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_find_vma) {
+ break;
+ case BPF_FUNC_find_vma:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_find_vma_callback_state);
- if (err < 0)
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_snprintf) {
+ break;
+ case BPF_FUNC_snprintf:
err = check_bpf_snprintf_call(env, regs);
- if (err < 0)
- return err;
+ break;
+ case BPF_FUNC_loop:
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_loop_callback_state);
+ break;
}
+ if (err)
+ return err;
+
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);