summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/arraymap.c40
-rw-r--r--kernel/bpf/bpf_lsm.c8
-rw-r--r--kernel/bpf/bpf_struct_ops.c3
-rw-r--r--kernel/bpf/btf.c126
-rw-r--r--kernel/bpf/core.c100
-rw-r--r--kernel/bpf/devmap.c2
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/local_storage.c2
-rw-r--r--kernel/bpf/lpm_trie.c2
-rw-r--r--kernel/bpf/preload/iterators/Makefile10
-rw-r--r--kernel/bpf/syscall.c36
-rw-r--r--kernel/bpf/trampoline.c163
-rw-r--r--kernel/bpf/verifier.c89
13 files changed, 355 insertions, 232 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index fe40d3b9458f..d3e734bf8056 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -70,10 +70,8 @@ int array_map_alloc_check(union bpf_attr *attr)
attr->map_flags & BPF_F_PRESERVE_ELEMS)
return -EINVAL;
- if (attr->value_size > KMALLOC_MAX_SIZE)
- /* if value_size is bigger, the user space won't be able to
- * access the elements.
- */
+ /* avoid overflow on round_up(map->value_size) */
+ if (attr->value_size > INT_MAX)
return -E2BIG;
return 0;
@@ -156,6 +154,11 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return &array->map;
}
+static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
+{
+ return array->value + (u64)array->elem_size * index;
+}
+
/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
@@ -165,7 +168,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;
- return array->value + array->elem_size * (index & array->index_mask);
+ return array->value + (u64)array->elem_size * (index & array->index_mask);
}
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
@@ -203,7 +206,7 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
- u32 elem_size = round_up(map->value_size, 8);
+ u32 elem_size = array->elem_size;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
@@ -272,7 +275,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
*/
- size = round_up(map->value_size, 8);
+ size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
@@ -339,7 +342,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
value, map->value_size);
} else {
val = array->value +
- array->elem_size * (index & array->index_mask);
+ (u64)array->elem_size * (index & array->index_mask);
if (map_flags & BPF_F_LOCK)
copy_map_value_locked(map, val, value, false);
else
@@ -376,7 +379,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
* returned or zeros which were zero-filled by percpu_alloc,
* so no kernel data leaks possible
*/
- size = round_up(map->value_size, 8);
+ size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
@@ -408,8 +411,7 @@ static void array_map_free_timers(struct bpf_map *map)
return;
for (i = 0; i < array->map.max_entries; i++)
- bpf_timer_cancel_and_free(array->value + array->elem_size * i +
- map->timer_off);
+ bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -420,7 +422,7 @@ static void array_map_free(struct bpf_map *map)
if (map_value_has_kptrs(map)) {
for (i = 0; i < array->map.max_entries; i++)
- bpf_map_free_kptrs(map, array->value + array->elem_size * i);
+ bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
bpf_map_free_kptr_off_tab(map);
}
@@ -556,7 +558,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
- return array->value + array->elem_size * index;
+ return array_map_elem_ptr(array, index);
}
static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -575,7 +577,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
- return array->value + array->elem_size * index;
+ return array_map_elem_ptr(array, index);
}
static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
@@ -583,6 +585,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_seq_array_map_info *info = seq->private;
struct bpf_iter__bpf_map_elem ctx = {};
struct bpf_map *map = info->map;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
@@ -603,7 +606,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
ctx.value = v;
} else {
pptr = v;
- size = round_up(map->value_size, 8);
+ size = array->elem_size;
for_each_possible_cpu(cpu) {
bpf_long_memcpy(info->percpu_value_buf + off,
per_cpu_ptr(pptr, cpu),
@@ -633,11 +636,12 @@ static int bpf_iter_init_array_map(void *priv_data,
{
struct bpf_iter_seq_array_map_info *seq_info = priv_data;
struct bpf_map *map = aux->map;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
void *value_buf;
u32 buf_size;
if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ buf_size = array->elem_size * num_possible_cpus();
value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
if (!value_buf)
return -ENOMEM;
@@ -690,7 +694,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
else
- val = array->value + array->elem_size * i;
+ val = array_map_elem_ptr(array, i);
num_elems++;
key = i;
ret = callback_fn((u64)(long)map, (u64)(long)&key,
@@ -1322,7 +1326,7 @@ static int array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
- u32 elem_size = round_up(map->value_size, 8);
+ u32 elem_size = array->elem_size;
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index d469b7f3deef..fa71d58b7ded 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -63,10 +63,11 @@ BTF_ID(func, bpf_lsm_socket_post_create)
BTF_ID(func, bpf_lsm_socket_socketpair)
BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)
+#ifdef CONFIG_CGROUP_BPF
void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
bpf_func_t *bpf_func)
{
- const struct btf_param *args;
+ const struct btf_param *args __maybe_unused;
if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
btf_id_set_contains(&bpf_lsm_current_hooks,
@@ -75,9 +76,9 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
return;
}
+#ifdef CONFIG_NET
args = btf_params(prog->aux->attach_func_proto);
-#ifdef CONFIG_NET
if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
*bpf_func = __cgroup_bpf_run_lsm_socket;
else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
@@ -86,6 +87,7 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
#endif
*bpf_func = __cgroup_bpf_run_lsm_current;
}
+#endif
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
@@ -219,6 +221,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_retval:
return prog->expected_attach_type == BPF_LSM_CGROUP ?
&bpf_get_retval_proto : NULL;
+#ifdef CONFIG_NET
case BPF_FUNC_setsockopt:
if (prog->expected_attach_type != BPF_LSM_CGROUP)
return NULL;
@@ -239,6 +242,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
prog->aux->attach_btf_id))
return &bpf_unlocked_sk_getsockopt_proto;
return NULL;
+#endif
default:
return tracing_prog_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 7e0068c3399c..84b2d9dba79a 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -341,6 +341,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
+ /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+ * and it must be used alone.
+ */
flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
return arch_prepare_bpf_trampoline(NULL, image, image_end,
model, flags, tlinks, NULL);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4423045b8ff3..7ac971ea98d1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -213,7 +213,7 @@ enum {
};
struct btf_kfunc_set_tab {
- struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+ struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
};
struct btf_id_dtor_kfunc_tab {
@@ -1116,7 +1116,8 @@ __printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
*/
#define btf_show_type_value(show, fmt, value) \
do { \
- if ((value) != 0 || (show->flags & BTF_SHOW_ZERO) || \
+ if ((value) != (__typeof__(value))0 || \
+ (show->flags & BTF_SHOW_ZERO) || \
show->state.depth == 0) { \
btf_show(show, "%s%s" fmt "%s%s", \
btf_show_indent(show), \
@@ -1615,7 +1616,7 @@ static void btf_free_id(struct btf *btf)
static void btf_free_kfunc_set_tab(struct btf *btf)
{
struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
- int hook, type;
+ int hook;
if (!tab)
return;
@@ -1624,10 +1625,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf)
*/
if (btf_is_module(btf))
goto free_tab;
- for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
- for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
- kfree(tab->sets[hook][type]);
- }
+ for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++)
+ kfree(tab->sets[hook]);
free_tab:
kfree(tab);
btf->kfunc_set_tab = NULL;
@@ -6171,13 +6170,14 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf,
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
- bool ptr_to_mem_ok)
+ bool ptr_to_mem_ok,
+ u32 kfunc_flags)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+ bool rel = false, kptr_get = false, trusted_arg = false;
struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
- bool rel = false, kptr_get = false;
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
@@ -6209,10 +6209,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (is_kfunc) {
/* Only kfunc can be release func */
- rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_RELEASE, func_id);
- kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id);
+ rel = kfunc_flags & KF_RELEASE;
+ kptr_get = kfunc_flags & KF_KPTR_GET;
+ trusted_arg = kfunc_flags & KF_TRUSTED_ARGS;
}
/* check that BTF function arguments match actual types that the
@@ -6237,10 +6236,19 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ /* Check if argument must be a referenced pointer, args + i has
+ * been verified to be a pointer (after skipping modifiers).
+ */
+ if (is_kfunc && trusted_arg && !reg->ref_obj_id) {
+ bpf_log(log, "R%d must be referenced\n", regno);
+ return -EINVAL;
+ }
+
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
- if (rel && reg->ref_obj_id)
+ /* Trusted args have the same offset checks as release arguments */
+ if (trusted_arg || (rel && reg->ref_obj_id))
arg_type |= OBJ_RELEASE;
ret = check_func_arg_reg_off(env, reg, regno, arg_type);
if (ret < 0)
@@ -6338,7 +6346,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_tname = btf_name_by_offset(reg_btf,
reg_ref_t->name_off);
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
- reg->off, btf, ref_id, rel && reg->ref_obj_id)) {
+ reg->off, btf, ref_id,
+ trusted_arg || (rel && reg->ref_obj_id))) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
func_name, i,
btf_type_str(ref_t), ref_tname,
@@ -6441,7 +6450,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
- err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global);
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, 0);
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function.
@@ -6454,9 +6463,10 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs)
+ struct bpf_reg_state *regs,
+ u32 kfunc_flags)
{
- return btf_check_func_arg_match(env, btf, func_id, regs, true);
+ return btf_check_func_arg_match(env, btf, func_id, regs, true, kfunc_flags);
}
/* Convert BTF of a function into bpf_reg_state if possible
@@ -6853,6 +6863,11 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
+static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
enum {
BTF_MODULE_F_LIVE = (1 << 0),
};
@@ -7101,16 +7116,16 @@ BTF_TRACING_TYPE_xxx
/* Kernel Function (kfunc) BTF ID set registration API */
-static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
- enum btf_kfunc_type type,
- struct btf_id_set *add_set, bool vmlinux_set)
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ struct btf_id_set8 *add_set)
{
+ bool vmlinux_set = !btf_is_module(btf);
struct btf_kfunc_set_tab *tab;
- struct btf_id_set *set;
+ struct btf_id_set8 *set;
u32 set_cnt;
int ret;
- if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+ if (hook >= BTF_KFUNC_HOOK_MAX) {
ret = -EINVAL;
goto end;
}
@@ -7126,7 +7141,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
btf->kfunc_set_tab = tab;
}
- set = tab->sets[hook][type];
+ set = tab->sets[hook];
/* Warn when register_btf_kfunc_id_set is called twice for the same hook
* for module sets.
*/
@@ -7140,7 +7155,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
* pointer and return.
*/
if (!vmlinux_set) {
- tab->sets[hook][type] = add_set;
+ tab->sets[hook] = add_set;
return 0;
}
@@ -7149,7 +7164,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
* and concatenate all individual sets being registered. While each set
* is individually sorted, they may become unsorted when concatenated,
* hence re-sorting the final set again is required to make binary
- * searching the set using btf_id_set_contains function work.
+ * searching the set using btf_id_set8_contains function work.
*/
set_cnt = set ? set->cnt : 0;
@@ -7164,8 +7179,8 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
}
/* Grow set */
- set = krealloc(tab->sets[hook][type],
- offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+ set = krealloc(tab->sets[hook],
+ offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
GFP_KERNEL | __GFP_NOWARN);
if (!set) {
ret = -ENOMEM;
@@ -7173,15 +7188,15 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
}
/* For newly allocated set, initialize set->cnt to 0 */
- if (!tab->sets[hook][type])
+ if (!tab->sets[hook])
set->cnt = 0;
- tab->sets[hook][type] = set;
+ tab->sets[hook] = set;
/* Concatenate the two sets */
- memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+ memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0]));
set->cnt += add_set->cnt;
- sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+ sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);
return 0;
end:
@@ -7189,38 +7204,25 @@ end:
return ret;
}
-static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
- const struct btf_kfunc_id_set *kset)
-{
- bool vmlinux_set = !btf_is_module(btf);
- int type, ret = 0;
-
- for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
- if (!kset->sets[type])
- continue;
-
- ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
- if (ret)
- break;
- }
- return ret;
-}
-
-static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
enum btf_kfunc_hook hook,
- enum btf_kfunc_type type,
u32 kfunc_btf_id)
{
- struct btf_id_set *set;
+ struct btf_id_set8 *set;
+ u32 *id;
- if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
- return false;
+ if (hook >= BTF_KFUNC_HOOK_MAX)
+ return NULL;
if (!btf->kfunc_set_tab)
- return false;
- set = btf->kfunc_set_tab->sets[hook][type];
+ return NULL;
+ set = btf->kfunc_set_tab->sets[hook];
if (!set)
- return false;
- return btf_id_set_contains(set, kfunc_btf_id);
+ return NULL;
+ id = btf_id_set8_contains(set, kfunc_btf_id);
+ if (!id)
+ return NULL;
+ /* The flags for BTF ID are located next to it */
+ return id + 1;
}
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
@@ -7248,14 +7250,14 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
* keeping the reference for the duration of the call provides the necessary
* protection for looking up a well-formed btf->kfunc_set_tab.
*/
-bool btf_kfunc_id_set_contains(const struct btf *btf,
+u32 *btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
- enum btf_kfunc_type type, u32 kfunc_btf_id)
+ u32 kfunc_btf_id)
{
enum btf_kfunc_hook hook;
hook = bpf_prog_type_to_kfunc_hook(prog_type);
- return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+ return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
}
/* This function must be invoked only from initcalls/module init functions */
@@ -7282,7 +7284,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
return PTR_ERR(btf);
hook = bpf_prog_type_to_kfunc_hook(prog_type);
- ret = btf_populate_kfunc_set(btf, hook, kset);
+ ret = btf_populate_kfunc_set(btf, hook, kset->set);
btf_put(btf);
return ret;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bfeb9b937315..c1e10d088dbb 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -652,12 +652,6 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
return fp->jited && !bpf_prog_was_classic(fp);
}
-static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
-{
- return list_empty(&fp->aux->ksym.lnode) ||
- fp->aux->ksym.lnode.prev == LIST_POISON2;
-}
-
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
if (!bpf_prog_kallsyms_candidate(fp) ||
@@ -833,15 +827,6 @@ struct bpf_prog_pack {
#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
-static size_t bpf_prog_pack_size = -1;
-static size_t bpf_prog_pack_mask = -1;
-
-static int bpf_prog_chunk_count(void)
-{
- WARN_ON_ONCE(bpf_prog_pack_size == -1);
- return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE;
-}
-
static DEFINE_MUTEX(pack_mutex);
static LIST_HEAD(pack_list);
@@ -849,55 +834,33 @@ static LIST_HEAD(pack_list);
* CONFIG_MMU=n. Use PAGE_SIZE in these cases.
*/
#ifdef PMD_SIZE
-#define BPF_HPAGE_SIZE PMD_SIZE
-#define BPF_HPAGE_MASK PMD_MASK
+#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes())
#else
-#define BPF_HPAGE_SIZE PAGE_SIZE
-#define BPF_HPAGE_MASK PAGE_MASK
+#define BPF_PROG_PACK_SIZE PAGE_SIZE
#endif
-static size_t select_bpf_prog_pack_size(void)
-{
- size_t size;
- void *ptr;
-
- size = BPF_HPAGE_SIZE * num_online_nodes();
- ptr = module_alloc(size);
-
- /* Test whether we can get huge pages. If not just use PAGE_SIZE
- * packs.
- */
- if (!ptr || !is_vm_area_hugepages(ptr)) {
- size = PAGE_SIZE;
- bpf_prog_pack_mask = PAGE_MASK;
- } else {
- bpf_prog_pack_mask = BPF_HPAGE_MASK;
- }
-
- vfree(ptr);
- return size;
-}
+#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_prog_pack *pack;
- pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())),
+ pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)),
GFP_KERNEL);
if (!pack)
return NULL;
- pack->ptr = module_alloc(bpf_prog_pack_size);
+ pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
if (!pack->ptr) {
kfree(pack);
return NULL;
}
- bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size);
- bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
+ bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
+ bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr);
- set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
- set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+ set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
return pack;
}
@@ -909,10 +872,7 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insn
void *ptr = NULL;
mutex_lock(&pack_mutex);
- if (bpf_prog_pack_size == -1)
- bpf_prog_pack_size = select_bpf_prog_pack_size();
-
- if (size > bpf_prog_pack_size) {
+ if (size > BPF_PROG_PACK_SIZE) {
size = round_up(size, PAGE_SIZE);
ptr = module_alloc(size);
if (ptr) {
@@ -924,9 +884,9 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insn
goto out;
}
list_for_each_entry(pack, &pack_list, list) {
- pos = bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
+ pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
nbits, 0);
- if (pos < bpf_prog_chunk_count())
+ if (pos < BPF_PROG_CHUNK_COUNT)
goto found_free_area;
}
@@ -950,18 +910,15 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
struct bpf_prog_pack *pack = NULL, *tmp;
unsigned int nbits;
unsigned long pos;
- void *pack_ptr;
mutex_lock(&pack_mutex);
- if (hdr->size > bpf_prog_pack_size) {
+ if (hdr->size > BPF_PROG_PACK_SIZE) {
module_memfree(hdr);
goto out;
}
- pack_ptr = (void *)((unsigned long)hdr & bpf_prog_pack_mask);
-
list_for_each_entry(tmp, &pack_list, list) {
- if (tmp->ptr == pack_ptr) {
+ if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) {
pack = tmp;
break;
}
@@ -971,14 +928,14 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
goto out;
nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
- pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+ pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
bitmap_clear(pack->bitmap, pos, nbits);
- if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
- bpf_prog_chunk_count(), 0) == 0) {
+ if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+ BPF_PROG_CHUNK_COUNT, 0) == 0) {
list_del(&pack->list);
module_memfree(pack->ptr);
kfree(pack);
@@ -1155,7 +1112,6 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
bpf_prog_pack_free(ro_header);
return PTR_ERR(ptr);
}
- prog->aux->use_bpf_prog_pack = true;
return 0;
}
@@ -1179,17 +1135,23 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
bpf_jit_uncharge_modmem(size);
}
+struct bpf_binary_header *
+bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
+{
+ unsigned long real_start = (unsigned long)fp->bpf_func;
+ unsigned long addr;
+
+ addr = real_start & BPF_PROG_CHUNK_MASK;
+ return (void *)addr;
+}
+
static inline struct bpf_binary_header *
bpf_jit_binary_hdr(const struct bpf_prog *fp)
{
unsigned long real_start = (unsigned long)fp->bpf_func;
unsigned long addr;
- if (fp->aux->use_bpf_prog_pack)
- addr = real_start & BPF_PROG_CHUNK_MASK;
- else
- addr = real_start & PAGE_MASK;
-
+ addr = real_start & PAGE_MASK;
return (void *)addr;
}
@@ -1202,11 +1164,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
if (fp->jited) {
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
- if (fp->aux->use_bpf_prog_pack)
- bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
- else
- bpf_jit_binary_free(hdr);
-
+ bpf_jit_binary_free(hdr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index c2867068e5bd..1400561efb15 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -845,7 +845,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab_netdev *dev;
dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
- GFP_ATOMIC | __GFP_NOWARN,
+ GFP_NOWAIT | __GFP_NOWARN,
dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 17fb69c0e0dc..da7578426a46 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -61,7 +61,7 @@
*
* As regular device interrupt handlers and soft interrupts are forced into
* thread context, the existing code which does
- * spin_lock*(); alloc(GPF_ATOMIC); spin_unlock*();
+ * spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
* just works.
*
* In theory the BPF locks could be converted to regular spinlocks as well,
@@ -978,7 +978,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
goto dec_count;
}
l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
- GFP_ATOMIC | __GFP_NOWARN,
+ GFP_NOWAIT | __GFP_NOWARN,
htab->map.numa_node);
if (!l_new) {
l_new = ERR_PTR(-ENOMEM);
@@ -996,7 +996,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
} else {
/* alloc_percpu zero-fills */
pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
- GFP_ATOMIC | __GFP_NOWARN);
+ GFP_NOWAIT | __GFP_NOWARN);
if (!pptr) {
kfree(l_new);
l_new = ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 8654fc97f5fe..49ef0ce040c7 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -165,7 +165,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
}
new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
- __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
+ __GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN,
map->numa_node);
if (!new)
return -ENOMEM;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index f0d05a3cc4b9..d789e3b831ad 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -285,7 +285,7 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
if (value)
size += trie->map.value_size;
- node = bpf_map_kmalloc_node(&trie->map, size, GFP_ATOMIC | __GFP_NOWARN,
+ node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN,
trie->map.numa_node);
if (!node)
return NULL;
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index bfe24f8c5a20..6762b1260f2f 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -9,7 +9,7 @@ LLVM_STRIP ?= llvm-strip
TOOLS_PATH := $(abspath ../../../../tools)
BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool
BPFTOOL_OUTPUT := $(abs_out)/bpftool
-DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf
@@ -61,9 +61,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU
OUTPUT=$(abspath $(dir $@))/ prefix= \
DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers
-$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \
- OUTPUT=$(BPFTOOL_OUTPUT)/ \
- LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \
- LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \
- prefix= DESTDIR=$(abs_out)/ install-bin
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ab688d85b2c6..83c7136c5788 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -419,35 +419,53 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
#ifdef CONFIG_MEMCG_KMEM
static void bpf_map_save_memcg(struct bpf_map *map)
{
- map->memcg = get_mem_cgroup_from_mm(current->mm);
+ /* Currently if a map is created by a process belonging to the root
+ * memory cgroup, get_obj_cgroup_from_current() will return NULL.
+ * So we have to check map->objcg for being NULL each time it's
+ * being used.
+ */
+ map->objcg = get_obj_cgroup_from_current();
}
static void bpf_map_release_memcg(struct bpf_map *map)
{
- mem_cgroup_put(map->memcg);
+ if (map->objcg)
+ obj_cgroup_put(map->objcg);
+}
+
+static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
+{
+ if (map->objcg)
+ return get_mem_cgroup_from_objcg(map->objcg);
+
+ return root_mem_cgroup;
}
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node)
{
- struct mem_cgroup *old_memcg;
+ struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- old_memcg = set_active_memcg(map->memcg);
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
return ptr;
}
void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
- struct mem_cgroup *old_memcg;
+ struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- old_memcg = set_active_memcg(map->memcg);
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
ptr = kzalloc(size, flags | __GFP_ACCOUNT);
set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
return ptr;
}
@@ -455,12 +473,14 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags)
{
- struct mem_cgroup *old_memcg;
+ struct mem_cgroup *memcg, *old_memcg;
void __percpu *ptr;
- old_memcg = set_active_memcg(map->memcg);
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);