diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/bloom_filter.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/bpf_local_storage.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/bpf_struct_ops.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/btf.c | 19 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 8 | ||||
| -rw-r--r-- | kernel/bpf/cpumap.c | 4 | ||||
| -rw-r--r-- | kernel/bpf/cpumask.c | 38 | ||||
| -rw-r--r-- | kernel/bpf/devmap.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/hashtab.c | 6 | ||||
| -rw-r--r-- | kernel/bpf/helpers.c | 12 | ||||
| -rw-r--r-- | kernel/bpf/lpm_trie.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/memalloc.c | 31 | ||||
| -rw-r--r-- | kernel/bpf/preload/bpf_preload_kern.c | 4 | ||||
| -rw-r--r-- | kernel/bpf/queue_stack_maps.c | 4 | ||||
| -rw-r--r-- | kernel/bpf/reuseport_array.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/stackmap.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 184 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 248 |
18 files changed, 395 insertions, 184 deletions
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 540331b610a9..addf3dd57b59 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -86,9 +86,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct bpf_bloom_filter *bloom; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - if (attr->key_size != 0 || attr->value_size == 0 || attr->max_entries == 0 || attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 47d9948d768f..b5149cfce7d4 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -723,9 +723,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) !attr->btf_key_type_id || !attr->btf_value_type_id) return -EINVAL; - if (!bpf_capable()) - return -EPERM; - if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE) return -E2BIG; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d3f0a4825fa6..116a0ce378ec 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -655,9 +655,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) const struct btf_type *t, *vt; struct bpf_map *map; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); if (!st_ops) return ERR_PTR(-ENOTSUPP); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index bbcae434fda5..29fe21099298 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -492,25 +492,26 @@ static bool btf_type_is_fwd(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; } -static bool btf_type_nosize(const struct btf_type *t) +static bool btf_type_is_datasec(const struct btf_type *t) { - return btf_type_is_void(t) || btf_type_is_fwd(t) || - btf_type_is_func(t) || btf_type_is_func_proto(t); + return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static bool btf_type_nosize_or_null(const struct btf_type *t) +static bool btf_type_is_decl_tag(const struct btf_type *t) { - return !t || btf_type_nosize(t); + return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } -static bool btf_type_is_datasec(const struct btf_type *t) +static bool btf_type_nosize(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; + return btf_type_is_void(t) || btf_type_is_fwd(t) || + btf_type_is_func(t) || btf_type_is_func_proto(t) || + btf_type_is_decl_tag(t); } -static bool btf_type_is_decl_tag(const struct btf_type *t) +static bool btf_type_nosize_or_null(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; + return !t || btf_type_nosize(t); } static bool btf_type_is_decl_tag_target(const struct btf_type *t) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7421487422d4..dc85240a0134 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2064,14 +2064,16 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) }; #undef PROG_NAME_LIST #define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size), -static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, - const struct bpf_insn *insn) = { +static __maybe_unused +u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, + const struct bpf_insn *insn) = { EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) }; #undef PROG_NAME_LIST +#ifdef CONFIG_BPF_SYSCALL void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) { stack_depth = max_t(u32, stack_depth, 1); @@ -2080,7 +2082,7 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) __bpf_call_base_args; insn->code = BPF_JMP | BPF_CALL_ARGS; } - +#endif #else static unsigned int __bpf_prog_ret0_warn(const void *ctx, const struct bpf_insn *insn) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8ec18faa74ac..8a33e8747a0e 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -28,7 +28,6 @@ #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/kthread.h> -#include <linux/capability.h> #include <trace/events/xdp.h> #include <linux/btf_ids.h> @@ -89,9 +88,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) u32 value_size = attr->value_size; struct bpf_cpu_map *cmap; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || (value_size != offsetofend(struct bpf_cpumap_val, qsize) && diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 7efdf5d770ca..938a60ff4295 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -132,6 +132,21 @@ __bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) } /** + * bpf_cpumask_first_and() - Return the index of the first nonzero bit from the + * AND of two cpumasks. + * @src1: The first cpumask. + * @src2: The second cpumask. + * + * Find the index of the first nonzero bit of the AND of two cpumasks. + * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. + */ +__bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1, + const struct cpumask *src2) +{ + return cpumask_first_and(src1, src2); +} + +/** * bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask. * @cpu: The CPU to be set in the cpumask. * @cpumask: The BPF cpumask in which a bit is being set. @@ -367,7 +382,7 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask } /** - * bpf_cpumask_any() - Return a random set CPU from a cpumask. + * bpf_cpumask_any_distribute() - Return a random set CPU from a cpumask. * @cpumask: The cpumask being queried. * * Return: @@ -376,26 +391,28 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask * * A struct bpf_cpumask pointer may be safely passed to @src. */ -__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask) +__bpf_kfunc u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) { - return cpumask_any(cpumask); + return cpumask_any_distribute(cpumask); } /** - * bpf_cpumask_any_and() - Return a random set CPU from the AND of two - * cpumasks. + * bpf_cpumask_any_and_distribute() - Return a random set CPU from the AND of + * two cpumasks. * @src1: The first cpumask. * @src2: The second cpumask. * * Return: - * * A random set bit within [0, num_cpus) if at least one bit is set. + * * A random set bit within [0, num_cpus) from the AND of two cpumasks, if at + * least one bit is set. * * >= num_cpus if no bit is set. * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) +__bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, + const struct cpumask *src2) { - return cpumask_any_and(src1, src2); + return cpumask_any_and_distribute(src1, src2); } __diag_pop(); @@ -406,6 +423,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_first_and, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU) @@ -422,8 +440,8 @@ BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) -BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU) -BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) BTF_SET8_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 802692fa3905..49cc0b5671c6 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -160,9 +160,6 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) struct bpf_dtab *dtab; int err; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE); if (!dtab) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 9901efee4339..56d3da7d0bc6 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -422,12 +422,6 @@ static int htab_map_alloc_check(union bpf_attr *attr) BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != offsetof(struct htab_elem, hash_node.pprev)); - if (lru && !bpf_capable()) - /* LRU implementation is much complicated than other - * maps. Hence, limit to CAP_BPF. - */ - return -EPERM; - if (zero_seed && !capable(CAP_SYS_ADMIN)) /* Guard against local DoS, and discourage production use. */ return -EPERM; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 4ef4c4f8a355..9e80efa59a5d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1933,8 +1933,12 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta * bpf_refcount type so that it is emitted in vmlinux BTF */ ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); + if (!refcount_inc_not_zero((refcount_t *)ref)) + return NULL; - refcount_inc((refcount_t *)ref); + /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null + * in verifier.c + */ return (void *)p__refcounted_kptr; } @@ -1950,7 +1954,7 @@ static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head INIT_LIST_HEAD(h); if (!list_empty(n)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl(n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec); return -EINVAL; } @@ -2032,7 +2036,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, if (!RB_EMPTY_NODE(n)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl(n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec); return -EINVAL; } @@ -2406,7 +2410,7 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e0d3ddf2037a..17c7e7782a1f 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -544,9 +544,6 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) { struct lpm_trie *trie; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || !(attr->map_flags & BPF_F_NO_PREALLOC) || diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 410637c225fb..0668bcd7c926 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -211,9 +211,9 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node) mem_cgroup_put(memcg); } -static void free_one(struct bpf_mem_cache *c, void *obj) +static void free_one(void *obj, bool percpu) { - if (c->percpu_size) { + if (percpu) { free_percpu(((void **)obj)[1]); kfree(obj); return; @@ -222,14 +222,19 @@ static void free_one(struct bpf_mem_cache *c, void *obj) kfree(obj); } -static void __free_rcu(struct rcu_head *head) +static void free_all(struct llist_node *llnode, bool percpu) { - struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); - struct llist_node *llnode = llist_del_all(&c->waiting_for_gp); struct llist_node *pos, *t; llist_for_each_safe(pos, t, llnode) - free_one(c, pos); + free_one(pos, percpu); +} + +static void __free_rcu(struct rcu_head *head) +{ + struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); + + free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } @@ -432,7 +437,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) static void drain_mem_cache(struct bpf_mem_cache *c) { - struct llist_node *llnode, *t; + bool percpu = !!c->percpu_size; /* No progs are using this bpf_mem_cache, but htab_map_free() called * bpf_mem_cache_free() for all remaining elements and they can be in @@ -441,14 +446,10 @@ static void drain_mem_cache(struct bpf_mem_cache *c) * Except for waiting_for_gp list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). */ - llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu)) - free_one(c, llnode); - llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp)) - free_one(c, llnode); - llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist)) - free_one(c, llnode); - llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra)) - free_one(c, llnode); + free_all(__llist_del_all(&c->free_by_rcu), percpu); + free_all(llist_del_all(&c->waiting_for_gp), percpu); + free_all(__llist_del_all(&c->free_llist), percpu); + free_all(__llist_del_all(&c->free_llist_extra), percpu); } static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index b56f9f3314fd..0c63bc2cd895 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -23,9 +23,9 @@ static void free_links_and_skel(void) static int preload(struct bpf_preload_info *obj) { - strlcpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name)); + strscpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name)); obj[0].link = maps_link; - strlcpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name)); + strscpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name)); obj[1].link = progs_link; return 0; } diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 601609164ef3..8d2ddcb7566b 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -7,7 +7,6 @@ #include <linux/bpf.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/capability.h> #include <linux/btf_ids.h> #include "percpu_freelist.h" @@ -46,9 +45,6 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) /* Called from syscall */ static int queue_stack_map_alloc_check(union bpf_attr *attr) { - if (!bpf_capable()) - return -EPERM; - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 0 || attr->value_size == 0 || diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index cbf2d8d784b8..4b4f9670f1a9 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -151,9 +151,6 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node); if (!array) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index b25fce425b2c..458bb80b14d5 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -74,9 +74,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) u64 cost, n_buckets; int err; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4497b193dd20..a2aef900519c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = { .map_mem_usage = bpf_map_offload_map_mem_usage, }; -static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) -{ - const struct bpf_map_ops *ops; - u32 type = attr->map_type; - struct bpf_map *map; - int err; - - if (type >= ARRAY_SIZE(bpf_map_types)) - return ERR_PTR(-EINVAL); - type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); - ops = bpf_map_types[type]; - if (!ops) - return ERR_PTR(-EINVAL); - - if (ops->map_alloc_check) { - err = ops->map_alloc_check(attr); - if (err) - return ERR_PTR(err); - } - if (attr->map_ifindex) - ops = &bpf_map_offload_ops; - if (!ops->map_mem_usage) - return ERR_PTR(-EINVAL); - map = ops->map_alloc(attr); - if (IS_ERR(map)) - return map; - map->ops = ops; - map->map_type = type; - return map; -} - static void bpf_map_write_active_inc(struct bpf_map *map) { atomic64_inc(&map->writecnt); @@ -1127,7 +1096,9 @@ free_map_tab: /* called via syscall */ static int map_create(union bpf_attr *attr) { + const struct bpf_map_ops *ops; int numa_node = bpf_map_attr_numa_node(attr); + u32 map_type = attr->map_type; struct bpf_map *map; int f_flags; int err; @@ -1158,9 +1129,85 @@ static int map_create(union bpf_attr *attr) return -EINVAL; /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ - map = find_and_alloc_map(attr); + map_type = attr->map_type; + if (map_type >= ARRAY_SIZE(bpf_map_types)) + return -EINVAL; + map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); + ops = bpf_map_types[map_type]; + if (!ops) + return -EINVAL; + + if (ops->map_alloc_check) { + err = ops->map_alloc_check(attr); + if (err) + return err; + } + if (attr->map_ifindex) + ops = &bpf_map_offload_ops; + if (!ops->map_mem_usage) + return -EINVAL; + + /* Intent here is for unprivileged_bpf_disabled to block BPF map + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; + + /* check privileged map type permissions */ + switch (map_type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + /* unprivileged */ + break; + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: + case BPF_MAP_TYPE_BLOOM_FILTER: + case BPF_MAP_TYPE_LPM_TRIE: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_STRUCT_OPS: + case BPF_MAP_TYPE_CPUMAP: + if (!bpf_capable()) + return -EPERM; + break; + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_XSKMAP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + WARN(1, "unsupported map type %d", map_type); + return -EPERM; + } + + map = ops->map_alloc(attr); if (IS_ERR(map)) return PTR_ERR(map); + map->ops = ops; + map->map_type = map_type; err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); @@ -2507,7 +2554,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) struct btf *attach_btf = NULL; int err; char license[128]; - bool is_gpl; if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; @@ -2526,15 +2572,15 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) !bpf_capable()) return -EPERM; - /* copy eBPF program license from user space */ - if (strncpy_from_bpfptr(license, - make_bpfptr(attr->license, uattr.is_kernel), - sizeof(license) - 1) < 0) - return -EFAULT; - license[sizeof(license) - 1] = 0; - - /* eBPF programs must be GPL compatible to use GPL-ed functions */ - is_gpl = license_is_gpl_compatible(license); + /* Intent here is for unprivileged_bpf_disabled to block BPF program + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out for these + * and other operations. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) @@ -2618,12 +2664,20 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) make_bpfptr(attr->insns, uattr.is_kernel), bpf_prog_insn_size(prog)) != 0) goto free_prog_sec; + /* copy eBPF program license from user space */ + if (strncpy_from_bpfptr(license, + make_bpfptr(attr->license, uattr.is_kernel), + sizeof(license) - 1) < 0) + goto free_prog_sec; + license[sizeof(license) - 1] = 0; + + /* eBPF programs must be GPL compatible to use GPL-ed functions */ + prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; prog->orig_prog = NULL; prog->jited = 0; atomic64_set(&prog->aux->refcnt, 1); - prog->gpl_compatible = is_gpl ? 1 : 0; if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_dev_bound_init(prog, attr); @@ -2797,28 +2851,31 @@ static void bpf_link_put_deferred(struct work_struct *work) bpf_link_free(link); } -/* bpf_link_put can be called from atomic context, but ensures that resources - * are freed from process context +/* bpf_link_put might be called from atomic context. It needs to be called + * from sleepable context in order to acquire sleeping locks during the process. */ void bpf_link_put(struct bpf_link *link) { if (!atomic64_dec_and_test(&link->refcnt)) return; - if (in_atomic()) { - INIT_WORK(&link->work, bpf_link_put_deferred); - schedule_work(&link->work); - } else { - bpf_link_free(link); - } + INIT_WORK(&link->work, bpf_link_put_deferred); + schedule_work(&link->work); } EXPORT_SYMBOL(bpf_link_put); +static void bpf_link_put_direct(struct bpf_link *link) +{ + if (!atomic64_dec_and_test(&link->refcnt)) + return; + bpf_link_free(link); +} + static int bpf_link_release(struct inode *inode, struct file *filp) { struct bpf_link *link = filp->private_data; - bpf_link_put(link); + bpf_link_put_direct(link); return 0; } @@ -4801,7 +4858,7 @@ out_put_progs: if (ret) bpf_prog_put(new_prog); out_put_link: - bpf_link_put(link); + bpf_link_put_direct(link); return ret; } @@ -4824,7 +4881,7 @@ static int link_detach(union bpf_attr *attr) else ret = -EOPNOTSUPP; - bpf_link_put(link); + bpf_link_put_direct(link); return ret; } @@ -4894,7 +4951,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr) fd = bpf_link_new_fd(link); if (fd < 0) - bpf_link_put(link); + bpf_link_put_direct(link); return fd; } @@ -4971,7 +5028,7 @@ static int bpf_iter_create(union bpf_attr *attr) return PTR_ERR(link); err = bpf_iter_new_fd(link); - bpf_link_put(link); + bpf_link_put_direct(link); return err; } @@ -5041,23 +5098,8 @@ out_prog_put: static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; - bool capable; int err; - capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; - - /* Intent here is for unprivileged_bpf_disabled to block key object - * creation commands for unprivileged users; other actions depend - * of fd availability and access to bpffs, so are dependent on - * object creation success. Capabilities are later verified for - * operations such as load and map create, so even with unprivileged - * BPF disabled, capability checks are still carried out for these - * and other operations. - */ - if (!capable && - (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) - return -EPERM; - err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); if (err) return err; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b54193de762b..11e54dd8b6dd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -197,6 +197,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); static void specialize_kfunc(struct bpf_verifier_env *env, u32 func_id, u16 offset, unsigned long *addr); +static bool is_trusted_reg(const struct bpf_reg_state *reg); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { @@ -298,16 +299,19 @@ struct bpf_kfunc_call_arg_meta { bool found; } arg_constant; - /* arg_btf and arg_btf_id are used by kfunc-specific handling, + /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, * generally to pass info about user-defined local kptr types to later * verification logic * bpf_obj_drop * Record the local kptr type to be drop'd * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) - * Record the local kptr type to be refcount_incr'd + * Record the local kptr type to be refcount_incr'd and use + * arg_owning_ref to determine whether refcount_acquire should be + * fallible */ struct btf *arg_btf; u32 arg_btf_id; + bool arg_owning_ref; struct { struct btf_field *field; @@ -439,8 +443,11 @@ static bool type_may_be_null(u32 type) return type & PTR_MAYBE_NULL; } -static bool reg_type_not_null(enum bpf_reg_type type) +static bool reg_not_null(const struct bpf_reg_state *reg) { + enum bpf_reg_type type; + + type = reg->type; if (type_may_be_null(type)) return false; @@ -450,6 +457,7 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_MAP_VALUE || type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON || + (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || type == PTR_TO_MEM; } @@ -3771,6 +3779,96 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ } } +static bool idset_contains(struct bpf_idset *s, u32 id) +{ + u32 i; + + for (i = 0; i < s->count; ++i) + if (s->ids[i] == id) + return true; + + return false; +} + +static int idset_push(struct bpf_idset *s, u32 id) +{ + if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) + return -EFAULT; + s->ids[s->count++] = id; + return 0; +} + +static void idset_reset(struct bpf_idset *s) +{ + s->count = 0; +} + +/* Collect a set of IDs for all registers currently marked as precise in env->bt. + * Mark all registers with these IDs as precise. + */ +static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + struct bpf_idset *precise_ids = &env->idset_scratch; + struct backtrack_state *bt = &env->bt; + struct bpf_func_state *func; + struct bpf_reg_state *reg; + DECLARE_BITMAP(mask, 64); + int i, fr; + + idset_reset(precise_ids); + + for (fr = bt->frame; fr >= 0; fr--) { + func = st->frame[fr]; + + bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); + for_each_set_bit(i, mask, 32) { + reg = &func->regs[i]; + if (!reg->id || reg->type != SCALAR_VALUE) + continue; + if (idset_push(precise_ids, reg->id)) + return -EFAULT; + } + + bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); + for_each_set_bit(i, mask, 64) { + if (i >= func->allocated_stack / BPF_REG_SIZE) + break; + if (!is_spilled_scalar_reg(&func->stack[i])) + continue; + reg = &func->stack[i].spilled_ptr; + if (!reg->id) + continue; + if (idset_push(precise_ids, reg->id)) + return -EFAULT; + } + } + + for (fr = 0; fr <= st->curframe; ++fr) { + func = st->frame[fr]; + + for (i = BPF_REG_0; i < BPF_REG_10; ++i) { + reg = &func->regs[i]; + if (!reg->id) + continue; + if (!idset_contains(precise_ids, reg->id)) + continue; + bt_set_frame_reg(bt, fr, i); + } |
