diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2023-10-20 10:12:55 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2023-10-20 14:15:13 -0700 |
| commit | cf559a416f9bc061f3b96f8afc6ceae5eec9b2b0 (patch) | |
| tree | e00e8bcbaedd9a6bc3783abe6edcb71b3c2ac382 /kernel | |
| parent | da1055b673f3baac2249571c9882ce767a0aa746 (diff) | |
| parent | d440ba91ca4d3004709876779d40713a99e21f6a (diff) | |
| download | linux-cf559a416f9bc061f3b96f8afc6ceae5eec9b2b0.tar.gz linux-cf559a416f9bc061f3b96f8afc6ceae5eec9b2b0.tar.bz2 linux-cf559a416f9bc061f3b96f8afc6ceae5eec9b2b0.zip | |
Merge branch 'bpf-fixes-for-per-cpu-kptr'
Hou Tao says:
====================
bpf: Fixes for per-cpu kptr
From: Hou Tao <houtao1@huawei.com>
Hi,
The patchset aims to fix the problems found in the review of per-cpu
kptr patch-set [0]. Patch #1 moves pcpu_lock after the invocation of
pcpu_chunk_addr_search() and it is a micro-optimization for
free_percpu(). The reason includes it in the patch is that the same
logic is used in newly-added API pcpu_alloc_size(). Patch #2 introduces
pcpu_alloc_size() for dynamic per-cpu area. Patch #2 and #3 use
pcpu_alloc_size() to check whether or not unit_size matches with the
size of underlying per-cpu area and to select a matching bpf_mem_cache.
Patch #4 fixes the freeing of per-cpu kptr when these kptrs are freed by
map destruction. The last patch adds test cases for these problems.
Please see individual patches for details. And comments are always
welcome.
Change Log:
v3:
* rebased on bpf-next
* patch 2: update API document to note that pcpu_alloc_size() doesn't
support statically allocated per-cpu area. (Dennis)
* patch 1 & 2: add Acked-by from Dennis
v2: https://lore.kernel.org/bpf/20231018113343.2446300-1-houtao@huaweicloud.com/
* add a new patch "don't acquire pcpu_lock for pcpu_chunk_addr_search()"
* patch 2: change type of bit_off and end to unsigned long (Andrew)
* patch 2: rename the new API as pcpu_alloc_size and follow 80-column convention (Dennis)
* patch 5: move the common declaration into bpf.h (Stanislav, Alxei)
v1: https://lore.kernel.org/bpf/20231007135106.3031284-1-houtao@huaweicloud.com/
[0]: https://lore.kernel.org/bpf/20230827152729.1995219-1-yonghong.song@linux.dev
====================
Link: https://lore.kernel.org/r/20231020133202.4043247-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/helpers.c | 24 | ||||
| -rw-r--r-- | kernel/bpf/memalloc.c | 38 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 6 |
3 files changed, 42 insertions, 26 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index da058aead20c..e46ac288a108 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1811,8 +1811,6 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } -void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); - void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock) { @@ -1844,7 +1842,7 @@ unlock: * bpf_list_head which needs to be freed. */ migrate_disable(); - __bpf_obj_drop_impl(obj, field->graph_root.value_rec); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); migrate_enable(); } } @@ -1883,7 +1881,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, migrate_disable(); - __bpf_obj_drop_impl(obj, field->graph_root.value_rec); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); migrate_enable(); } } @@ -1915,8 +1913,10 @@ __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign) } /* Must be called under migrate_disable(), as required by bpf_mem_free */ -void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) { + struct bpf_mem_alloc *ma; + if (rec && rec->refcount_off >= 0 && !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { /* Object is refcounted and refcount_dec didn't result in 0 @@ -1928,10 +1928,14 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) if (rec) bpf_obj_free_fields(rec, p); + if (percpu) + ma = &bpf_global_percpu_ma; + else + ma = &bpf_global_ma; if (rec && rec->refcount_off >= 0) - bpf_mem_free_rcu(&bpf_global_ma, p); + bpf_mem_free_rcu(ma, p); else - bpf_mem_free(&bpf_global_ma, p); + bpf_mem_free(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) @@ -1939,7 +1943,7 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; - __bpf_obj_drop_impl(p, meta ? meta->record : NULL); + __bpf_obj_drop_impl(p, meta ? meta->record : NULL, false); } __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign) @@ -1983,7 +1987,7 @@ static int __bpf_list_add(struct bpf_list_node_kern *node, */ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl((void *)n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL; } @@ -2082,7 +2086,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, */ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl((void *)n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL; } diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 39ea316c55e7..5308e386380a 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -491,21 +491,17 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) struct llist_node *first; unsigned int obj_size; - /* For per-cpu allocator, the size of free objects in free list doesn't - * match with unit_size and now there is no way to get the size of - * per-cpu pointer saved in free object, so just skip the checking. - */ - if (c->percpu_size) - return 0; - first = c->free_llist.first; if (!first) return 0; - obj_size = ksize(first); + if (c->percpu_size) + obj_size = pcpu_alloc_size(((void **)first)[1]); + else + obj_size = ksize(first); if (obj_size != c->unit_size) { - WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", - idx, obj_size, c->unit_size); + WARN_ONCE(1, "bpf_mem_cache[%u]: percpu %d, unexpected object size %u, expect %u\n", + idx, c->percpu_size, obj_size, c->unit_size); return -EINVAL; } return 0; @@ -529,6 +525,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) /* room for llist_node and per-cpu pointer */ if (percpu) percpu_size = LLIST_NODE_SZ + sizeof(void *); + ma->percpu = percpu; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); @@ -878,6 +875,17 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) return !ret ? NULL : ret + LLIST_NODE_SZ; } +static notrace int bpf_mem_free_idx(void *ptr, bool percpu) +{ + size_t size; + + if (percpu) + size = pcpu_alloc_size(*((void **)ptr)); + else + size = ksize(ptr - LLIST_NODE_SZ); + return bpf_mem_cache_idx(size); +} + void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) { int idx; @@ -885,7 +893,7 @@ void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) if (!ptr) return; - idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ)); + idx = bpf_mem_free_idx(ptr, ma->percpu); if (idx < 0) return; @@ -899,7 +907,7 @@ void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr) if (!ptr) return; - idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ)); + idx = bpf_mem_free_idx(ptr, ma->percpu); if (idx < 0) return; @@ -973,6 +981,12 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } +/* The alignment of dynamic per-cpu area is 8, so c->unit_size and the + * actual size of dynamic per-cpu area will always be matched and there is + * no need to adjust size_index for per-cpu allocation. However for the + * simplicity of the implementation, use an unified size_index for both + * kmalloc and per-cpu allocation. + */ static __init int bpf_mem_cache_adjust_size(void) { unsigned int size; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 341f8cb4405c..a9b2cb500bf7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -626,8 +626,6 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(obj + rec->timer_off); } -extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); - void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; @@ -662,8 +660,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) field->kptr.btf_id); migrate_disable(); __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? - pointee_struct_meta->record : - NULL); + pointee_struct_meta->record : NULL, + fields[i].type == BPF_KPTR_PERCPU); migrate_enable(); } else { field->kptr.dtor(xchgd_field); |
