diff options
Diffstat (limited to 'kernel')
34 files changed, 3908 insertions, 1428 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 02242614dcc7..1d3892168d32 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,8 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o +obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 484706959556..2058e89b5ddd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -307,8 +307,8 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key } /* Called from syscall or from eBPF program */ -static int array_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long array_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; @@ -386,7 +386,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, } /* Called from syscall or from eBPF program */ -static int array_map_delete_elem(struct bpf_map *map, void *key) +static long array_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } @@ -686,8 +686,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), }; -static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, - void *callback_ctx, u64 flags) +static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, + void *callback_ctx, u64 flags) { u32 i, key, num_elems = 0; struct bpf_array *array; @@ -721,6 +721,28 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ return num_elems; } +static u64 array_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + u32 elem_size = array->elem_size; + u64 entries = map->max_entries; + u64 usage = sizeof(*array); + + if (percpu) { + usage += entries * sizeof(void *); + usage += entries * elem_size * num_possible_cpus(); + } else { + if (map->map_flags & BPF_F_MMAPABLE) { + usage = PAGE_ALIGN(usage); + usage += PAGE_ALIGN(entries * elem_size); + } else { + usage += entries * elem_size; + } + } + return usage; +} + BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array) const struct bpf_map_ops array_map_ops = { .map_meta_equal = array_map_meta_equal, @@ -742,6 +764,7 @@ const struct bpf_map_ops array_map_ops = { .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -762,6 +785,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -847,7 +871,7 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, return 0; } -static int fd_array_map_delete_elem(struct bpf_map *map, void *key) +static long fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *old_ptr; @@ -1156,6 +1180,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; @@ -1257,6 +1282,7 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; @@ -1291,6 +1317,7 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; #endif @@ -1379,5 +1406,6 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_check_btf = map_check_no_btf, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 48ee750849f2..540331b610a9 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -16,13 +16,6 @@ struct bpf_bloom_filter { struct bpf_map map; u32 bitset_mask; u32 hash_seed; - /* If the size of the values in the bloom filter is u32 aligned, - * then it is more performant to use jhash2 as the underlying hash - * function, else we use jhash. This tracks the number of u32s - * in an u32-aligned value size. If the value size is not u32 aligned, - * this will be 0. - */ - u32 aligned_u32_count; u32 nr_hash_funcs; unsigned long bitset[]; }; @@ -32,16 +25,15 @@ static u32 hash(struct bpf_bloom_filter *bloom, void *value, { u32 h; - if (bloom->aligned_u32_count) - h = jhash2(value, bloom->aligned_u32_count, - bloom->hash_seed + index); + if (likely(value_size % 4 == 0)) + h = jhash2(value, value_size / 4, bloom->hash_seed + index); else h = jhash(value, value_size, bloom->hash_seed + index); return h & bloom->bitset_mask; } -static int bloom_map_peek_elem(struct bpf_map *map, void *value) +static long bloom_map_peek_elem(struct bpf_map *map, void *value) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -56,7 +48,7 @@ static int bloom_map_peek_elem(struct bpf_map *map, void *value) return 0; } -static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) +static long bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -73,12 +65,12 @@ static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) return 0; } -static int bloom_map_pop_elem(struct bpf_map *map, void *value) +static long bloom_map_pop_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } -static int bloom_map_delete_elem(struct bpf_map *map, void *value) +static long bloom_map_delete_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } @@ -152,11 +144,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) bloom->nr_hash_funcs = nr_hash_funcs; bloom->bitset_mask = bitset_mask; - /* Check whether the value size is u32-aligned */ - if ((attr->value_size & (sizeof(u32) - 1)) == 0) - bloom->aligned_u32_count = - attr->value_size / sizeof(u32); - if (!(attr->map_flags & BPF_F_ZERO_SEED)) bloom->hash_seed = get_random_u32(); @@ -177,8 +164,8 @@ static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(-EINVAL); } -static int bloom_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long bloom_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { /* The eBPF program should use map_push_elem instead */ return -EINVAL; @@ -193,6 +180,17 @@ static int bloom_map_check_btf(const struct bpf_map *map, return btf_type_is_void(key_type) ? 0 : -EINVAL; } +static u64 bloom_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_bloom_filter *bloom; + u64 bitset_bytes; + + bloom = container_of(map, struct bpf_bloom_filter, map); + bitset_bytes = BITS_TO_BYTES((u64)bloom->bitset_mask + 1); + bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); + return sizeof(*bloom) + bitset_bytes; +} + BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -206,5 +204,6 @@ const struct bpf_map_ops bloom_filter_map_ops = { .map_update_elem = bloom_map_update_elem, .map_delete_elem = bloom_map_delete_elem, .map_check_btf = bloom_map_check_btf, + .map_mem_usage = bloom_map_mem_usage, .map_btf_id = &bpf_bloom_map_btf_ids[0], }; diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 6cdf6d9ed91d..d44fe8dd9732 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -46,8 +46,6 @@ static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) void bpf_cgrp_storage_free(struct cgroup *cgroup) { struct bpf_local_storage *local_storage; - bool free_cgroup_storage = false; - unsigned long flags; rcu_read_lock(); local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); @@ -57,14 +55,9 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) } bpf_cgrp_storage_lock(); - raw_spin_lock_irqsave(&local_storage->lock, flags); - free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_local_storage_destroy(local_storage); bpf_cgrp_storage_unlock(); rcu_read_unlock(); - - if (free_cgroup_storage) - kfree_rcu(local_storage, rcu); } static struct bpf_local_storage_data * @@ -100,8 +93,8 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key) return sdata ? sdata->data : NULL; } -static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct cgroup *cgroup; @@ -128,11 +121,11 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } -static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) { struct cgroup *cgroup; int err, fd; @@ -156,7 +149,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &cgroup_cache); + return bpf_local_storage_map_alloc(attr, &cgroup_cache, true); } static void cgroup_storage_map_free(struct bpf_map *map) @@ -221,6 +214,7 @@ const struct bpf_map_ops cgrp_storage_map_ops = { .map_update_elem = bpf_cgrp_storage_update_elem, .map_delete_elem = bpf_cgrp_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, + .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = cgroup_storage_ptr, }; @@ -230,7 +224,7 @@ const struct bpf_func_proto bpf_cgrp_storage_get_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_cgroup_btf_id[0], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -241,6 +235,6 @@ const struct bpf_func_proto bpf_cgrp_storage_delete_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_cgroup_btf_id[0], }; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 85720311cc67..b0ef45db207c 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -57,7 +57,6 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, void bpf_inode_storage_free(struct inode *inode) { struct bpf_local_storage *local_storage; - bool free_inode_storage = false; struct bpf_storage_blob *bsb; bsb = bpf_inode(inode); @@ -72,13 +71,8 @@ void bpf_inode_storage_free(struct inode *inode) return; } - raw_spin_lock_bh(&local_storage->lock); - free_inode_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_bh(&local_storage->lock); + bpf_local_storage_destroy(local_storage); rcu_read_unlock(); - - if (free_inode_storage) - kfree_rcu(local_storage, rcu); } static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) @@ -94,8 +88,8 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) return sdata ? sdata->data : NULL; } -static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct fd f = fdget_raw(*(int *)key); @@ -122,12 +116,12 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } -static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) { struct fd f = fdget_raw(*(int *)key); int err; @@ -197,7 +191,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &inode_cache); + return bpf_local_storage_map_alloc(attr, &inode_cache, false); } static void inode_storage_map_free(struct bpf_map *map) @@ -215,6 +209,7 @@ const struct bpf_map_ops inode_storage_map_ops = { .map_update_elem = bpf_fd_inode_storage_update_elem, .map_delete_elem = bpf_fd_inode_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, + .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = inode_storage_ptr, }; @@ -226,7 +221,7 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_inode_storage_btf_ids[0], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -237,6 +232,6 @@ const struct bpf_func_proto bpf_inode_storage_delete_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_inode_storage_btf_ids[0], }; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 5dc307bdeaeb..96856f130cbf 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +struct bpf_iter_num_kern { + int cur; /* current value, inclusive */ + int end; /* final value, exclusive */ +} __aligned(8); + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) +{ + struct bpf_iter_num_kern *s = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); + + BTF_TYPE_EMIT(struct btf_iter_num); + + /* start == end is legit, it's an empty range and we'll just get NULL + * on first (and any subsequent) bpf_iter_num_next() call + */ + if (start > end) { + s->cur = s->end = 0; + return -EINVAL; + } + + /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ + if ((s64)end - (s64)start > BPF_MAX_LOOPS) { + s->cur = s->end = 0; + return -E2BIG; + } + + /* user will call bpf_iter_num_next() first, + * which will set s->cur to exactly start value; + * underflow shouldn't matter + */ + s->cur = start - 1; + s->end = end; + + return 0; +} + +__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it) +{ + struct bpf_iter_num_kern *s = (void *)it; + + /* check failed initialization or if we are done (same behavior); + * need to be careful about overflow, so convert to s64 for checks, + * e.g., if s->cur == s->end == INT_MAX, we can't just do + * s->cur + 1 >= s->end + */ + if ((s64)(s->cur + 1) >= s->end) { + s->cur = s->end = 0; + return NULL; + } + + s->cur++; + + return &s->cur; +} + +__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) +{ + struct bpf_iter_num_kern *s = (void *)it; + + s->cur = s->end = 0; +} + +__diag_pop(); diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 35f4138a54dc..47d9948d768f 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -51,11 +51,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner) return map->ops->map_owner_storage_ptr(owner); } +static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem) +{ + return !hlist_unhashed_lockless(&selem->snode); +} + static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->snode); } +static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem) +{ + return !hlist_unhashed_lockless(&selem->map_node); +} + static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->map_node); @@ -70,11 +80,28 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (charge_mem && mem_charge(smap, owner, smap->elem_size)) return NULL; - selem = bpf_map_kzalloc(&smap->map, smap->elem_size, - gfp_flags | __GFP_NOWARN); + if (smap->bpf_ma) { + migrate_disable(); + selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags); + migrate_enable(); + if (selem) + /* Keep the original bpf_map_kzalloc behavior + * before started using the bpf_mem_cache_alloc. + * + * No need to use zero_map_value. The bpf_selem_free() + * only does bpf_mem_cache_free when there is + * no other bpf prog is using the selem. + */ + memset(SDATA(selem)->data, 0, smap->map.value_size); + } else { + selem = bpf_map_kzalloc(&smap->map, smap->elem_size, + gfp_flags | __GFP_NOWARN); + } + if (selem) { if (value) copy_map_value(&smap->map, SDATA(selem)->data, value); + /* No need to call check_and_init_map_value as memory is zero init */ return selem; } @@ -84,7 +111,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } -void bpf_local_storage_free_rcu(struct rcu_head *rcu) +/* rcu tasks trace callback for bpf_ma == false */ +static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; @@ -98,7 +126,66 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_rcu(struct rcu_head *rcu) +static void bpf_local_storage_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage *local_storage; + + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + bpf_mem_cache_raw_free(local_storage); +} + +static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_local_storage_free_rcu(rcu); + else + call_rcu(rcu, bpf_local_storage_free_rcu); +} + +/* Handle bpf_ma == false */ +static void __bpf_local_storage_free(struct bpf_local_storage *local_storage, + bool vanilla_rcu) +{ + if (vanilla_rcu) + kfree_rcu(local_storage, rcu); + else + call_rcu_tasks_trace(&local_storage->rcu, + __bpf_local_storage_free_trace_rcu); +} + +static void bpf_local_storage_free(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + bool bpf_ma, bool reuse_now) +{ + if (!local_storage) + return; + + if (!bpf_ma) { + __bpf_local_storage_free(local_storage, reuse_now); + return; + } + + if (!reuse_now) { + call_rcu_tasks_trace(&local_storage->rcu, + bpf_local_storage_free_trace_rcu); + return; + } + + if (smap) { + migrate_disable(); + bpf_mem_cache_free(&smap->storage_ma, local_storage); + migrate_enable(); + } else { + /* smap could be NULL if the selem that triggered + * this 'local_storage' creation had been long gone. + * In this case, directly do call_rcu(). + */ + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); + } +} + +/* rcu tasks trace callback for bpf_ma == false */ +static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; @@ -109,13 +196,63 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) kfree_rcu(selem, rcu); } +/* Handle bpf_ma == false */ +static void __bpf_selem_free(struct bpf_local_storage_elem *selem, + bool vanilla_rcu) +{ + if (vanilla_rcu) + kfree_rcu(selem, rcu); + else + call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); +} + +static void bpf_selem_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + bpf_mem_cache_raw_free(selem); +} + +static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_selem_free_rcu(rcu); + else + call_rcu(rcu, bpf_selem_free_rcu); +} + +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now) +{ + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + + if (!smap->bpf_ma) { + __bpf_selem_free(selem, reuse_now); + return; + } + + if (!reuse_now) { + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + } else { + /* Instead of using the vanilla call_rcu(), + * bpf_mem_cache_free will be able to reuse selem + * immediately. + */ + migrate_disable(); + bpf_mem_cache_free(&smap->selem_ma, selem); + migrate_enable(); + } +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. */ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool use_trace_rcu) + bool uncharge_mem, bool reuse_now) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -159,40 +296,75 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - if (use_trace_rcu) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); - else - kfree_rcu(selem, rcu); + bpf_selem_free(selem, smap, reuse_now); + + if (rcu_access_pointer(local_storage->smap) == smap) + RCU_INIT_POINTER(local_storage->smap, NULL); return free_local_storage; } -static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool use_trace_rcu) +static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *storage_smap, + struct bpf_local_storage_elem *selem) +{ + + struct bpf_local_storage_map *selem_smap; + + /* local_storage->smap may be NULL. If it is, get the bpf_ma + * from any selem in the local_storage->list. The bpf_ma of all + * local_storage and selem should have the same value + * for the same map type. + * + * If the local_storage->list is already empty, the caller will not + * care about the bpf_ma value also because the caller is not + * responsibile to free the local_storage. + */ + + if (storage_smap) + return storage_smap->bpf_ma; + + if (!selem) { + struct hlist_node *n; + + n = rcu_dereference_check(hlist_first_rcu(&local_storage->list), + bpf_rcu_lock_held()); + if (!n) + return false; + + selem = hlist_entry(n, struct bpf_local_storage_elem, snode); + } + selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + + return selem_smap->bpf_ma; +} + +static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, + bool reuse_now) { + struct bpf_local_storage_map *storage_smap; struct bpf_local_storage *local_storage; - bool free_local_storage = false; + bool bpf_ma, free_local_storage = false; unsigned long flags; - if (unlikely(!selem_linked_to_storage(selem))) + if (unlikely(!selem_linked_to_storage_lockless(selem))) /* selem has already been unlinked from sk */ return; local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held()); + storage_smap = rcu_dereference_check(local_storage->smap, + bpf_rcu_lock_held()); + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); + raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, use_trace_rcu); + local_storage, selem, true, reuse_now); raw_spin_unlock_irqrestore(&local_storage->lock, flags); - if (free_local_storage) { - if (use_trace_rcu) - call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_rcu); - else - kfree_rcu(local_storage, rcu); - } + if (free_local_storage) + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -202,13 +374,13 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, hlist_add_head_rcu(&selem->snode, &local_storage->list); } -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) |
