diff options
Diffstat (limited to 'kernel')
71 files changed, 2153 insertions, 999 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 121d37e700a6..4cebadb5f30d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -718,7 +718,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, { int rc = 0; struct sk_buff *skb; - static unsigned int failed = 0; + unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ @@ -735,32 +735,30 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, continue; } +retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { - /* fatal failure for our queue flush attempt? */ + /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { - /* yes - error processing for the queue */ sk = NULL; if (err_hook) (*err_hook)(skb); - if (!skb_hook) - goto out; - /* keep processing with the skb_hook */ + if (rc == -EAGAIN) + rc = 0; + /* continue to drain the queue */ continue; } else - /* no - requeue to preserve ordering */ - skb_queue_head(queue, skb); + goto retry; } else { - /* it worked - drop the extra reference and continue */ + /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } -out: return (rc >= 0 ? 0 : rc); } @@ -1609,7 +1607,8 @@ static int __net_init audit_net_init(struct net *net) audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + /* limit the timeout in case auditd is blocked/stopped */ + aunet->sk->sk_sndtimeo = HZ / 10; return 0; } diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index cf6ca339f3cd..c1a9be6a4b9f 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -36,3 +36,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o obj-${CONFIG_BPF_LSM} += bpf_lsm.o endif obj-$(CONFIG_BPF_PRELOAD) += preload/ + +obj-$(CONFIG_BPF_SYSCALL) += relo_core.o +$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE + $(call if_changed_rule,cc_o_c) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 277a05e9c984..b141a1346f72 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -82,6 +82,11 @@ static int bloom_map_delete_elem(struct bpf_map *map, void *value) return -EOPNOTSUPP; } +static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -EOPNOTSUPP; +} + static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; @@ -192,6 +197,7 @@ const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, + .map_get_next_key = bloom_map_get_next_key, .map_push_elem = bloom_map_push_elem, .map_peek_elem = bloom_map_peek_elem, .map_pop_elem = bloom_map_pop_elem, diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 96ceed0e0fb5..e29d9e3d853e 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -17,6 +17,7 @@ #include <linux/bpf_lsm.h> #include <linux/btf_ids.h> #include <linux/fdtable.h> +#include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(inode_cache); @@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, if (!bsb) return NULL; - inode_storage = rcu_dereference(bsb->storage); + inode_storage = + rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); if (!inode_storage) return NULL; @@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, { struct bpf_local_storage_data *sdata; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; @@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!inode) return -EINVAL; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b2ee45064e06..b7aef5b3416d 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +/* maximum number of loops */ +#define MAX_LOOPS BIT(23) + +BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, + u64, flags) +{ + bpf_callback_t callback = (bpf_callback_t)callback_fn; + u64 ret; + u32 i; + + if (flags) + return -EINVAL; + if (nr_loops > MAX_LOOPS) + return -E2BIG; + + for (i = 0; i < nr_loops; i++) { + ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); + /* return value: 0 - continue, 1 - stop and return */ + if (ret) + return i + 1; + } + + return i; +} + +const struct bpf_func_proto bpf_loop_proto = { + .func = bpf_loop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b305270b7a4b..71de2a89869c 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -11,6 +11,9 @@ #include <net/sock.h> #include <uapi/linux/sock_diag.h> #include <uapi/linux/btf.h> +#include <linux/rcupdate.h> +#include <linux/rcupdate_trace.h> +#include <linux/rcupdate_wait.h> #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) @@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } +void bpf_local_storage_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage *local_storage; + + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + kfree_rcu(local_storage, rcu); +} + +static void bpf_selem_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + kfree_rcu(selem, rcu); +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. @@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, bool free_local_storage; void *owner; - smap = rcu_dereference(SDATA(selem)->smap); + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); owner = local_storage->owner; /* All uncharging on the owner must be done first. @@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, * * Although the unlock will be done under * rcu_read_lock(), it is more intutivie to - * read if kfree_rcu(local_storage, rcu) is done + * read if the freeing of the storage is done * after the raw_spin_unlock_bh(&local_storage->lock). * * Hence, a "bool free_local_storage" is returned - * to the caller which then calls the kfree_rcu() - * after unlock. + * to the caller which then calls then frees the storage after + * all the RCU grace periods have expired. */ } hlist_del_init_rcu(&selem->snode); @@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - kfree_rcu(selem, rcu); - + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); return free_local_storage; } @@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) /* selem has already been unlinked from sk */ return; - local_storage = rcu_dereference(selem->local_storage); + local_storage = rcu_dereference_check(selem->local_storage, + bpf_rcu_lock_held()); raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( @@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_local_storage) - kfree_rcu(local_storage, rcu); + call_rcu_tasks_trace(&local_storage->rcu, + bpf_local_storage_free_rcu); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) /* selem has already be unlinked from smap */ return; - smap = rcu_dereference(SDATA(selem)->smap); + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); b = select_bucket(smap, selem); raw_spin_lock_irqsave(&b->lock, flags); if (likely(selem_linked_to_map(selem))) @@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem; /* Fast path (cache hit) */ - sdata = rcu_dereference(local_storage->cache[smap->cache_idx]); + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); if (sdata && rcu_access_pointer(sdata->smap) == smap) return sdata; /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &local_storage->list, snode) + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) if (rcu_access_pointer(SDATA(selem)->smap) == smap) break; @@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner, * bucket->list, first_selem can be freed immediately * (instead of kfree_rcu) because * bpf_local_storage_map_free() does a - * synchronize_rcu() before walking the bucket->list. + * synchronize_rcu_mult (waiting for both sleepable and + * normal programs) before walking the bucket->list. * Hence, no one is accessing selem from the * bucket->list under rcu_read_lock(). */ @@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, !map_value_has_spin_lock(&smap->map))) return ERR_PTR(-EINVAL); - local_storage = rcu_dereference(*owner_storage(smap, owner)); + local_storage = rcu_dereference_check(*owner_storage(smap, owner), + bpf_rcu_lock_held()); if (!local_storage || hlist_empty(&local_storage->list)) { /* Very first elem for the owner */ err = check_flags(NULL, map_flags); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 8ecfe4752769..21069dbe9138 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -165,7 +165,7 @@ void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) break; } - if (btf_member_bitfield_size(t, member)) { + if (__btf_member_bitfield_size(t, member)) { pr_warn("bit field member %s in struct %s is not supported\n", mname, st_ops->name); break; @@ -296,7 +296,7 @@ static int check_zero_holes(const struct btf_type *t, void *data) const struct btf_type *mtype; for_each_member(i, t, member) { - moff = btf_member_bit_offset(t, member) / 8; + moff = __btf_member_bit_offset(t, member) / 8; if (moff > prev_mend && memchr_inv(data + prev_mend, 0, moff - prev_mend)) return -EINVAL; @@ -387,7 +387,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_prog *prog; u32 moff; - moff = btf_member_bit_offset(t, member) / 8; + moff = __btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); if (ptype == module_type) { if (*(void **)(udata + moff)) diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index ebfa8bc90892..5da7bed0f5f6 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -17,6 +17,7 @@ #include <uapi/linux/btf.h> #include <linux/btf_ids.h> #include <linux/fdtable.h> +#include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(task_cache); @@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map, struct bpf_local_storage *task_storage; struct bpf_local_storage_map *smap; - task_storage = rcu_dereference(task->bpf_storage); + task_storage = + rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); if (!task_storage) return NULL; @@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, { struct bpf_local_storage_data *sdata; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; @@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, { int ret; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; @@ -323,7 +327,7 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID, - .arg2_btf_id = &btf_task_struct_ids[0], + .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; @@ -334,5 +338,5 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID, - .arg2_btf_id = &btf_task_struct_ids[0], + .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index dbc3ad07e21b..33bb8ae4a804 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -25,6 +25,7 @@ #include <linux/kobject.h> #include <linux/sysfs.h> #include <net/sock.h> +#include "../tools/lib/bpf/relo_core.h" /* BTF (BPF Type Format) is the meta data format which describes * the data types of BPF program/map. Hence, it basically focus @@ -282,6 +283,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", + [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; const char *btf_type_str(const struct btf_type *t) @@ -418,6 +420,7 @@ static bool btf_type_is_modifier(const struct btf_type *t) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return true; } @@ -834,7 +837,7 @@ static const char *btf_show_name(struct btf_show *show) const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)]; const char *name = NULL, *prefix = "", *parens = ""; const struct btf_member *m = show->state.member; - const struct btf_type *t = show->state.type; + const struct btf_type *t; const struct btf_array *array; u32 id = show->state.type_id; const char *member = NULL; @@ -1737,6 +1740,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: id = type->type; type = btf_type_by_id(btf, type->type); break; @@ -2345,6 +2349,8 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 |
