diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/Makefile | 1 | ||||
| -rw-r--r-- | kernel/bpf/bpf_iter.c | 14 | ||||
| -rw-r--r-- | kernel/bpf/bpf_lsm.c | 88 | ||||
| -rw-r--r-- | kernel/bpf/bpf_task_storage.c | 315 | ||||
| -rw-r--r-- | kernel/bpf/btf.c | 411 | ||||
| -rw-r--r-- | kernel/bpf/hashtab.c | 144 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/sysfs_btf.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/task_iter.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 182 | ||||
| -rw-r--r-- | kernel/module.c | 32 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 29 |
12 files changed, 1076 insertions, 147 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1b9f71ee6aa..d1249340fd6b 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o +obj-${CONFIG_BPF_LSM} += bpf_task_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 8f10e30ea0b0..5454161407f1 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -67,6 +67,15 @@ static void bpf_iter_done_stop(struct seq_file *seq) iter_priv->done_stop = true; } +static bool bpf_iter_support_resched(struct seq_file *seq) +{ + struct bpf_iter_priv_data *iter_priv; + + iter_priv = container_of(seq->private, struct bpf_iter_priv_data, + target_private); + return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED; +} + /* maximum visited objects before bailing out */ #define MAX_ITER_OBJECTS 1000000 @@ -83,6 +92,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; int err = 0, num_objs = 0; + bool can_resched; void *p; mutex_lock(&seq->lock); @@ -135,6 +145,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, goto done; } + can_resched = bpf_iter_support_resched(seq); while (1) { loff_t pos = seq->index; @@ -180,6 +191,9 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, } break; } + + if (can_resched) + cond_resched(); } stop: offs = seq->count; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 56cc5a915f67..553107f4706a 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -63,11 +63,99 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_task_storage_get: + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + return &bpf_task_storage_delete_proto; default: return tracing_prog_func_proto(func_id, prog); } } +/* The set of hooks which are called without pagefaults disabled and are allowed + * to "sleep" and thus can be used for sleeable BPF programs. + */ +BTF_SET_START(sleepable_lsm_hooks) +BTF_ID(func, bpf_lsm_bpf) +BTF_ID(func, bpf_lsm_bpf_map) +BTF_ID(func, bpf_lsm_bpf_map_alloc_security) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog) +BTF_ID(func, bpf_lsm_bprm_check_security) +BTF_ID(func, bpf_lsm_bprm_committed_creds) +BTF_ID(func, bpf_lsm_bprm_committing_creds) +BTF_ID(func, bpf_lsm_bprm_creds_for_exec) +BTF_ID(func, bpf_lsm_bprm_creds_from_file) +BTF_ID(func, bpf_lsm_capget) +BTF_ID(func, bpf_lsm_capset) +BTF_ID(func, bpf_lsm_cred_prepare) +BTF_ID(func, bpf_lsm_file_ioctl) +BTF_ID(func, bpf_lsm_file_lock) +BTF_ID(func, bpf_lsm_file_open) +BTF_ID(func, bpf_lsm_file_receive) +BTF_ID(func, bpf_lsm_inet_conn_established) +BTF_ID(func, bpf_lsm_inode_create) +BTF_ID(func, bpf_lsm_inode_free_security) +BTF_ID(func, bpf_lsm_inode_getattr) +BTF_ID(func, bpf_lsm_inode_getxattr) +BTF_ID(func, bpf_lsm_inode_mknod) +BTF_ID(func, bpf_lsm_inode_need_killpriv) +BTF_ID(func, bpf_lsm_inode_post_setxattr) +BTF_ID(func, bpf_lsm_inode_readlink) +BTF_ID(func, bpf_lsm_inode_rename) +BTF_ID(func, bpf_lsm_inode_rmdir) +BTF_ID(func, bpf_lsm_inode_setattr) +BTF_ID(func, bpf_lsm_inode_setxattr) +BTF_ID(func, bpf_lsm_inode_symlink) +BTF_ID(func, bpf_lsm_inode_unlink) +BTF_ID(func, bpf_lsm_kernel_module_request) +BTF_ID(func, bpf_lsm_kernfs_init_security) +BTF_ID(func, bpf_lsm_key_free) +BTF_ID(func, bpf_lsm_mmap_file) +BTF_ID(func, bpf_lsm_netlink_send) +BTF_ID(func, bpf_lsm_path_notify) +BTF_ID(func, bpf_lsm_release_secctx) +BTF_ID(func, bpf_lsm_sb_alloc_security) +BTF_ID(func, bpf_lsm_sb_eat_lsm_opts) +BTF_ID(func, bpf_lsm_sb_kern_mount) +BTF_ID(func, bpf_lsm_sb_mount) +BTF_ID(func, bpf_lsm_sb_remount) +BTF_ID(func, bpf_lsm_sb_set_mnt_opts) +BTF_ID(func, bpf_lsm_sb_show_options) +BTF_ID(func, bpf_lsm_sb_statfs) +BTF_ID(func, bpf_lsm_sb_umount) +BTF_ID(func, bpf_lsm_settime) +BTF_ID(func, bpf_lsm_socket_accept) +BTF_ID(func, bpf_lsm_socket_bind) +BTF_ID(func, bpf_lsm_socket_connect) +BTF_ID(func, bpf_lsm_socket_create) +BTF_ID(func, bpf_lsm_socket_getpeername) +BTF_ID(func, bpf_lsm_socket_getpeersec_dgram) +BTF_ID(func, bpf_lsm_socket_getsockname) +BTF_ID(func, bpf_lsm_socket_getsockopt) +BTF_ID(func, bpf_lsm_socket_listen) +BTF_ID(func, bpf_lsm_socket_post_create) +BTF_ID(func, bpf_lsm_socket_recvmsg) +BTF_ID(func, bpf_lsm_socket_sendmsg) +BTF_ID(func, bpf_lsm_socket_shutdown) +BTF_ID(func, bpf_lsm_socket_socketpair) +BTF_ID(func, bpf_lsm_syslog) +BTF_ID(func, bpf_lsm_task_alloc) +BTF_ID(func, bpf_lsm_task_getsecid) +BTF_ID(func, bpf_lsm_task_prctl) +BTF_ID(func, bpf_lsm_task_setscheduler) +BTF_ID(func, bpf_lsm_task_to_inode) +BTF_SET_END(sleepable_lsm_hooks) + +bool bpf_lsm_is_sleepable_hook(u32 btf_id) +{ + return btf_id_set_contains(&sleepable_lsm_hooks, btf_id); +} + const struct bpf_prog_ops lsm_prog_ops = { }; diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c new file mode 100644 index 000000000000..4ef1959a78f2 --- /dev/null +++ b/kernel/bpf/bpf_task_storage.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Facebook + * Copyright 2020 Google LLC. + */ + +#include <linux/pid.h> +#include <linux/sched.h> +#include <linux/rculist.h> +#include <linux/list.h> +#include <linux/hash.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bpf.h> +#include <linux/bpf_local_storage.h> +#include <linux/filter.h> +#include <uapi/linux/btf.h> +#include <linux/bpf_lsm.h> +#include <linux/btf_ids.h> +#include <linux/fdtable.h> + +DEFINE_BPF_STORAGE_CACHE(task_cache); + +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) +{ + struct task_struct *task = owner; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + return &bsb->storage; +} + +static struct bpf_local_storage_data * +task_storage_lookup(struct task_struct *task, struct bpf_map *map, + bool cacheit_lockit) +{ + struct bpf_local_storage *task_storage; + struct bpf_local_storage_map *smap; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + + task_storage = rcu_dereference(bsb->storage); + if (!task_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); +} + +void bpf_task_storage_free(struct task_struct *task) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + bool free_task_storage = false; + struct bpf_storage_blob *bsb; + struct hlist_node *n; + + bsb = bpf_task(task); + if (!bsb) + return; + + rcu_read_lock(); + + local_storage = rcu_dereference(bsb->storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + /* Neither the bpf_prog nor the bpf-map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * local_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&local_storage->lock); + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + free_task_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false); + } + raw_spin_unlock_bh(&local_storage->lock); + rcu_read_unlock(); + + /* free_task_storage should always be true as long as + * local_storage->list was non-empty. + */ + if (free_task_storage) + kfree_rcu(local_storage, rcu); +} + +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = task_storage_lookup(task, map, true); + put_pid(pid); + return sdata ? sdata->data : NULL; +out: + put_pid(pid); + return ERR_PTR(err); +} + +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task || !task_storage_ptr(task)) { + err = -ENOENT; + goto out; + } + + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, map_flags); + + err = PTR_ERR_OR_ZERO(sdata); +out: + put_pid(pid); + return err; +} + +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = task_storage_lookup(task, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata)); + + return 0; +} + +static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + err = task_storage_delete(task, map); +out: + put_pid(pid); + return err; +} + +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags) +{ + struct bpf_local_storage_data *sdata; + + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + /* explicitly check that the task_storage_ptr is not + * NULL as task_storage_lookup returns NULL in this case and + * bpf_local_storage_update expects the owner to have a + * valid storage pointer. + */ + if (!task_storage_ptr(task)) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); + if (sdata) + return (unsigned long)sdata->data; + + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); + return IS_ERR(sdata) ? (unsigned long)NULL : + (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + return task_storage_delete(task, map); +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); + return &smap->map; +} + +static void task_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); +} + +static int task_storage_map_btf_id; +const struct bpf_map_ops task_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = task_storage_map_alloc, + .map_free = task_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_pid_task_storage_lookup_elem, + .map_update_elem = bpf_pid_task_storage_update_elem, + .map_delete_elem = bpf_pid_task_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", + .map_btf_id = &task_storage_map_btf_id, + .map_owner_storage_ptr = task_storage_ptr, +}; + +BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct) + +const struct bpf_func_proto bpf_task_storage_get_proto = { + .func = bpf_task_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_task_storage_delete_proto = { + .func = bpf_task_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], +}; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ed7d02e8bc93..6b2d508b33d4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -22,7 +22,8 @@ #include <linux/skmsg.h> #include <linux/perf_event.h> #include <linux/bsearch.h> -#include <linux/btf_ids.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> #include <net/sock.h> /* BTF (BPF Type Format) is the meta data format which describes @@ -204,12 +205,19 @@ struct btf { const char *strings; void *nohdr_data; struct btf_header hdr; - u32 nr_types; + u32 nr_types; /* includes VOID for base BTF */ u32 types_size; u32 data_size; refcount_t refcnt; u32 id; struct rcu_head rcu; + + /* split BTF support */ + struct btf *base_btf; + u32 start_id; /* first type ID in this BTF (0 for base BTF) */ + u32 start_str_off; /* first string offset (0 for base BTF) */ + char name[MODULE_NAME_LEN]; + bool kernel_btf; }; enum verifier_phase { @@ -450,14 +458,27 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } +static u32 btf_nr_types_total(const struct btf *btf) +{ + u32 total = 0; + + while (btf) { + total += btf->nr_types; + btf = btf->base_btf; + } + + return total; +} + s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) { const struct btf_type *t; const char *tname; - u32 i; + u32 i, total; - for (i = 1; i <= btf->nr_types; i++) { - t = btf->types[i]; + total = btf_nr_types_total(btf); + for (i = 1; i < total; i++) { + t = btf_type_by_id(btf, i); if (BTF_INFO_KIND(t->info) != kind) continue; @@ -600,8 +621,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { - return BTF_STR_OFFSET_VALID(offset) && - offset < btf->hdr.str_len; + if (!BTF_STR_OFFSET_VALID(offset)) + return false; + + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + return offset < btf->hdr.str_len; } static bool __btf_name_char_ok(char c, bool first, bool dot_ok) @@ -615,10 +642,22 @@ static bool __btf_name_char_ok(char c, bool first, bool dot_ok) return true; } +static const char *btf_str_by_offset(const struct btf *btf, u32 offset) +{ + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + if (offset < btf->hdr.str_len) + return &btf->strings[offset]; + + return NULL; +} + static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) { /* offset must be valid */ - const char *src = &btf->strings[offset]; + const char *src = btf_str_by_offset(btf, offset); const char *src_limit; if (!__btf_name_char_ok(*src, true, dot_ok)) @@ -651,27 +690,28 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset) static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { + const char *name; + if (!offset) return "(anon)"; - else if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - else - return "(invalid-name-offset)"; + + name = btf_str_by_offset(btf, offset); + return name ?: "(invalid-name-offset)"; } const char *btf_name_by_offset(const struct btf *btf, u32 offset) { - if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - - return NULL; + return btf_str_by_offset(btf, offset); } const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { - if (type_id > btf->nr_types) - return NULL; + while (type_id < btf->start_id) + btf = btf->base_btf; + type_id -= btf->start_id; + if (type_id >= btf->nr_types) + return NULL; return btf->types[type_id]; } @@ -1391,17 +1431,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) { struct btf *btf = env->btf; - /* < 2 because +1 for btf_void which is always in btf->types[0]. - * btf_void is not accounted in btf->nr_types because btf_void - * does not come from the BTF file. - */ - if (btf->types_size - btf->nr_types < 2) { + if (btf->types_size == btf->nr_types) { /* Expand 'types' array */ struct btf_type **new_types; u32 expand_by, new_size; - if (btf->types_size == BTF_MAX_TYPE) { + if (btf->start_id + btf->types_size == BTF_MAX_TYPE) { btf_verifier_log(env, "Exceeded max num of types"); return -E2BIG; } @@ -1415,18 +1451,23 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) if (!new_types) return -ENOMEM; - if (btf->nr_types == 0) - new_types[0] = &btf_void; - else + if (btf->nr_types == 0) { + if (!btf->base_btf) { + /* lazily init VOID type */ + new_types[0] = &btf_void; + btf->nr_types++; + } + } else { memcpy(new_types, btf->types, - sizeof(*btf->types) * (btf->nr_types + 1)); + sizeof(*btf->types) * btf->nr_types); + } kvfree(btf->types); btf->types = new_types; btf->types_size = new_size; } - btf->types[++(btf->nr_types)] = t; + btf->types[btf->nr_types++] = t; return 0; } @@ -1499,18 +1540,17 @@ static int env_resolve_init(struct btf_verifier_env *env) u32 *resolved_ids = NULL; u8 *visit_states = NULL; - /* +1 for btf_void */ - resolved_sizes = kvcalloc(nr_types + 1, sizeof(*resolved_sizes), + resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes), GFP_KERNEL | __GFP_NOWARN); if (!resolved_sizes) goto nomem; - resolved_ids = kvcalloc(nr_types + 1, sizeof(*resolved_ids), + resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids), GFP_KERNEL | __GFP_NOWARN); if (!resolved_ids) goto nomem; - visit_states = kvcalloc(nr_types + 1, sizeof(*visit_states), + visit_states = kvcalloc(nr_types, sizeof(*visit_states), GFP_KERNEL | __GFP_NOWARN); if (!visit_states) goto nomem; @@ -1562,21 +1602,27 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, static bool env_type_is_resolved(const struct btf_verifier_env *env, u32 type_id) { - return env->visit_states[type_id] == RESOLVED; + /* base BTF types should be resolved by now */ + if (type_id < env->btf->start_id) + return true; + + return env->visit_states[type_id - env->btf->start_id] == RESOLVED; } static int env_stack_push(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) { + const struct btf *btf = env->btf; struct resolve_vertex *v; if (env->top_stack == MAX_RESOLVE_DEPTH) return -E2BIG; - if (env->visit_states[type_id] != NOT_VISITED) + if (type_id < btf->start_id + || env->visit_states[type_id - btf->start_id] != NOT_VISITED) return -EEXIST; - env->visit_states[type_id] = VISITED; + env->visit_states[type_id - btf->start_id] = VISITED; v = &env->stack[env->top_stack++]; v->t = t; @@ -1606,6 +1652,7 @@ static void env_stack_pop_resolved(struct btf_verifier_env *env, u32 type_id = env->stack[--(env->top_stack)].type_id; struct btf *btf = env->btf; + type_id -= btf->start_id; /* adjust to local type id */ btf->resolved_sizes[type_id] = resolved_size; btf->resolved_ids[type_id] = resolved_type_id; env->visit_states[type_id] = RESOLVED; @@ -1710,14 +1757,30 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL); } +static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id) +{ + while (type_id < btf->start_id) + btf = btf->base_btf; + + return btf->resolved_ids[type_id - btf->start_id]; +} + /* The input param "type_id" must point to a needs_resolve type */ static const struct btf_type *btf_type_id_resolve(const struct btf *btf, u32 *type_id) { - *type_id = btf->resolved_ids[*type_id]; + *type_id = btf_resolved_type_id(btf, *type_id); return btf_type_by_id(btf, *type_id); } +static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id) +{ + while (type_id < btf->start_id) + btf = btf->base_btf; + + return btf->resolved_sizes[type_id - btf->start_id]; +} + const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *type_id, u32 *ret_size) { @@ -1732,7 +1795,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, if (btf_type_has_size(size_type)) { size = size_type->size; } else if (btf_type_is_array(size_type)) { - size = btf->resolved_sizes[size_type_id]; + size = btf_resolved_type_size(btf, size_type_id); } else if (btf_type_is_ptr(size_type)) { size = sizeof(void *); } else { @@ -1740,14 +1803,14 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, !btf_type_is_var(size_type))) return NULL; - size_type_id = btf->resolved_ids[size_type_id]; + size_type_id = btf_resolved_type_id(btf, size_type_id); size_type = btf_type_by_id(btf, size_type_id); if (btf_type_nosize_or_null(size_type)) return NULL; else if (btf_type_has_size(size_type)) size = size_type->size; else if (btf_type_is_array(size_type)) - size = btf->resolved_sizes[size_type_id]; + size = btf_resolved_type_size(btf, size_type_id); else if (btf_type_is_ptr(size_type)) size = sizeof(void *); else @@ -3799,7 +3862,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env) cur = btf->nohdr_data + hdr->type_off; end = cur + hdr->type_len; - env->log_type_id = 1; + env->log_type_id = btf->base_btf ? btf->start_id : 1; while (cur < end) { struct btf_type *t = cur; s32 meta_size; @@ -3826,8 +3889,8 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return false; if (btf_type_is_struct(t) || btf_type_is_datasec(t)) - return !btf->resolved_ids[type_id] && - !btf->resolved_sizes[type_id]; + return !btf_resolved_type_id(btf, type_id) && + !btf_resolved_type_size(btf, type_id); if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || btf_type_is_var(t)) { @@ -3847,7 +3910,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); return elem_type && !btf_type_is_modifier(elem_type) && (array->nelems * elem_size == - btf->resolved_sizes[type_id]); + btf_resolved_type_size(btf, type_id)); } return false; @@ -3889,7 +3952,8 @@ static int btf_resolve(struct btf_verifier_env *env, static int btf_check_all_types(struct btf_verifier_env *env) { struct btf *btf = env->btf; - u32 type_id; + const struct btf_type *t; + u32 type_id, i; int err; err = env_resolve_init(env); @@ -3897,8 +3961,9 @@ static int btf_check_all_types(struct btf_verifier_env *env) return err; env->phase++; - for (type_id = 1; type_id <= btf->nr_types; type_id++) { - const struct btf_type *t = btf_type_by_id(btf, type_id); + for (i = btf->base_btf ? 0 : 1; i < btf->nr_types; i++) { + type_id = btf->start_id + i; + t = btf_type_by_id(btf, type_id); env->log_type_id = type_id; if (btf_type_needs_resolve(t) && @@ -3935,7 +4000,7 @@ static int btf_parse_type_sec(struct btf_verifier_env *env) return -EINVAL; } - if (!hdr->type_len) { + if (!env->btf->base_btf && !hdr->type_len) { btf_verifier_log(env, "No type found"); return -EINVAL; } @@ -3962,13 +4027,18 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) return -EINVAL; } - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || - start[0] || end[-1]) { + btf->strings = start; + + if (btf->base_btf && !hdr->str_len) + return 0; + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) { + btf_verifier_log(env, "Invalid string section"); + return -EINVAL; + } + if (!btf->base_btf && start[0]) { btf_verifier_log(env, "Invalid string section"); return -EINVAL; } - - btf->strings = start; return 0; } @@ -4363,6 +4433,8 @@ struct btf *btf_parse_vmlinux(void) btf->data = __start_BTF; btf->data_size = __stop_BTF - __start_BTF; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "vmlinux"); err = btf_parse_hdr(env); if (err) @@ -4388,6 +4460,81 @@ struct btf *btf_parse_vmlinux(void) bpf_struct_ops_init(btf, log); + refcount_set(&btf->refcnt, 1); + + err = btf_alloc_id(btf); + if (err) + goto errout; + + btf_verifier_env_free(env); + return btf; + +errout: + btf_verifier_env_free(env); + if (btf) { + kvfree(btf->types); + kfree(btf); + } + return ERR_PTR(err); +} + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size) +{ + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + struct btf *btf = NULL, *base_btf; + int err; + + base_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(base_btf)) + return base_btf; + if (!base_btf) + return ERR_PTR(-EINVAL); + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + log->level = BPF_LOG_KERNEL; + + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); + if (!btf) { + err = -ENOMEM; + goto errout; + } + env->btf = btf; + + btf->base_btf = base_btf; + btf->start_id = base_btf->nr_types; + btf->start_str_off = base_btf->hdr.str_len; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "%s", module_name); + + btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN); + if (!btf->data) { + err = -ENOMEM; + goto errout; + } + memcpy(btf->data, data, data_size); + btf->data_size = data_size; + + err = btf_parse_hdr(env); + if (err) + goto errout; + + btf->nohdr_data = btf->data + btf->hdr.hdr_len; + + err = btf_parse_str_sec(env); + if (err) + goto errout; + + err = btf_check_all_metas(env); + if (err) + goto errout; + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; @@ -4395,12 +4542,15 @@ struct btf *btf_parse_vmlinux(void) errout: btf_verifier_env_free(env); if (btf) { + kvfree(btf->data); kvfree(btf->types); kfree(btf); } return ERR_PTR(err); } +#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ + struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) { struct bpf_prog *tgt_prog = prog->aux->dst_prog; @@ -4909,7 +5059,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id, while (t && btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!t) { - *bad_type = btf->types[0]; + *bad_type = btf_type_by_id(btf, 0); return -EINVAL; } if (btf_type_is_ptr(t)) @@ -5487,7 +5637,9 @@ int btf_get_info_by_fd(const struct btf *btf, struct bpf_btf_info info; u32 info_copy, btf_copy; void __user *ubtf; - u32 uinfo_len; + char __user *uname; + u32 uinfo_len, uname_len, name_len; + int ret = 0; uinfo = u64_to_user_ptr(attr->info.info); uinfo_len = attr->info.info_len; @@ -5504,11 +5656,37 @@ int btf_get_info_by_fd(const struct btf *btf, return -EFAULT; info.btf_size = btf->data_size; + info.kernel_btf = btf->kernel_btf; + + uname = u64_to_user_ptr(info.name); + uname_len = info.name_len; + if (!uname ^ !uname_len) + return -EINVAL; + + name_len = strlen(btf->name); + info.name_len = name_len; + + if (uname) { + if (uname_len >= name_len + 1) { + if (copy_to_user(uname, btf->name, name_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(uname, btf->name, uname_len - 1)) + return -EFAULT; + if (put_user(zero, uname + uname_len - 1)) + return -EFAULT; + /* let user-space know about too short buffer */ + ret = -ENOSPC; + } + } + if (copy_to_user(uinfo, &info, info_copy) || put_user(info_copy, &uattr->info.info_len)) return -EFAULT; - return 0; + return ret; } int btf_get_fd_by_id(u32 id) @@ -5548,3 +5726,126 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) { return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +struct btf_module { + struct list_head list; + struct module *module; + struct btf *btf; + struct bin_attribute *sysfs_attr; +}; |
