summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2020-11-14 09:13:40 -0800
committer: Jakub Kicinski <kuba@kernel.org> 2020-11-14 09:13:41 -0800
commit: 07cbce2e466cabb46b7c2317bd456584aa4ceacc (patch)
tree: 17c5bd5574a13aa7db481f650b622839597a8d29 /kernel
parent: 774626fa440e4c01bcbe5213cd5220dea545c9f7 (diff)
parent: c14d61fca0d10498bf267c0ab1f381dd0b35d96b (diff)
download: linux-07cbce2e466cabb46b7c2317bd456584aa4ceacc.tar.gz
linux-07cbce2e466cabb46b7c2317bd456584aa4ceacc.tar.bz2
linux-07cbce2e466cabb46b7c2317bd456584aa4ceacc.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2020-11-14 1) Add BTF generation for kernel modules and extend BTF infra in kernel, e.g. support for split BTF loading and validation, from Andrii Nakryiko. 2) Support for pointers beyond pkt_end to recognize LLVM generated patterns on inlined branch conditions, from Alexei Starovoitov. 3) Implement bpf_local_storage for task_struct for BPF LSM, from KP Singh. 4) Enable FENTRY/FEXIT/RAW_TP tracing programs to use the bpf_sk_storage infra, from Martin KaFai Lau. 5) Add XDP bulk APIs that introduce a defer/flush mechanism to optimize the XDP_REDIRECT path, from Lorenzo Bianconi. 6) Fix a potential (although rather theoretical) deadlock of hashtab in NMI context, from Song Liu. 7) Fixes for cross and out-of-tree build of bpftool and runqslower allowing build for different target archs on same source tree, from Jean-Philippe Brucker. 8) Fix error path in htab_map_alloc() triggered from syzbot, from Eric Dumazet. 9) Move functionality from test_tcpbpf_user into the test_progs framework so it can run in BPF CI, from Alexander Duyck. 10) Lift hashtab key_size limit to be larger than MAX_BPF_STACK, from Florian Lehner. Note that for the fix from Song we have seen a sparse report on context imbalance which requires changes in sparse itself for proper annotation detection where this is currently being discussed on linux-sparse among developers [0]. Once we have more clarification/guidance after their fix, Song will follow up.
[0] https://lore.kernel.org/linux-sparse/CAHk-=wh4bx8A8dHnX612MsDO13st6uzAz1mJ1PaHHVevJx_ZCw@mail.gmail.com/T/ https://lore.kernel.org/linux-sparse/20201109221345.uklbp3lzgq6g42zb@ltop.local/T/ * git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (66 commits) net: mlx5: Add xdp tx return bulking support net: mvpp2: Add xdp tx return bulking support net: mvneta: Add xdp tx return bulking support net: page_pool: Add bulk support for ptr_ring net: xdp: Introduce bulking for xdp tx return path bpf: Expose bpf_d_path helper to sleepable LSM hooks bpf: Augment the set of sleepable LSM hooks bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP bpf: Allow using bpf_sk_storage in FENTRY/FEXIT/RAW_TP bpf: Rename some functions in bpf_sk_storage bpf: Folding omem_charge() into sk_storage_charge() selftests/bpf: Add asm tests for pkt vs pkt_end comparison. selftests/bpf: Add skb_pkt_end test bpf: Support for pointers beyond pkt_end. tools/bpf: Always run the *-clean recipes tools/bpf: Add bootstrap/ to .gitignore bpf: Fix NULL dereference in bpf_task_storage tools/bpftool: Fix build slowdown tools/runqslower: Build bpftool using HOSTCC tools/runqslower: Enable out-of-tree build ... ==================== Link: https://lore.kernel.org/r/20201114020819.29584-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/Makefile 1
-rw-r--r-- kernel/bpf/bpf_iter.c 14
-rw-r--r-- kernel/bpf/bpf_lsm.c 88
-rw-r--r-- kernel/bpf/bpf_task_storage.c 315
-rw-r--r-- kernel/bpf/btf.c 411
-rw-r--r-- kernel/bpf/hashtab.c 144
-rw-r--r-- kernel/bpf/syscall.c 3
-rw-r--r-- kernel/bpf/sysfs_btf.c 2
-rw-r--r-- kernel/bpf/task_iter.c 2
-rw-r--r-- kernel/bpf/verifier.c 182
-rw-r--r-- kernel/module.c 32
-rw-r--r-- kernel/trace/bpf_trace.c 29
12 files changed, 1076 insertions, 147 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index c1b9f71ee6aa..d1249340fd6b 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 8f10e30ea0b0..5454161407f1 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -67,6 +67,15 @@ static void bpf_iter_done_stop(struct seq_file *seq)
iter_priv->done_stop = true;
}
+static bool bpf_iter_support_resched(struct seq_file *seq)
+{
+ struct bpf_iter_priv_data *iter_priv;
+
+ iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+ target_private);
+ return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
+}
+
/* maximum visited objects before bailing out */
#define MAX_ITER_OBJECTS 1000000
@@ -83,6 +92,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
struct seq_file *seq = file->private_data;
size_t n, offs, copied = 0;
int err = 0, num_objs = 0;
+ bool can_resched;
void *p;
mutex_lock(&seq->lock);
@@ -135,6 +145,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
goto done;
}
+ can_resched = bpf_iter_support_resched(seq);
while (1) {
loff_t pos = seq->index;
@@ -180,6 +191,9 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
}
break;
}
+
+ if (can_resched)
+ cond_resched();
}
stop:
offs = seq->count;
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 56cc5a915f67..553107f4706a 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -63,11 +63,99 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_spin_lock:
+ return &bpf_spin_lock_proto;
+ case BPF_FUNC_spin_unlock:
+ return &bpf_spin_unlock_proto;
+ case BPF_FUNC_task_storage_get:
+ return &bpf_task_storage_get_proto;
+ case BPF_FUNC_task_storage_delete:
+ return &bpf_task_storage_delete_proto;
default:
return tracing_prog_func_proto(func_id, prog);
}
}
+/* The set of hooks which are called without pagefaults disabled and are allowed
+ * to "sleep" and thus can be used for sleepable BPF programs.
+ */
+BTF_SET_START(sleepable_lsm_hooks)
+BTF_ID(func, bpf_lsm_bpf)
+BTF_ID(func, bpf_lsm_bpf_map)
+BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
+BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_prog)
+BTF_ID(func, bpf_lsm_bprm_check_security)
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+BTF_ID(func, bpf_lsm_bprm_committing_creds)
+BTF_ID(func, bpf_lsm_bprm_creds_for_exec)
+BTF_ID(func, bpf_lsm_bprm_creds_from_file)
+BTF_ID(func, bpf_lsm_capget)
+BTF_ID(func, bpf_lsm_capset)
+BTF_ID(func, bpf_lsm_cred_prepare)
+BTF_ID(func, bpf_lsm_file_ioctl)
+BTF_ID(func, bpf_lsm_file_lock)
+BTF_ID(func, bpf_lsm_file_open)
+BTF_ID(func, bpf_lsm_file_receive)
+BTF_ID(func, bpf_lsm_inet_conn_established)
+BTF_ID(func, bpf_lsm_inode_create)
+BTF_ID(func, bpf_lsm_inode_free_security)
+BTF_ID(func, bpf_lsm_inode_getattr)
+BTF_ID(func, bpf_lsm_inode_getxattr)
+BTF_ID(func, bpf_lsm_inode_mknod)
+BTF_ID(func, bpf_lsm_inode_need_killpriv)
+BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_readlink)
+BTF_ID(func, bpf_lsm_inode_rename)
+BTF_ID(func, bpf_lsm_inode_rmdir)
+BTF_ID(func, bpf_lsm_inode_setattr)
+BTF_ID(func, bpf_lsm_inode_setxattr)
+BTF_ID(func, bpf_lsm_inode_symlink)
+BTF_ID(func, bpf_lsm_inode_unlink)
+BTF_ID(func, bpf_lsm_kernel_module_request)
+BTF_ID(func, bpf_lsm_kernfs_init_security)
+BTF_ID(func, bpf_lsm_key_free)
+BTF_ID(func, bpf_lsm_mmap_file)
+BTF_ID(func, bpf_lsm_netlink_send)
+BTF_ID(func, bpf_lsm_path_notify)
+BTF_ID(func, bpf_lsm_release_secctx)
+BTF_ID(func, bpf_lsm_sb_alloc_security)
+BTF_ID(func, bpf_lsm_sb_eat_lsm_opts)
+BTF_ID(func, bpf_lsm_sb_kern_mount)
+BTF_ID(func, bpf_lsm_sb_mount)
+BTF_ID(func, bpf_lsm_sb_remount)
+BTF_ID(func, bpf_lsm_sb_set_mnt_opts)
+BTF_ID(func, bpf_lsm_sb_show_options)
+BTF_ID(func, bpf_lsm_sb_statfs)
+BTF_ID(func, bpf_lsm_sb_umount)
+BTF_ID(func, bpf_lsm_settime)
+BTF_ID(func, bpf_lsm_socket_accept)
+BTF_ID(func, bpf_lsm_socket_bind)
+BTF_ID(func, bpf_lsm_socket_connect)
+BTF_ID(func, bpf_lsm_socket_create)
+BTF_ID(func, bpf_lsm_socket_getpeername)
+BTF_ID(func, bpf_lsm_socket_getpeersec_dgram)
+BTF_ID(func, bpf_lsm_socket_getsockname)
+BTF_ID(func, bpf_lsm_socket_getsockopt)
+BTF_ID(func, bpf_lsm_socket_listen)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_recvmsg)
+BTF_ID(func, bpf_lsm_socket_sendmsg)
+BTF_ID(func, bpf_lsm_socket_shutdown)
+BTF_ID(func, bpf_lsm_socket_socketpair)
+BTF_ID(func, bpf_lsm_syslog)
+BTF_ID(func, bpf_lsm_task_alloc)
+BTF_ID(func, bpf_lsm_task_getsecid)
+BTF_ID(func, bpf_lsm_task_prctl)
+BTF_ID(func, bpf_lsm_task_setscheduler)
+BTF_ID(func, bpf_lsm_task_to_inode)
+BTF_SET_END(sleepable_lsm_hooks)
+
+bool bpf_lsm_is_sleepable_hook(u32 btf_id)
+{
+ return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
+}
+
const struct bpf_prog_ops lsm_prog_ops = {
};
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
new file mode 100644
index 000000000000..4ef1959a78f2
--- /dev/null
+++ b/kernel/bpf/bpf_task_storage.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <linux/filter.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(task_cache);
+
+static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
+{
+ struct task_struct *task = owner;
+ struct bpf_storage_blob *bsb;
+
+ bsb = bpf_task(task);
+ if (!bsb)
+ return NULL;
+ return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+task_storage_lookup(struct task_struct *task, struct bpf_map *map,
+ bool cacheit_lockit)
+{
+ struct bpf_local_storage *task_storage;
+ struct bpf_local_storage_map *smap;
+ struct bpf_storage_blob *bsb;
+
+ bsb = bpf_task(task);
+ if (!bsb)
+ return NULL;
+
+ task_storage = rcu_dereference(bsb->storage);
+ if (!task_storage)
+ return NULL;
+
+ smap = (struct bpf_local_storage_map *)map;
+ return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
+}
+
+void bpf_task_storage_free(struct task_struct *task)
+{
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *local_storage;
+ bool free_task_storage = false;
+ struct bpf_storage_blob *bsb;
+ struct hlist_node *n;
+
+ bsb = bpf_task(task);
+ if (!bsb)
+ return;
+
+ rcu_read_lock();
+
+ local_storage = rcu_dereference(bsb->storage);
+ if (!local_storage) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* Neither the bpf_prog nor the bpf-map's syscall
+ * could be modifying the local_storage->list now.
+ * Thus, no elem can be added-to or deleted-from the
+ * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+ *
+ * It is racing with bpf_local_storage_map_free() alone
+ * when unlinking elem from the local_storage->list and
+ * the map's bucket->list.
+ */
+ raw_spin_lock_bh(&local_storage->lock);
+ hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+ /* Always unlink from map before unlinking from
+ * local_storage.
+ */
+ bpf_selem_unlink_map(selem);
+ free_task_storage = bpf_selem_unlink_storage_nolock(
+ local_storage, selem, false);
+ }
+ raw_spin_unlock_bh(&local_storage->lock);
+ rcu_read_unlock();
+
+ /* free_task_storage should always be true as long as
+ * local_storage->list was non-empty.
+ */
+ if (free_task_storage)
+ kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_local_storage_data *sdata;
+ struct task_struct *task;
+ unsigned int f_flags;
+ struct pid *pid;
+ int fd, err;
+
+ fd = *(int *)key;
+ pid = pidfd_get_pid(fd, &f_flags);
+ if (IS_ERR(pid))
+ return ERR_CAST(pid);
+
+ /* We should be in an RCU read-side critical section, so it should be safe
+ * to call pid_task().
+ */
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ sdata = task_storage_lookup(task, map, true);
+ put_pid(pid);
+ return sdata ? sdata->data : NULL;
+out:
+ put_pid(pid);
+ return ERR_PTR(err);
+}
+
+static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ struct bpf_local_storage_data *sdata;
+ struct task_struct *task;
+ unsigned int f_flags;
+ struct pid *pid;
+ int fd, err;
+
+ fd = *(int *)key;
+ pid = pidfd_get_pid(fd, &f_flags);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+
+ /* We should be in an RCU read-side critical section, so it should be safe
+ * to call pid_task().
+ */
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task || !task_storage_ptr(task)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ sdata = bpf_local_storage_update(
+ task, (struct bpf_local_storage_map *)map, value, map_flags);
+
+ err = PTR_ERR_OR_ZERO(sdata);
+out:
+ put_pid(pid);
+ return err;
+}
+
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
+{
+ struct bpf_local_storage_data *sdata;
+
+ sdata = task_storage_lookup(task, map, false);
+ if (!sdata)
+ return -ENOENT;
+
+ bpf_selem_unlink(SELEM(sdata));
+
+ return 0;
+}
+
+static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+{
+ struct task_struct *task;
+ unsigned int f_flags;
+ struct pid *pid;
+ int fd, err;
+
+ fd = *(int *)key;
+ pid = pidfd_get_pid(fd, &f_flags);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+
+ /* We should be in an RCU read-side critical section, so it should be safe
+ * to call pid_task().
+ */
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = task_storage_delete(task, map);
+out:
+ put_pid(pid);
+ return err;
+}
+
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags)
+{
+ struct bpf_local_storage_data *sdata;
+
+ if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+ return (unsigned long)NULL;
+
+ /* explicitly check that the task_storage_ptr is not
+ * NULL as task_storage_lookup returns NULL in this case and
+ * bpf_local_storage_update expects the owner to have a
+ * valid storage pointer.
+ */
+ if (!task_storage_ptr(task))
+ return (unsigned long)NULL;
+
+ sdata = task_storage_lookup(task, map, true);
+ if (sdata)
+ return (unsigned long)sdata->data;
+
+ /* This helper must only be called from places where the lifetime of the task
+ * is guaranteed. Either by being refcounted or by being protected
+ * by an RCU read-side critical section.
+ */
+ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+ sdata = bpf_local_storage_update(
+ task, (struct bpf_local_storage_map *)map, value,
+ BPF_NOEXIST);
+ return IS_ERR(sdata) ? (unsigned long)NULL :
+ (unsigned long)sdata->data;
+ }
+
+ return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
+ task)
+{
+ /* This helper must only be called from places where the lifetime of the task
+ * is guaranteed. Either by being refcounted or by being protected
+ * by an RCU read-side critical section.
+ */
+ return task_storage_delete(task, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ return -ENOTSUPP;
+}
+
+static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
+
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+ return &smap->map;
+}
+
+static void task_storage_map_free(struct bpf_map *map)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
+}
+
+static int task_storage_map_btf_id;
+const struct bpf_map_ops task_storage_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = task_storage_map_alloc,
+ .map_free = task_storage_map_free,
+ .map_get_next_key = notsupp_get_next_key,
+ .map_lookup_elem = bpf_pid_task_storage_lookup_elem,
+ .map_update_elem = bpf_pid_task_storage_update_elem,
+ .map_delete_elem = bpf_pid_task_storage_delete_elem,
+ .map_check_btf = bpf_local_storage_map_check_btf,
+ .map_btf_name = "bpf_local_storage_map",
+ .map_btf_id = &task_storage_map_btf_id,
+ .map_owner_storage_ptr = task_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
+
+const struct bpf_func_proto bpf_task_storage_get_proto = {
+ .func = bpf_task_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_task_storage_delete_proto = {
+ .func = bpf_task_storage_delete,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ed7d02e8bc93..6b2d508b33d4 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -22,7 +22,8 @@
#include <linux/skmsg.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
-#include <linux/btf_ids.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
#include <net/sock.h>
/* BTF (BPF Type Format) is the meta data format which describes
@@ -204,12 +205,19 @@ struct btf {
const char *strings;
void *nohdr_data;
struct btf_header hdr;
- u32 nr_types;
+ u32 nr_types; /* includes VOID for base BTF */
u32 types_size;
u32 data_size;
refcount_t refcnt;
u32 id;
struct rcu_head rcu;
+
+ /* split BTF support */
+ struct btf *base_btf;
+ u32 start_id; /* first type ID in this BTF (0 for base BTF) */
+ u32 start_str_off; /* first string offset (0 for base BTF) */
+ char name[MODULE_NAME_LEN];
+ bool kernel_btf;
};
enum verifier_phase {
@@ -450,14 +458,27 @@ static bool btf_type_is_datasec(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
}
+static u32 btf_nr_types_total(const struct btf *btf)
+{
+ u32 total = 0;
+
+ while (btf) {
+ total += btf->nr_types;
+ btf = btf->base_btf;
+ }
+
+ return total;
+}
+
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
{
const struct btf_type *t;
const char *tname;
- u32 i;
+ u32 i, total;
- for (i = 1; i <= btf->nr_types; i++) {
- t = btf->types[i];
+ total = btf_nr_types_total(btf);
+ for (i = 1; i < total; i++) {
+ t = btf_type_by_id(btf, i);
if (BTF_INFO_KIND(t->info) != kind)
continue;
@@ -600,8 +621,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
- return BTF_STR_OFFSET_VALID(offset) &&
- offset < btf->hdr.str_len;
+ if (!BTF_STR_OFFSET_VALID(offset))
+ return false;
+
+ while (offset < btf->start_str_off)
+ btf = btf->base_btf;
+
+ offset -= btf->start_str_off;
+ return offset < btf->hdr.str_len;
}
static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
@@ -615,10 +642,22 @@ static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
return true;
}
+static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
+{
+ while (offset < btf->start_str_off)
+ btf = btf->base_btf;
+
+ offset -= btf->start_str_off;
+ if (offset < btf->hdr.str_len)
+ return &btf->strings[offset];
+
+ return NULL;
+}
+
static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
/* offset must be valid */
- const char *src = &btf->strings[offset];
+ const char *src = btf_str_by_offset(btf, offset);
const char *src_limit;
if (!__btf_name_char_ok(*src, true, dot_ok))
@@ -651,27 +690,28 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset)
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
+ const char *name;
+
if (!offset)
return "(anon)";
- else if (offset < btf->hdr.str_len)
- return &btf->strings[offset];
- else
- return "(invalid-name-offset)";
+
+ name = btf_str_by_offset(btf, offset);
+ return name ?: "(invalid-name-offset)";
}
const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
- if (offset < btf->hdr.str_len)
- return &btf->strings[offset];
-
- return NULL;
+ return btf_str_by_offset(btf, offset);
}
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
{
- if (type_id > btf->nr_types)
- return NULL;
+ while (type_id < btf->start_id)
+ btf = btf->base_btf;
+ type_id -= btf->start_id;
+ if (type_id >= btf->nr_types)
+ return NULL;
return btf->types[type_id];
}
@@ -1391,17 +1431,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
{
struct btf *btf = env->btf;
- /* < 2 because +1 for btf_void which is always in btf->types[0].
- * btf_void is not accounted in btf->nr_types because btf_void
- * does not come from the BTF file.
- */
- if (btf->types_size - btf->nr_types < 2) {
+ if (btf->types_size == btf->nr_types) {
/* Expand 'types' array */
struct btf_type **new_types;
u32 expand_by, new_size;
- if (btf->types_size == BTF_MAX_TYPE) {
+ if (btf->start_id + btf->types_size == BTF_MAX_TYPE) {
btf_verifier_log(env, "Exceeded max num of types");
return -E2BIG;
}
@@ -1415,18 +1451,23 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
if (!new_types)
return -ENOMEM;
- if (btf->nr_types == 0)
- new_types[0] = &btf_void;
- else
+ if (btf->nr_types == 0) {
+ if (!btf->base_btf) {
+ /* lazily init VOID type */
+ new_types[0] = &btf_void;
+ btf->nr_types++;
+ }
+ } else {
memcpy(new_types, btf->types,
- sizeof(*btf->types) * (btf->nr_types + 1));
+ sizeof(*btf->types) * btf->nr_types);
+ }
kvfree(btf->types);
btf->types = new_types;
btf->types_size = new_size;
}
- btf->types[++(btf->nr_types)] = t;
+ btf->types[btf->nr_types++] = t;
return 0;
}
@@ -1499,18 +1540,17 @@ static int env_resolve_init(struct btf_verifier_env *env)
u32 *resolved_ids = NULL;
u8 *visit_states = NULL;
- /* +1 for btf_void */
- resolved_sizes = kvcalloc(nr_types + 1, sizeof(*resolved_sizes),
+ resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes),
GFP_KERNEL | __GFP_NOWARN);
if (!resolved_sizes)
goto nomem;
- resolved_ids = kvcalloc(nr_types + 1, sizeof(*resolved_ids),
+ resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids),
GFP_KERNEL | __GFP_NOWARN);
if (!resolved_ids)
goto nomem;
- visit_states = kvcalloc(nr_types + 1, sizeof(*visit_states),
+ visit_states = kvcalloc(nr_types, sizeof(*visit_states),
GFP_KERNEL | __GFP_NOWARN);
if (!visit_states)
goto nomem;
@@ -1562,21 +1602,27 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
static bool env_type_is_resolved(const struct btf_verifier_env *env,
u32 type_id)
{
- return env->visit_states[type_id] == RESOLVED;
+ /* base BTF types should be resolved by now */
+ if (type_id < env->btf->start_id)
+ return true;
+
+ return env->visit_states[type_id - env->btf->start_id] == RESOLVED;
}
static int env_stack_push(struct btf_verifier_env *env,
const struct btf_type *t, u32 type_id)
{
+ const struct btf *btf = env->btf;
struct resolve_vertex *v;
if (env->top_stack == MAX_RESOLVE_DEPTH)
return -E2BIG;
- if (env->visit_states[type_id] != NOT_VISITED)
+ if (type_id < btf->start_id
+ || env->visit_states[type_id - btf->start_id] != NOT_VISITED)
return -EEXIST;
- env->visit_states[type_id] = VISITED;
+ env->visit_states[type_id - btf->start_id] = VISITED;
v = &env->stack[env->top_stack++];
v->t = t;
@@ -1606,6 +1652,7 @@ static void env_stack_pop_resolved(struct btf_verifier_env *env,
u32 type_id = env->stack[--(env->top_stack)].type_id;
struct btf *btf = env->btf;
+ type_id -= btf->start_id; /* adjust to local type id */
btf->resolved_sizes[type_id] = resolved_size;
btf->resolved_ids[type_id] = resolved_type_id;
env->visit_states[type_id] = RESOLVED;
@@ -1710,14 +1757,30 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL);
}
+static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id)
+{
+ while (type_id < btf->start_id)
+ btf = btf->base_btf;
+
+ return btf->resolved_ids[type_id - btf->start_id];
+}
+
/* The input param "type_id" must point to a needs_resolve type */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
u32 *type_id)
{
- *type_id = btf->resolved_ids[*type_id];
+ *type_id = btf_resolved_type_id(btf, *type_id);
return btf_type_by_id(btf, *type_id);
}
+static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id)
+{
+ while (type_id < btf->start_id)
+ btf = btf->base_btf;
+
+ return btf->resolved_sizes[type_id - btf->start_id];
+}
+
const struct btf_type *btf_type_id_size(const struct btf *btf,
u32 *type_id, u32 *ret_size)
{
@@ -1732,7 +1795,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
if (btf_type_has_size(size_type)) {
size = size_type->size;
} else if (btf_type_is_array(size_type)) {
- size = btf->resolved_sizes[size_type_id];
+ size = btf_resolved_type_size(btf, size_type_id);
} else if (btf_type_is_ptr(size_type)) {
size = sizeof(void *);
} else {
@@ -1740,14 +1803,14 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
!btf_type_is_var(size_type)))
return NULL;
- size_type_id = btf->resolved_ids[size_type_id];
+ size_type_id = btf_resolved_type_id(btf, size_type_id);
size_type = btf_type_by_id(btf, size_type_id);
if (btf_type_nosize_or_null(size_type))
return NULL;
else if (btf_type_has_size(size_type))
size = size_type->size;
else if (btf_type_is_array(size_type))
- size = btf->resolved_sizes[size_type_id];
+ size = btf_resolved_type_size(btf, size_type_id);
else if (btf_type_is_ptr(size_type))
size = sizeof(void *);
else
@@ -3799,7 +3862,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
cur = btf->nohdr_data + hdr->type_off;
end = cur + hdr->type_len;
- env->log_type_id = 1;
+ env->log_type_id = btf->base_btf ? btf->start_id : 1;
while (cur < end) {
struct btf_type *t = cur;
s32 meta_size;
@@ -3826,8 +3889,8 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
return false;
if (btf_type_is_struct(t) || btf_type_is_datasec(t))
- return !btf->resolved_ids[type_id] &&
- !btf->resolved_sizes[type_id];
+ return !btf_resolved_type_id(btf, type_id) &&
+ !btf_resolved_type_size(btf, type_id);
if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
btf_type_is_var(t)) {
@@ -3847,7 +3910,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
return elem_type && !btf_type_is_modifier(elem_type) &&
(array->nelems * elem_size ==
- btf->resolved_sizes[type_id]);
+ btf_resolved_type_size(btf, type_id));
}
return false;
@@ -3889,7 +3952,8 @@ static int btf_resolve(struct btf_verifier_env *env,
static int btf_check_all_types(struct btf_verifier_env *env)
{
struct btf *btf = env->btf;
- u32 type_id;
+ const struct btf_type *t;
+ u32 type_id, i;
int err;
err = env_resolve_init(env);
@@ -3897,8 +3961,9 @@ static int btf_check_all_types(struct btf_verifier_env *env)
return err;
env->phase++;
- for (type_id = 1; type_id <= btf->nr_types; type_id++) {
- const struct btf_type *t = btf_type_by_id(btf, type_id);
+ for (i = btf->base_btf ? 0 : 1; i < btf->nr_types; i++) {
+ type_id = btf->start_id + i;
+ t = btf_type_by_id(btf, type_id);
env->log_type_id = type_id;
if (btf_type_needs_resolve(t) &&
@@ -3935,7 +4000,7 @@ static int btf_parse_type_sec(struct btf_verifier_env *env)
return -EINVAL;
}
- if (!hdr->type_len) {
+ if (!env->btf->base_btf && !hdr->type_len) {
btf_verifier_log(env, "No type found");
return -EINVAL;
}
@@ -3962,13 +4027,18 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return -EINVAL;
}
- if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
- start[0] || end[-1]) {
+ btf->strings = start;
+
+ if (btf->base_btf && !hdr->str_len)
+ return 0;
+ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) {
+ btf_verifier_log(env, "Invalid string section");
+ return -EINVAL;
+ }
+ if (!btf->base_btf && start[0]) {
btf_verifier_log(env, "Invalid string section");
return -EINVAL;
}
-
- btf->strings = start;
return 0;
}
@@ -4363,6 +4433,8 @@ struct btf *btf_parse_vmlinux(void)
btf->data = __start_BTF;
btf->data_size = __stop_BTF - __start_BTF;
+ btf->kernel_btf = true;
+ snprintf(btf->name, sizeof(btf->name), "vmlinux");
err = btf_parse_hdr(env);
if (err)
@@ -4388,6 +4460,81 @@ struct btf *btf_parse_vmlinux(void)
bpf_struct_ops_init(btf, log);
+ refcount_set(&btf->refcnt, 1);
+
+ err = btf_alloc_id(btf);
+ if (err)
+ goto errout;
+
+ btf_verifier_env_free(env);
+ return btf;
+
+errout:
+ btf_verifier_env_free(env);
+ if (btf) {
+ kvfree(btf->types);
+ kfree(btf);
+ }
+ return ERR_PTR(err);
+}
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+
+static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size)
+{
+ struct btf_verifier_env *env = NULL;
+ struct bpf_verifier_log *log;
+ struct btf *btf = NULL, *base_btf;
+ int err;
+
+ base_btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(base_btf))
+ return base_btf;
+ if (!base_btf)
+ return ERR_PTR(-EINVAL);
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+ if (!env)
+ return ERR_PTR(-ENOMEM);
+
+ log = &env->log;
+ log->level = BPF_LOG_KERNEL;
+
+ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+ if (!btf) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ env->btf = btf;
+
+ btf->base_btf = base_btf;
+ btf->start_id = base_btf->nr_types;
+ btf->start_str_off = base_btf->hdr.str_len;
+ btf->kernel_btf = true;
+ snprintf(btf->name, sizeof(btf->name), "%s", module_name);
+
+ btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
+ if (!btf->data) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ memcpy(