summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/map_cgrp_storage.rst109
-rw-r--r--Documentation/bpf/maps.rst101
-rw-r--r--arch/arm64/net/bpf_jit_comp.c9
-rw-r--r--arch/x86/net/bpf_jit_comp.c125
-rw-r--r--include/linux/bpf.h33
-rw-r--r--include/linux/bpf_local_storage.h17
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/bpf_verifier.h15
-rw-r--r--include/linux/btf_ids.h1
-rw-r--r--include/linux/cgroup-defs.h4
-rw-r--r--include/linux/module.h9
-rw-r--r--include/uapi/linux/bpf.h50
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c247
-rw-r--r--kernel/bpf/bpf_inode_storage.c38
-rw-r--r--kernel/bpf/bpf_local_storage.c191
-rw-r--r--kernel/bpf/bpf_task_storage.c157
-rw-r--r--kernel/bpf/cgroup_iter.c2
-rw-r--r--kernel/bpf/cpumap.c20
-rw-r--r--kernel/bpf/helpers.c6
-rw-r--r--kernel/bpf/syscall.c12
-rw-r--r--kernel/bpf/trampoline.c80
-rw-r--r--kernel/bpf/verifier.c29
-rw-r--r--kernel/cgroup/cgroup.c1
-rw-r--r--kernel/module/kallsyms.c2
-rw-r--r--kernel/trace/bpf_trace.c107
-rw-r--r--kernel/trace/ftrace.c16
-rw-r--r--net/core/bpf_sk_storage.c35
-rw-r--r--samples/bpf/README.rst6
-rw-r--r--samples/bpf/hbm_edt_kern.c2
-rw-r--r--samples/bpf/xdp1_user.c2
-rw-r--r--samples/bpf/xdp2_kern.c4
-rwxr-xr-xscripts/bpf_doc.py2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst15
-rw-r--r--tools/bpf/bpftool/Documentation/common_options.rst8
-rw-r--r--tools/bpf/bpftool/Makefile74
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool1
-rw-r--r--tools/bpf/bpftool/common.c12
-rw-r--r--tools/bpf/bpftool/iter.c2
-rw-r--r--tools/bpf/bpftool/jit_disasm.c261
-rw-r--r--tools/bpf/bpftool/main.c90
-rw-r--r--tools/bpf/bpftool/main.h32
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/bpf/bpftool/net.c2
-rw-r--r--tools/bpf/bpftool/perf.c2
-rw-r--r--tools/bpf/bpftool/prog.c99
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c2
-rw-r--r--tools/include/uapi/linux/bpf.h50
-rw-r--r--tools/lib/bpf/btf.c8
-rw-r--r--tools/lib/bpf/libbpf.c178
-rw-r--r--tools/lib/bpf/libbpf_probes.c1
-rw-r--r--tools/lib/bpf/usdt.c16
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch6481
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile8
-rw-r--r--tools/testing/selftests/bpf/README.rst42
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c24
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/config.aarch64181
-rw-r--r--tools/testing/selftests/bpf/config.s390x3
-rw-r--r--tools/testing/selftests/bpf/config.x86_641
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c171
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c164
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c21
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c101
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_negative.c26
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c70
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c88
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi.c50
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c3
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c43
-rw-r--r--tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c17
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_metadata.sh7
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py8
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh17
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh3
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh5
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh5
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh8
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c20
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/verifier/jit.c24
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh6
96 files changed, 3203 insertions, 640 deletions
diff --git a/Documentation/bpf/map_cgrp_storage.rst b/Documentation/bpf/map_cgrp_storage.rst
new file mode 100644
index 000000000000..5d3f603efffa
--- /dev/null
+++ b/Documentation/bpf/map_cgrp_storage.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Meta Platforms, Inc. and affiliates.
+
+=========================
+BPF_MAP_TYPE_CGRP_STORAGE
+=========================
+
+The ``BPF_MAP_TYPE_CGRP_STORAGE`` map type represents a local fix-sized
+storage for cgroups. It is only available with ``CONFIG_CGROUPS``.
+The programs are made available by the same Kconfig. The
+data for a particular cgroup can be retrieved by looking up the map
+with that cgroup.
+
+This document describes the usage and semantics of the
+``BPF_MAP_TYPE_CGRP_STORAGE`` map type.
+
+Usage
+=====
+
+The map key must be ``sizeof(int)`` representing a cgroup fd.
+To access the storage in a program, use ``bpf_cgrp_storage_get``::
+
+ void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags)
+
+``flags`` could be 0 or ``BPF_LOCAL_STORAGE_GET_F_CREATE`` which indicates that
+a new local storage will be created if one does not exist.
+
+The local storage can be removed with ``bpf_cgrp_storage_delete``::
+
+ long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup)
+
+The map is available to all program types.
+
+Examples
+========
+
+A BPF program example with BPF_MAP_TYPE_CGRP_STORAGE::
+
+ #include <vmlinux.h>
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+ } cgrp_storage SEC(".maps");
+
+ SEC("tp_btf/sys_enter")
+ int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+ {
+ struct task_struct *task = bpf_get_current_task_btf();
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&cgrp_storage, task->cgroups->dfl_cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ __sync_fetch_and_add(ptr, 1);
+
+ return 0;
+ }
+
+Userspace accessing map declared above::
+
+ #include <linux/bpf.h>
+ #include <linux/libbpf.h>
+
+ __u32 map_lookup(struct bpf_map *map, int cgrp_fd)
+ {
+ __u32 *value;
+ value = bpf_map_lookup_elem(bpf_map__fd(map), &cgrp_fd);
+ if (value)
+ return *value;
+ return 0;
+ }
+
+Difference Between BPF_MAP_TYPE_CGRP_STORAGE and BPF_MAP_TYPE_CGROUP_STORAGE
+============================================================================
+
+The old cgroup storage map ``BPF_MAP_TYPE_CGROUP_STORAGE`` has been marked as
+deprecated (renamed to ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``). The new
+``BPF_MAP_TYPE_CGRP_STORAGE`` map should be used instead. The following
+illusates the main difference between ``BPF_MAP_TYPE_CGRP_STORAGE`` and
+``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``.
+
+(1). ``BPF_MAP_TYPE_CGRP_STORAGE`` can be used by all program types while
+ ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` is available only to cgroup program types
+ like BPF_CGROUP_INET_INGRESS or BPF_CGROUP_SOCK_OPS, etc.
+
+(2). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports local storage for more than one
+ cgroup while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only supports one cgroup
+ which is attached by a BPF program.
+
+(3). ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` allocates local storage at attach time so
+ ``bpf_get_local_storage()`` always returns non-NULL local storage.
+ ``BPF_MAP_TYPE_CGRP_STORAGE`` allocates local storage at runtime so
+ it is possible that ``bpf_cgrp_storage_get()`` may return null local storage.
+ To avoid such null local storage issue, user space can do
+ ``bpf_map_update_elem()`` to pre-allocate local storage before a BPF program
+ is attached.
+
+(4). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports deleting local storage by a BPF program
+ while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only deletes storage during
+ prog detach time.
+
+So overall, ``BPF_MAP_TYPE_CGRP_STORAGE`` supports all ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``
+functionality and beyond. It is recommended to use ``BPF_MAP_TYPE_CGRP_STORAGE``
+instead of ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``.
diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst
index f41619e312ac..4906ff0f8382 100644
--- a/Documentation/bpf/maps.rst
+++ b/Documentation/bpf/maps.rst
@@ -1,52 +1,81 @@
-=========
-eBPF maps
+========
+BPF maps
+========
+
+BPF 'maps' provide generic storage of different types for sharing data between
+kernel and user space. There are several storage types available, including
+hash, array, bloom filter and radix-tree. Several of the map types exist to
+support specific BPF helpers that perform actions based on the map contents. The
+maps are accessed from BPF programs via BPF helpers which are documented in the
+`man-pages`_ for `bpf-helpers(7)`_.
+
+BPF maps are accessed from user space via the ``bpf`` syscall, which provides
+commands to create maps, lookup elements, update elements and delete
+elements. More details of the BPF syscall are available in
+:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_.
+
+Map Types
=========
-'maps' is a generic storage of different types for sharing data between kernel
-and userspace.
+.. toctree::
+ :maxdepth: 1
+ :glob:
-The maps are accessed from user space via BPF syscall, which has commands:
+ map_*
-- create a map with given type and attributes
- ``map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)``
- using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
- returns process-local file descriptor or negative error
+Usage Notes
+===========
-- lookup key in a given map
- ``err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)``
- using attr->map_fd, attr->key, attr->value
- returns zero and stores found elem into value or negative error
+.. c:function::
+ int bpf(int command, union bpf_attr *attr, u32 size)
-- create or update key/value pair in a given map
- ``err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)``
- using attr->map_fd, attr->key, attr->value
- returns zero or negative error
+Use the ``bpf()`` system call to perform the operation specified by
+``command``. The operation takes parameters provided in ``attr``. The ``size``
+argument is the size of the ``union bpf_attr`` in ``attr``.
-- find and delete element by key in a given map
- ``err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)``
- using attr->map_fd, attr->key
+**BPF_MAP_CREATE**
-- to delete map: close(fd)
- Exiting process will delete maps automatically
+Create a map with the desired type and attributes in ``attr``:
-userspace programs use this syscall to create/access maps that eBPF programs
-are concurrently updating.
+.. code-block:: c
-maps can have different types: hash, array, bloom filter, radix-tree, etc.
+ int fd;
+ union bpf_attr attr = {
+ .map_type = BPF_MAP_TYPE_ARRAY; /* mandatory */
+ .key_size = sizeof(__u32); /* mandatory */
+ .value_size = sizeof(__u32); /* mandatory */
+ .max_entries = 256; /* mandatory */
+ .map_flags = BPF_F_MMAPABLE;
+ .map_name = "example_array";
+ };
-The map is defined by:
+ fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
- - type
- - max number of elements
- - key size in bytes
- - value size in bytes
+Returns a process-local file descriptor on success, or negative error in case of
+failure. The map can be deleted by calling ``close(fd)``. Maps held by open
+file descriptors will be deleted automatically when a process exits.
-Map Types
-=========
+.. note:: Valid characters for ``map_name`` are ``A-Z``, ``a-z``, ``0-9``,
+ ``'_'`` and ``'.'``.
-.. toctree::
- :maxdepth: 1
- :glob:
+**BPF_MAP_LOOKUP_ELEM**
+
+Lookup key in a given map using ``attr->map_fd``, ``attr->key``,
+``attr->value``. Returns zero and stores found elem into ``attr->value`` on
+success, or negative error on failure.
+
+**BPF_MAP_UPDATE_ELEM**
+
+Create or update key/value pair in a given map using ``attr->map_fd``, ``attr->key``,
+``attr->value``. Returns zero on success or negative error on failure.
+
+**BPF_MAP_DELETE_ELEM**
+
+Find and delete element by key in a given map using ``attr->map_fd``,
+``attr->key``. Returns zero on success or negative error on failure.
- map_* \ No newline at end of file
+.. Links:
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
+.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 30f76178608b..62f805f427b7 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1649,13 +1649,8 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
struct bpf_prog *p = l->link.prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- if (p->aux->sleepable) {
- enter_prog = (u64)__bpf_prog_enter_sleepable;
- exit_prog = (u64)__bpf_prog_exit_sleepable;
- } else {
- enter_prog = (u64)__bpf_prog_enter;
- exit_prog = (u64)__bpf_prog_exit;
- }
+ enter_prog = (u64)bpf_trampoline_enter(p);
+ exit_prog = (u64)bpf_trampoline_exit(p);
if (l->cookie == 0) {
/* if cookie is zero, one instruction is enough to store it */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 51afd6d0c05f..cec5195602bc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -904,6 +904,65 @@ static void emit_nops(u8 **pprog, int len)
*pprog = prog;
}
+/* emit the 3-byte VEX prefix
+ *
+ * r: same as rex.r, extra bit for ModRM reg field
+ * x: same as rex.x, extra bit for SIB index field