70 files changed, 1935 insertions, 442 deletions
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
index 6d7770793fab..07433915aa41 100644
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -238,11 +238,8 @@ The following is the breakdown for each field in struct ``bpf_iter_reg``.
      that the kernel function cond_resched() is called to avoid other kernel
      subsystem (e.g., rcu) misbehaving.
    * - seq_info
-     - Specifies certain action requests in the kernel BPF iterator
-       infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
-       that the kernel function cond_resched() is called to avoid other kernel
-       subsystem (e.g., rcu) misbehaving.
-
+     - Specifies the set of seq operations for the BPF iterator and helpers to
+       initialize/free the private data for the corresponding ``seq_file``.
 
 `Click here
 <https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 41efd8874eeb..3139c7c02e79 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -351,14 +351,15 @@ In addition to the above kfuncs, there is also a set of read-only kfuncs that
 can be used to query the contents of cpumasks.
 
 .. kernel-doc:: kernel/bpf/cpumask.c
-   :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_test_cpu
+   :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and
+                 bpf_cpumask_test_cpu
 
 .. kernel-doc:: kernel/bpf/cpumask.c
    :identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset
                  bpf_cpumask_empty bpf_cpumask_full
 
 .. kernel-doc:: kernel/bpf/cpumask.c
-   :identifiers: bpf_cpumask_any bpf_cpumask_any_and
+   :identifiers: bpf_cpumask_any_distribute bpf_cpumask_any_and_distribute
 
 ----
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 7a3d9de5f315..0d2647fb358d 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -227,23 +227,49 @@ absolutely no ABI stability guarantees.
 
 As mentioned above, a nested pointer obtained from walking a trusted pointer is
 no longer trusted, with one exception. If a struct type has a field that is
-guaranteed to be valid as long as its parent pointer is trusted, the
-``BTF_TYPE_SAFE_NESTED`` macro can be used to express that to the verifier as
-follows:
+guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long
+as its parent pointer is valid, the following macros can be used to express
+that to the verifier:
+
+* ``BTF_TYPE_SAFE_TRUSTED``
+* ``BTF_TYPE_SAFE_RCU``
+* ``BTF_TYPE_SAFE_RCU_OR_NULL``
+
+For example,
+
+.. code-block:: c
+
+        BTF_TYPE_SAFE_TRUSTED(struct socket) {
+                struct sock *sk;
+        };
+
+or
 
 .. code-block:: c
 
-        BTF_TYPE_SAFE_NESTED(struct task_struct) {
+        BTF_TYPE_SAFE_RCU(struct task_struct) {
                 const cpumask_t *cpus_ptr;
+                struct css_set __rcu *cgroups;
+                struct task_struct __rcu *real_parent;
+                struct task_struct *group_leader;
         };
 
 In other words, you must:
 
-1. Wrap the trusted pointer type in the ``BTF_TYPE_SAFE_NESTED`` macro.
+1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro.
 
-2. Specify the type and name of the trusted nested field. This field must match
+2. Specify the type and name of the valid nested field. This field must match
    the field in the original type definition exactly.
 
+A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so
+that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)``
+is emitted in the ``type_is_trusted()`` function as follows:
+
+.. code-block:: c
+
+        BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+
 2.4.5 KF_SLEEPABLE flag
 -----------------------
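A note on the kfuncs.rst change above: the following is a minimal, hypothetical
BPF-side sketch (not part of this patch) of what BTF_TYPE_SAFE_RCU(struct
task_struct) enables, namely dereferencing the listed __rcu fields inside a
section guarded by the bpf_rcu_read_lock()/bpf_rcu_read_unlock() kfuncs:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* kfunc declarations; in-tree code pulls these from a shared header */
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;

SEC("tp_btf/task_newtask")
int BPF_PROG(handle_task_newtask, struct task_struct *task, u64 clone_flags)
{
        pid_t ppid = -1;

        /* task->real_parent is an __rcu field; per
         * BTF_TYPE_SAFE_RCU(struct task_struct) it may be dereferenced
         * while the RCU read lock is held.
         */
        bpf_rcu_read_lock();
        if (task->real_parent)
                ppid = task->real_parent->pid;
        bpf_rcu_read_unlock();

        bpf_printk("new task %d, parent %d", task->pid, ppid);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";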
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5b11a3b0fec0..f70f9ac884d2 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -313,11 +313,6 @@ struct bpf_idx_pair {
         u32 idx;
 };
 
-struct bpf_id_pair {
-        u32 old;
-        u32 cur;
-};
-
 #define MAX_CALL_FRAMES 8
 /* Maximum number of register states that can exist at once */
 #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
@@ -557,6 +552,21 @@ struct backtrack_state {
         u64 stack_masks[MAX_CALL_FRAMES];
 };
 
+struct bpf_id_pair {
+        u32 old;
+        u32 cur;
+};
+
+struct bpf_idmap {
+        u32 tmp_id_gen;
+        struct bpf_id_pair map[BPF_ID_MAP_SIZE];
+};
+
+struct bpf_idset {
+        u32 count;
+        u32 ids[BPF_ID_MAP_SIZE];
+};
+
 /* single container for all structs
  * one verifier_env per bpf_check() call
  */
@@ -588,7 +598,10 @@ struct bpf_verifier_env {
         const struct bpf_line_info *prev_linfo;
         struct bpf_verifier_log log;
         struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
-        struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
+        union {
+                struct bpf_idmap idmap_scratch;
+                struct bpf_idset idset_scratch;
+        };
         struct {
                 int *insn_state;
                 int *insn_stack;
diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index 2ae3c8e1d83c..736ded4905e0 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -11,7 +11,6 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
                             unsigned int optlen);
 int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
                             int __user *optlen);
-void bpfilter_umh_cleanup(struct umd_info *info);
 
 struct bpfilter_umh_ops {
         struct umd_info info;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index bbce89937fde..f69114083ec7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -874,7 +874,6 @@ void bpf_prog_free(struct bpf_prog *fp);
 
 bool bpf_opcode_in_insntable(u8 code);
 
-void bpf_prog_free_linfo(struct bpf_prog *prog);
 void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
                                const u32 *insn_to_jit_off);
 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index acf706d49c2b..b828c7a75be2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5073,6 +5073,15 @@ static inline bool netif_is_l3_slave(const struct net_device *dev)
         return dev->priv_flags & IFF_L3MDEV_SLAVE;
 }
 
+static inline int dev_sdif(const struct net_device *dev)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+        if (netif_is_l3_slave(dev))
+                return dev->ifindex;
+#endif
+        return 0;
+}
+
 static inline bool netif_is_bridge_master(const struct net_device *dev)
 {
         return dev->priv_flags & IFF_EBRIDGE;
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 9c0d860609ba..c243f906ebed 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -255,10 +255,6 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 {
 }
 
-static inline void xsk_buff_discard(struct xdp_buff *xdp)
-{
-}
-
 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
 {
 }
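For context on the bpf_idmap/bpf_idset split in bpf_verifier.h above: a
bpf_idmap pairs register IDs from a cached (old) verifier state with IDs from
the current state during state-equivalence checks, and tmp_id_gen allows
minting temporary IDs during that comparison. A simplified, non-verbatim
sketch of how such a map is typically consulted:

/* Simplified sketch, not verbatim verifier code: record or verify a
 * mapping between an ID in the old (cached) state and an ID in the
 * current state. Registers that shared an ID in the old state must map
 * to registers sharing an ID in the current state, and vice versa.
 */
static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
        struct bpf_id_pair *map = idmap->map;
        unsigned int i;

        /* either both IDs are set or neither is */
        if (!!old_id != !!cur_id)
                return false;
        if (old_id == 0)        /* cur_id == 0 as well */
                return true;

        for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
                if (!map[i].old) {
                        /* empty slot: this pairing was not seen before */
                        map[i].old = old_id;
                        map[i].cur = cur_id;
                        return true;
                }
                if (map[i].old == old_id)
                        return map[i].cur == cur_id;
                if (map[i].cur == cur_id)
                        return false;
        }
        return false;   /* out of slots; treat the states as distinct */
}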
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6961a7b70028..60a9d59beeab 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3178,6 +3178,10 @@ union bpf_attr {
  *              **BPF_FIB_LOOKUP_DIRECT**
  *                      Do a direct table lookup vs full lookup using FIB
  *                      rules.
+ *              **BPF_FIB_LOOKUP_TBID**
+ *                      Used with BPF_FIB_LOOKUP_DIRECT.
+ *                      Use the routing table ID present in *params*->tbid
+ *                      for the fib lookup.
  *              **BPF_FIB_LOOKUP_OUTPUT**
  *                      Perform lookup from an egress perspective (default is
  *                      ingress).
@@ -6832,6 +6836,7 @@ enum {
         BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
         BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
         BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+        BPF_FIB_LOOKUP_TBID    = (1U << 3),
 };
 
 enum {
@@ -6892,9 +6897,19 @@ struct bpf_fib_lookup {
                 __u32   ipv6_dst[4];  /* in6_addr; network order */
         };
 
-        /* output */
-        __be16  h_vlan_proto;
-        __be16  h_vlan_TCI;
+        union {
+                struct {
+                        /* output */
+                        __be16  h_vlan_proto;
+                        __be16  h_vlan_TCI;
+                };
+                /* input: when accompanied with the
+                 * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+                 * specific routing table to use for the fib lookup.
+                 */
+                __u32   tbid;
+        };
+
         __u8    smac[6];     /* ETH_ALEN */
         __u8    dmac[6];     /* ETH_ALEN */
 };
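A hypothetical usage sketch (not part of this patch) of the new
BPF_FIB_LOOKUP_TBID flag from a tc program; the addresses and the table ID
below are made up:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define AF_INET 2

SEC("tc")
int fib_lookup_in_table(struct __sk_buff *skb)
{
        struct bpf_fib_lookup params = {};
        long ret;

        params.family   = AF_INET;
        params.ifindex  = skb->ingress_ifindex;
        params.ipv4_src = bpf_htonl(0x0a000001);        /* 10.0.0.1, made up */
        params.ipv4_dst = bpf_htonl(0x0a000002);        /* 10.0.0.2, made up */
        params.tbid     = 100;                          /* made-up table ID */

        /* tbid is only consulted when both flags are set */
        ret = bpf_fib_lookup(skb, &params, sizeof(params),
                             BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID);
        if (ret == BPF_FIB_LKUP_RET_SUCCESS)
                bpf_printk("nexthop via ifindex %d", params.ifindex);

        return 0;       /* TC_ACT_OK */
}

char LICENSE[] SEC("license") = "GPL";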
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index 540331b610a9..addf3dd57b59 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -86,9 +86,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
         int numa_node = bpf_map_attr_numa_node(attr);
         struct bpf_bloom_filter *bloom;
 
-        if (!bpf_capable())
-                return ERR_PTR(-EPERM);
-
         if (attr->key_size != 0 || attr->value_size == 0 ||
             attr->max_entries == 0 ||
             attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 47d9948d768f..b5149cfce7d4 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -723,9 +723,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
             !attr->btf_key_type_id || !attr->btf_value_type_id)
                 return -EINVAL;
 
-        if (!bpf_capable())
-                return -EPERM;
-
         if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
                 return -E2BIG;
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d3f0a4825fa6..116a0ce378ec 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -655,9 +655,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
         const struct btf_type *t, *vt;
         struct bpf_map *map;
 
-        if (!bpf_capable())
-                return ERR_PTR(-EPERM);
-
         st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
         if (!st_ops)
                 return ERR_PTR(-ENOTSUPP);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index bbcae434fda5..29fe21099298 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -492,25 +492,26 @@ static bool btf_type_is_fwd(const struct btf_type *t)
         return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }
 
-static bool btf_type_nosize(const struct btf_type *t)
+static bool btf_type_is_datasec(const struct btf_type *t)
 {
-        return btf_type_is_void(t) || btf_type_is_fwd(t) ||
-               btf_type_is_func(t) || btf_type_is_func_proto(t);
+        return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
 }
 
-static bool btf_type_nosize_or_null(const struct btf_type *t)
+static bool btf_type_is_decl_tag(const struct btf_type *t)
 {
-        return !t || btf_type_nosize(t);
+        return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
 }
 
-static bool btf_type_is_datasec(const struct btf_type *t)
+static bool btf_type_nosize(const struct btf_type *t)
 {
-        return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
+        return btf_type_is_void(t) || btf_type_is_fwd(t) ||
+               btf_type_is_func(t) || btf_type_is_func_proto(t) ||
+               btf_type_is_decl_tag(t);
 }
 
-static bool btf_type_is_decl_tag(const struct btf_type *t)
+static bool btf_type_nosize_or_null(const struct btf_type *t)
 {
-        return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
+        return !t || btf_type_nosize(t);
 }
 
 static bool btf_type_is_decl_tag_target(const struct btf_type *t)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7421487422d4..dc85240a0134 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2064,14 +2064,16 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 #undef PROG_NAME_LIST
 #define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
-static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
-                                  const struct bpf_insn *insn) = {
+static __maybe_unused
+u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
+                           const struct bpf_insn *insn) = {
 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 #undef PROG_NAME_LIST
 
+#ifdef CONFIG_BPF_SYSCALL
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
 {
         stack_depth = max_t(u32, stack_depth, 1);
@@ -2080,7 +2082,7 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
                 __bpf_call_base_args;
         insn->code = BPF_JMP | BPF_CALL_ARGS;
 }
-
+#endif
 #else
 static unsigned int __bpf_prog_ret0_warn(const void *ctx,
                                          const struct bpf_insn *insn)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8ec18faa74ac..8a33e8747a0e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -28,7 +28,6 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
-#include <linux/capability.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>
 
@@ -89,9 +88,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
         u32 value_size = attr->value_size;
         struct bpf_cpu_map *cmap;
 
-        if (!bpf_capable())
-                return ERR_PTR(-EPERM);
-
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
             (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
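The bpf_capable() checks removed from the individual map allocators above
appear to be consolidated at map-creation time in the syscall path, which is
not shown in this excerpt. A rough sketch of that pattern, with the helper
name invented for illustration:

/* Simplified sketch; the actual syscall-side change is outside this
 * excerpt, and the function name is hypothetical. The per-map-type
 * bpf_capable() checks are performed once, before dispatching to the
 * type-specific ->map_alloc() callback.
 */
static int map_create_permissions(union bpf_attr *attr)
{
        switch (attr->map_type) {
        case BPF_MAP_TYPE_BLOOM_FILTER:
        case BPF_MAP_TYPE_CPUMAP:
        case BPF_MAP_TYPE_STRUCT_OPS:
        case BPF_MAP_TYPE_TASK_STORAGE:
                /* previously checked inside each ->map_alloc() */
                if (!bpf_capable())
                        return -EPERM;
                break;
        default:
                break;
        }
        return 0;
}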
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 7efdf5d770ca..938a60ff4295 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -132,6 +132,21 @@ __bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
 }
 
 /**
+ * bpf_cpumask_first_and() - Return the index of the first nonzero bit from the
+ *                           AND of two cpumasks.
+ * @src1: The first cpumask.
+ * @src2: The second cpumask.
+ *
+ * Find the index of the first nonzero bit of the AND of two cpumasks.
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1,
+                                      const struct cpumask *src2)
+{
+        return cpumask_first_and(src1, src2);
+}
+
+/**
  * bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask.
  * @cpu: The CPU to be set in the cpumask.
  * @cpumask: The BPF cpumask in which a bit is being set.
@@ -367,7 +382,7 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask
 }
 
 /**
- * bpf_cpumask_any() - Return a random set CPU from a cpumask.
+ * bpf_cpumask_any_distribute() - Return a random set CPU from a cpumask.
  * @cpumask: The cpumask being queried.
  *
  * Return:
@@ -376,26 +391,28 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask
  *
  * A struct bpf_cpumask pointer may be safely passed to @src.
  */
-__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask)
+__bpf_kfunc u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask)
 {
-        return cpumask_any(cpumask);
+        return cpumask_any_distribute(cpumask);
 }
 
 /**
- * bpf_cpumask_any_and() - Return a random set CPU from the AND of two
- *                         cpumasks.
+ * bpf_cpumask_any_and_distribute() - Return a random set CPU from the AND of
+ *                                    two cpumasks.
  * @src1: The first cpumask.
  * @src2: The second cpumask.
  *
  * Return:
- * * A random set bit within [0, num_cpus) if at least one bit is set.
+ * * A random set bit within [0, num_cpus) from the AND of two cpumasks, if at
+ *   least one bit is set.
  * * >= num_cpus if no bit is set.
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
 */
-__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+                                               const struct cpumask *src2)
 {
-        return cpumask_any_and(src1, src2);
+        return cpumask_any_and_distribute(src1, src2);
 }
 
 __diag_pop();
@@ -406,6 +423,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_first_and, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
@@ -422,8 +440,8 @@ BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
-BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
-BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU)
 BTF_SET8_END(cpumask_kfunc_btf_ids)
 
 static const struct btf_kfunc_id_set cpumask_
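To close out the cpumask changes, a hypothetical BPF program sketch (not part
of this patch) exercising the new bpf_cpumask_first_and() and the renamed
bpf_cpumask_any_distribute() kfuncs; the kfunc declarations are written out
inline, whereas in-tree selftests pull them from a shared header:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
u32 bpf_cpumask_first_and(const struct cpumask *src1,
                          const struct cpumask *src2) __ksym;
u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) __ksym;

SEC("tp_btf/task_newtask")
int BPF_PROG(pick_cpus, struct task_struct *task, u64 clone_flags)
{
        struct bpf_cpumask *mask;
        u32 first, any;

        mask = bpf_cpumask_create();
        if (!mask)
                return 0;

        bpf_cpumask_set_cpu(0, mask);
        bpf_cpumask_set_cpu(1, mask);

        /* First CPU that is both in our mask and allowed for the task. */
        first = bpf_cpumask_first_and((const struct cpumask *)mask,
                                      task->cpus_ptr);

        /* Randomly distributed pick; cpumask_any() simply aliased
         * cpumask_first(), so the old kfunc always returned the first
         * set bit.
         */
        any = bpf_cpumask_any_distribute((const struct cpumask *)mask);

        bpf_printk("first_and=%u any=%u", first, any);
        bpf_cpumask_release(mask);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";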
