From e2b2cd592adbd303bcc02451d32fedd511000fb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:38 +0200 Subject: bpf: Add missed value to kprobe_multi link info Add missed value to kprobe_multi link info to hold the stats of missed kprobe_multi probe. The missed counter gets incremented when fprobe fails the recursion check or there's no rethook available for return probe. In either case the attached bpf program is not executed. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Song Liu Reviewed-by: Song Liu Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20230920213145.1941596-3-jolsa@kernel.org --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5f13db15a3c7..1da5b1bcce71 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6532,6 +6532,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ -- cgit v1.2.3 From 3acf8ace68230e9558cf916847f1cc9f208abdf1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:39 +0200 Subject: bpf: Add missed value to kprobe perf link info Add missed value to kprobe attached through perf link info to hold the stats of missed kprobe handler execution. The kprobe's missed counter gets incremented when kprobe handler is not executed due to another kprobe running on the same cpu. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230920213145.1941596-4-jolsa@kernel.org --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1da5b1bcce71..70bfa997e896 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6548,6 +6548,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ -- cgit v1.2.3 From d6247ecb6c1e17d7a33317090627f5bfe563cbb2 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 4 Oct 2023 11:23:38 -0500 Subject: bpf: Add ability to pin bpf timer to calling CPU BPF supports creating high resolution timers using bpf_timer_* helper functions. Currently, only the BPF_F_TIMER_ABS flag is supported, which specifies that the timeout should be interpreted as absolute time. It would also be useful to be able to pin that timer to a core. For example, if you wanted to make a subset of cores run without timer interrupts, and only have the timer be invoked on a single core. This patch adds support for this with a new BPF_F_TIMER_CPU_PIN flag. When specified, the HRTIMER_MODE_PINNED flag is passed to hrtimer_start(). A subsequent patch will update selftests to validate. Signed-off-by: David Vernet Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20231004162339.200702-2-void@manifault.com --- tools/include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 70bfa997e896..a7d4a1a69f21 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5096,6 +5096,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -7309,9 +7311,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ -- cgit v1.2.3 From dab4e1f06cabb6834de14264394ccab197007302 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Sat, 7 Oct 2023 10:14:14 +0200 Subject: bpf: Derive source IP addr via bpf_*_fib_lookup() Extend the bpf_fib_lookup() helper by making it to return the source IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set. For example, the following snippet can be used to derive the desired source IP address: struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr }; ret = bpf_skb_fib_lookup(skb, p, sizeof(p), BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH); if (ret != BPF_FIB_LKUP_RET_SUCCESS) return TC_ACT_SHOT; /* the p.ipv4_src now contains the source address */ The inability to derive the proper source address may cause malfunctions in BPF-based dataplanes for hosts containing netdevs with more than one routable IP address or for multi-homed hosts. For example, Cilium implements packet masquerading in BPF. If an egressing netdev to which the Cilium's BPF prog is attached has multiple IP addresses, then only one [hardcoded] IP address can be used for masquerading. This breaks connectivity if any other IP address should have been selected instead, for example, when a public and private addresses are attached to the same egress interface. The change was tested with Cilium [1]. Nikolay Aleksandrov helped to figure out the IPv6 addr selection. [1]: https://github.com/cilium/cilium/pull/28283 Signed-off-by: Martynas Pumputis Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt Signed-off-by: Martin KaFai Lau --- tools/include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a7d4a1a69f21..e0aa457f94a9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3264,6 +3264,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6964,6 +6969,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6976,6 +6982,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ -- cgit v1.2.3 From 859051dd165ec6cc915f0f2114699021144fd249 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:06 +0200 Subject: bpf: Implement cgroup sockaddr hooks for unix sockets These hooks allows intercepting connect(), getsockname(), getpeername(), sendmsg() and recvmsg() for unix sockets. The unix socket hooks get write access to the address length because the address length is not fixed when dealing with unix sockets and needs to be modified when a unix socket address is modified by the hook. Because abstract socket unix addresses start with a NUL byte, we cannot recalculate the socket address in kernelspace after running the hook by calculating the length of the unix socket path using strlen(). These hooks can be used when users want to multiplex syscall to a single unix socket to multiple different processes behind the scenes by redirecting the connect() and other syscalls to process specific sockets. We do not implement support for intercepting bind() because when using bind() with unix sockets with a pathname address, this creates an inode in the filesystem which must be cleaned up. If we rewrite the address, the user might try to clean up the wrong file, leaking the socket in the filesystem where it is never cleaned up. Until we figure out a solution for this (and a use case for intercepting bind()), we opt to not allow rewriting the sockaddr in bind() calls. We also implement recvmsg() support for connected streams so that after a connect() that is modified by a sockaddr hook, any corresponding recmvsg() on the connected socket can also be modified to make the connected program think it is connected to the "intended" remote. Reviewed-by: Kuniyuki Iwashima Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-5-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- tools/include/uapi/linux/bpf.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0aa457f94a9..7ba61b75bc0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by -- cgit v1.2.3