Merge branch 'bpf: add netfilter program type'

Florian Westphal says: ==================== Changes since last version: - rework test case in last patch wrt. ctx->skb dereference etc (Alexei) - pacify bpf ci tests, netfilter program type missed string translation in libbpf helper. This still uses runtime btf walk rather than extending the btf trace array as Alexei suggested, I would do this later (or someone else can). v1 cover letter: Add minimal support to hook bpf programs to netfilter hooks, e.g. PREROUTING or FORWARD. For this the most relevant parts for registering a netfilter hook via the in-kernel api are exposed to userspace via bpf_link. The new program type is 'tracing style', i.e. there is no context access rewrite done by verifier, the function argument (struct bpf_nf_ctx) isn't stable. There is no support for direct packet access, dynptr api should be used instead. With this its possible to build a small test program such as: #include "vmlinux.h" extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; SEC("netfilter") int nf_test(struct bpf_nf_ctx *ctx) { struct nf_hook_state *state = ctx->state; struct sk_buff *skb = ctx->skb; const struct iphdr *iph, _iph; const struct tcphdr *th, _th; struct bpf_dynptr ptr; if (bpf_dynptr_from_skb(skb, 0, &ptr)) return NF_DROP; iph = bpf_dynptr_slice(&ptr, 0, &_iph, sizeof(_iph)); if (!iph) return NF_DROP; th = bpf_dynptr_slice(&ptr, iph->ihl << 2, &_th, sizeof(_th)); if (!th) return NF_DROP; bpf_printk("accept %x:%d->%x:%d, hook %d ifin %d\n", iph->saddr, bpf_ntohs(th->source), iph->daddr, bpf_ntohs(th->dest), state->hook, state->in->ifindex); return NF_ACCEPT; } Then, tail /sys/kernel/tracing/trace_pipe. Changes since v3: - uapi: remove 'reserved' struct member, s/prio/priority (Alexei) - add ctx access test cases (Alexei, see last patch) - some arm32 can only handle cmpxchg on u32 (build bot) - Fix kdoc annotations (Simon Horman) - bpftool: prefer p_err, not fprintf (Quentin) - add test cases in separate patch Changes since v2: 1. don't WARN when user calls 'bpftool loink detach' twice restrict attachment to ip+ip6 families, lets relax this later in case arp/bridge/netdev are needed too. 2. show netfilter links in 'bpftool net' output as well. Changes since v1: 1. Don't fail to link when CONFIG_NETFILTER=n (build bot) 2. Use test_progs instead of test_verifier (Alexei) Changes since last RFC version: 1. extend 'bpftool link show' to print prio/hooknum etc 2. extend 'nft list hooks' so it can print the bpf program id 3. Add an extra patch to artificially restrict bpf progs with same priority. Its fine from a technical pov but it will cause ordering issues (most recent one comes first). Can be removed later. 4. Add test_run support for netfilter prog type and a small extension to verifier tests to make sure we can't return verdicts like NF_STOLEN. 5. Alter the netfilter part of the bpf_link uapi struct: - add flags/reserved members. Not used here except returning errors when they are nonzero. Plan is to allow the bpf_link users to enable netfilter defrag or conntrack engine by setting feature flags at link create time in the future. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
author: Alexei Starovoitov <ast@kernel.org> 2023-04-21 11:35:51 -0700
committer: Alexei Starovoitov <ast@kernel.org> 2023-04-21 11:37:41 -0700
commit: d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4 (patch)
tree: a3b43374a8f86d3ae74143bf60a2d493f4ea01af /tools/bpf
parent: 45cea721ea36d83969473d2abd29bcc2321cacdd (diff)
parent: 006c0e44ed924140d44bc756e6ea36301fcea68d (diff)
download: linux-d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4.tar.gz
linux-d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4.tar.bz2
linux-d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4.zip
3 files changed, 192 insertions, 0 deletions
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index f985b79cca27..d98dbc50cf4c 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -3,6 +3,8 @@
 
 #include <errno.h>
 #include <linux/err.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
 #include <net/if.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -135,6 +137,18 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
 	}
 }
 
+void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr)
+{
+	jsonw_uint_field(json_wtr, "pf",
+			 info->netfilter.pf);
+	jsonw_uint_field(json_wtr, "hook",
+			 info->netfilter.hooknum);
+	jsonw_int_field(json_wtr, "prio",
+			 info->netfilter.priority);
+	jsonw_uint_field(json_wtr, "flags",
+			 info->netfilter.flags);
+}
+
 static int get_prog_info(int prog_id, struct bpf_prog_info *info)
 {
 	__u32 len = sizeof(*info);
@@ -195,6 +209,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 				 info->netns.netns_ino);
 		show_link_attach_type_json(info->netns.attach_type, json_wtr);
 		break;
+	case BPF_LINK_TYPE_NETFILTER:
+		netfilter_dump_json(info, json_wtr);
+		break;
+
 	default:
 		break;
 	}
@@ -263,6 +281,68 @@ static void show_iter_plain(struct bpf_link_info *info)
 	}
 }
 
+static const char * const pf2name[] = {
+	[NFPROTO_INET] = "inet",
+	[NFPROTO_IPV4] = "ip",
+	[NFPROTO_ARP] = "arp",
+	[NFPROTO_NETDEV] = "netdev",
+	[NFPROTO_BRIDGE] = "bridge",
+	[NFPROTO_IPV6] = "ip6",
+};
+
+static const char * const inethook2name[] = {
+	[NF_INET_PRE_ROUTING] = "prerouting",
+	[NF_INET_LOCAL_IN] = "input",
+	[NF_INET_FORWARD] = "forward",
+	[NF_INET_LOCAL_OUT] = "output",
+	[NF_INET_POST_ROUTING] = "postrouting",
+};
+
+static const char * const arphook2name[] = {
+	[NF_ARP_IN] = "input",
+	[NF_ARP_OUT] = "output",
+};
+
+void netfilter_dump_plain(const struct bpf_link_info *info)
+{
+	const char *hookname = NULL, *pfname = NULL;
+	unsigned int hook = info->netfilter.hooknum;
+	unsigned int pf = info->netfilter.pf;
+
+	if (pf < ARRAY_SIZE(pf2name))
+		pfname = pf2name[pf];
+
+	switch (pf) {
+	case NFPROTO_BRIDGE: /* bridge shares numbers with enum nf_inet_hooks */
+	case NFPROTO_IPV4:
+	case NFPROTO_IPV6:
+	case NFPROTO_INET:
+		if (hook < ARRAY_SIZE(inethook2name))
+			hookname = inethook2name[hook];
+		break;
+	case NFPROTO_ARP:
+		if (hook < ARRAY_SIZE(arphook2name))
+			hookname = arphook2name[hook];
+	default:
+		break;
+	}
+
+	if (pfname)
+		printf("\n\t%s", pfname);
+	else
+		printf("\n\tpf: %d", pf);
+
+	if (hookname)
+		printf(" %s", hookname);
+	else
+		printf(", hook %u,", hook);
+
+	printf(" prio %d", info->netfilter.priority);
+
+	if (info->netfilter.flags)
+		printf(" flags 0x%x", info->netfilter.flags);
+}
+
 static int show_link_close_plain(int fd, struct bpf_link_info *info)
 {
 	struct bpf_prog_info prog_info;
@@ -301,6 +381,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 		printf("\n\tnetns_ino %u  ", info->netns.netns_ino);
 		show_link_attach_type_plain(info->netns.attach_type);
 		break;
+	case BPF_LINK_TYPE_NETFILTER:
+		netfilter_dump_plain(info);
+		break;
 	default:
 		break;
 	}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9a95788e654d..a49534d7eafa 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -267,4 +267,7 @@ static inline bool hashmap__empty(struct hashmap *map)
 int pathname_concat(char *buf, int buf_sz, const char *path,
 		    const char *name);
 
+/* print netfilter bpf_link info */
+void netfilter_dump_plain(const struct bpf_link_info *info);
+void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr);
 #endif
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index c40e44c938ae..26a49965bf71 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -647,6 +647,108 @@ static int do_detach(int argc, char **argv)
 	return 0;
 }
 
+static int netfilter_link_compar(const void *a, const void *b)
+{
+	const struct bpf_link_info *nfa = a;
+	const struct bpf_link_info *nfb = b;
+	int delta;
+
+	delta = nfa->netfilter.pf - nfb->netfilter.pf;
+	if (delta)
+		return delta;
+
+	delta = nfa->netfilter.hooknum - nfb->netfilter.hooknum;
+	if (delta)
+		return delta;
+
+	if (nfa->netfilter.priority < nfb->netfilter.priority)
+		return -1;
+	if (nfa->netfilter.priority > nfb->netfilter.priority)
+		return 1;
+
+	return nfa->netfilter.flags - nfb->netfilter.flags;
+}
+
+static void show_link_netfilter(void)
+{
+	unsigned int nf_link_len = 0, nf_link_count = 0;
+	struct bpf_link_info *nf_link_info = NULL;
+	__u32 id = 0;
+
+	while (true) {
+		struct bpf_link_info info;
+		int fd, err;
+		__u32 len;
+
+		err = bpf_link_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT)
+				break;
+			p_err("can't get next link: %s (id %d)", strerror(errno), id);
+			break;
+		}
+
+		fd = bpf_link_get_fd_by_id(id);
+		if (fd < 0) {
+			p_err("can't get link by id (%u): %s", id, strerror(errno));
+			continue;
+		}
+
+		memset(&info, 0, sizeof(info));
+		len = sizeof(info);
+
+		err = bpf_link_get_info_by_fd(fd, &info, &len);
+
+		close(fd);
+
+		if (err) {
+			p_err("can't get link info for fd %d: %s", fd, strerror(errno));
+			continue;
+		}
+
+		if (info.type != BPF_LINK_TYPE_NETFILTER)
+			continue;
+
+		if (nf_link_count >= nf_link_len) {
+			static const unsigned int max_link_count = INT_MAX / sizeof(info);
+			struct bpf_link_info *expand;
+
+			if (nf_link_count > max_link_count) {
+				p_err("cannot handle more than %u links\n", max_link_count);
+				break;
+			}
+
+			nf_link_len += 16;
+
+			expand = realloc(nf_link_info, nf_link_len * sizeof(info));
+			if (!expand) {
+				p_err("realloc: %s",  strerror(errno));
+				break;
+			}
+
+			nf_link_info = expand;
+		}
+
+		nf_link_info[nf_link_count] = info;
+		nf_link_count++;
+	}
+
+	qsort(nf_link_info, nf_link_count, sizeof(*nf_link_info), netfilter_link_compar);
+
+	for (id = 0; id < nf_link_count; id++) {
+		NET_START_OBJECT;
+		if (json_output)
+			netfilter_dump_json(&nf_link_info[id], json_wtr);
+		else
+			netfilter_dump_plain(&nf_link_info[id]);
+
+		NET_DUMP_UINT("id", " prog_id %u", nf_link_info[id].prog_id);
+		NET_END_OBJECT;
+	}
+
+	free(nf_link_info);
+}
+
 static int do_show(int argc, char **argv)
 {
 	struct bpf_attach_info attach_info = {};
@@ -701,6 +803,10 @@ static int do_show(int argc, char **argv)
 		NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id);
 	NET_END_ARRAY("\n");
 
+	NET_START_ARRAY("netfilter", "%s:\n");
+	show_link_netfilter();
+	NET_END_ARRAY("\n");
+
 	NET_END_OBJECT;
 	if (json_output)
 		jsonw_end_array(json_wtr);
author	Alexei Starovoitov <ast@kernel.org>	2023-04-21 11:35:51 -0700
committer	Alexei Starovoitov <ast@kernel.org>	2023-04-21 11:37:41 -0700
commit	d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4 (patch)
tree	a3b43374a8f86d3ae74143bf60a2d493f4ea01af /tools/bpf
parent	45cea721ea36d83969473d2abd29bcc2321cacdd (diff)
parent	006c0e44ed924140d44bc756e6ea36301fcea68d (diff)
download	linux-d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4.tar.gz linux-d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4.tar.bz2 linux-d7a799ec782b6ae9158f8d587c9f49cf34a6c5f4.zip