diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 11:56:19 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 11:56:19 -0800 |
| commit | 5bbcc0f595fadb4cac0eddc4401035ec0bd95b09 (patch) | |
| tree | 3b65e490cc36a6c6fecac1fa24d9e0ac9ced4455 /samples | |
| parent | 892204e06cb9e89fbc4b299a678f9ca358e97cac (diff) | |
| parent | 50895b9de1d3e0258e015e8e55128d835d9a9f19 (diff) | |
| download | linux-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.tar.gz linux-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.tar.bz2 linux-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.zip | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Maintain the TCP retransmit queue using an rbtree, with 1GB
windows at 100Gb this really has become necessary. From Eric
Dumazet.
2) Multi-program support for cgroup+bpf, from Alexei Starovoitov.
3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew
Lunn.
4) Add meter action support to openvswitch, from Andy Zhou.
5) Add a data meta pointer for BPF accessible packets, from Daniel
Borkmann.
6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet.
7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli.
8) More work to move the RTNL mutex down, from Florian Westphal.
9) Add 'bpftool' utility, to help with bpf program introspection.
From Jakub Kicinski.
10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper
Dangaard Brouer.
11) Support 'blocks' of transformations in the packet scheduler which
can span multiple network devices, from Jiri Pirko.
12) TC flower offload support in cxgb4, from Kumar Sanghvi.
13) Priority based stream scheduler for SCTP, from Marcelo Ricardo
Leitner.
14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg.
15) Add RED qdisc offloadability, and use it in mlxsw driver. From
Nogah Frankel.
16) eBPF based device controller for cgroup v2, from Roman Gushchin.
17) Add some fundamental tracepoints for TCP, from Song Liu.
18) Remove garbage collection from ipv6 route layer, this is a
significant accomplishment. From Wei Wang.
19) Add multicast route offload support to mlxsw, from Yotam Gigi"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits)
tcp: highest_sack fix
geneve: fix fill_info when link down
bpf: fix lockdep splat
net: cdc_ncm: GetNtbFormat endian fix
openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start
netem: remove unnecessary 64 bit modulus
netem: use 64 bit divide by rate
tcp: Namespace-ify sysctl_tcp_default_congestion_control
net: Protect iterations over net::fib_notifier_ops in fib_seq_sum()
ipv6: set all.accept_dad to 0 by default
uapi: fix linux/tls.h userspace compilation error
usbnet: ipheth: prevent TX queue timeouts when device not ready
vhost_net: conditionally enable tx polling
uapi: fix linux/rxrpc.h userspace compilation errors
net: stmmac: fix LPI transitioning for dwmac4
atm: horizon: Fix irq release error
net-sysfs: trigger netlink notification on ifalias change via sysfs
openvswitch: Using kfree_rcu() to simplify the code
openvswitch: Make local function ovs_nsh_key_attr_size() static
openvswitch: Fix return value check in ovs_meter_cmd_features()
...
Diffstat (limited to 'samples')
35 files changed, 2958 insertions, 493 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 9b4a66e3363e..3b4945c1eab0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -29,6 +29,7 @@ hostprogs-y += test_cgrp2_sock hostprogs-y += test_cgrp2_sock2 hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += xdp_router_ipv4 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip @@ -40,11 +41,13 @@ hostprogs-y += per_socket_stats_example hostprogs-y += load_sock_ops hostprogs-y += xdp_redirect hostprogs-y += xdp_redirect_map +hostprogs-y += xdp_redirect_cpu hostprogs-y += xdp_monitor hostprogs-y += syscall_tp # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o +CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF) @@ -68,13 +71,14 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o -test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o +test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS) test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o -test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ +xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o +test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o @@ -85,6 +89,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o +xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o @@ -115,6 +120,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += xdp_router_ipv4_kern.o always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o always += sampleip_kern.o @@ -128,8 +134,10 @@ always += tcp_bufs_kern.o always += tcp_cong_kern.o always += tcp_iw_kern.o always += tcp_clamp_kern.o +always += tcp_basertt_kern.o always += xdp_redirect_kern.o always += xdp_redirect_map_kern.o +always += xdp_redirect_cpu_kern.o always += xdp_monitor_kern.o always += syscall_tp_kern.o @@ -161,6 +169,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_xdp_router_ipv4 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf @@ -170,6 +179,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf HOSTLOADLIBES_xdp_redirect += -lelf HOSTLOADLIBES_xdp_redirect_map += -lelf +HOSTLOADLIBES_xdp_redirect_cpu += -lelf HOSTLOADLIBES_xdp_monitor += -lelf HOSTLOADLIBES_syscall_tp += -lelf @@ -178,6 +188,12 @@ HOSTLOADLIBES_syscall_tp += -lelf LLC ?= llc CLANG ?= clang +# Detect that we're cross compiling and use the cross compiler +ifdef CROSS_COMPILE +HOSTCC = $(CROSS_COMPILE)gcc +CLANG_ARCH_ARGS = -target $(ARCH) +endif + # Trick to allow make to be run from this directory all: $(MAKE) -C ../../ $(CURDIR)/ @@ -225,9 +241,9 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h $(obj)/%.o: $(src)/%.c $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ - -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -Wno-compare-distinct-pointer-types \ + -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ + -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ - -Wno-unknown-warning-option \ + -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index 79f9a58f1872..5f27e4faca50 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -64,3 +64,13 @@ It is also possible to point make to the newly compiled 'llc' or 'clang' command via redefining LLC or CLANG on the make command line:: make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang + +Cross compiling samples +----------------------- +In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH +environment variables before calling make. This will direct make to build +samples for the cross target. + +export ARCH=arm64 +export CROSS_COMPILE="aarch64-linux-gnu-" +make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 2325d7ad76df..522ca9252d6c 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -222,6 +222,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, inner_map_fd, maps[i].def.max_entries, @@ -229,6 +230,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, numa_node); } else { map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, maps[i].def.value_size, maps[i].def.max_entries, diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c deleted file mode 100644 index 09afaddfc9ba..000000000000 --- a/samples/bpf/cgroup_helpers.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include <sched.h> -#include <sys/mount.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <linux/limits.h> -#include <stdio.h> -#include <linux/sched.h> -#include <fcntl.h> -#include <unistd.h> -#include <ftw.h> - - -#include "cgroup_helpers.h" - -/* - * To avoid relying on the system setup, when setup_cgroup_env is called - * we create a new mount namespace, and cgroup namespace. The cgroup2 - * root is mounted at CGROUP_MOUNT_PATH - * - * Unfortunately, most people don't have cgroupv2 enabled at this point in time. - * It's easier to create our own mount namespace and manage it ourselves. - * - * We assume /mnt exists. - */ - -#define WALK_FD_LIMIT 16 -#define CGROUP_MOUNT_PATH "/mnt" -#define CGROUP_WORK_DIR "/cgroup-test-work-dir" -#define format_cgroup_path(buf, path) \ - snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ - CGROUP_WORK_DIR, path) - -/** - * setup_cgroup_environment() - Setup the cgroup environment - * - * After calling this function, cleanup_cgroup_environment should be called - * once testing is complete. - * - * This function will print an error to stderr and return 1 if it is unable - * to setup the cgroup environment. If setup is successful, 0 is returned. - */ -int setup_cgroup_environment(void) -{ - char cgroup_workdir[PATH_MAX + 1]; - - format_cgroup_path(cgroup_workdir, ""); - - if (unshare(CLONE_NEWNS)) { - log_err("unshare"); - return 1; - } - - if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { - log_err("mount fakeroot"); - return 1; - } - - if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) { - log_err("mount cgroup2"); - return 1; - } - - /* Cleanup existing failed runs, now that the environment is setup */ - cleanup_cgroup_environment(); - - if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { - log_err("mkdir cgroup work dir"); - return 1; - } - - return 0; -} - -static int nftwfunc(const char *filename, const struct stat *statptr, - int fileflags, struct FTW *pfwt) -{ - if ((fileflags & FTW_D) && rmdir(filename)) - log_err("Removing cgroup: %s", filename); - return 0; -} - - -static int join_cgroup_from_top(char *cgroup_path) -{ - char cgroup_procs_path[PATH_MAX + 1]; - pid_t pid = getpid(); - int fd, rc = 0; - - snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), - "%s/cgroup.procs", cgroup_path); - - fd = open(cgroup_procs_path, O_WRONLY); - if (fd < 0) { - log_err("Opening Cgroup Procs: %s", cgroup_procs_path); - return 1; - } - - if (dprintf(fd, "%d\n", pid) < 0) { - log_err("Joining Cgroup"); - rc = 1; - } - - close(fd); - return rc; -} - -/** - * join_cgroup() - Join a cgroup - * @path: The cgroup path, relative to the workdir, to join - * - * This function expects a cgroup to already be created, relative to the cgroup - * work dir, and it joins it. For example, passing "/my-cgroup" as the path - * would actually put the calling process into the cgroup - * "/cgroup-test-work-dir/my-cgroup" - * - * On success, it returns 0, otherwise on failure it returns 1. - */ -int join_cgroup(char *path) -{ - char cgroup_path[PATH_MAX + 1]; - - format_cgroup_path(cgroup_path, path); - return join_cgroup_from_top(cgroup_path); -} - -/** - * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment - * - * This is an idempotent function to delete all temporary cgroups that - * have been created during the test, including the cgroup testing work - * directory. - * - * At call time, it moves the calling process to the root cgroup, and then - * runs the deletion process. It is idempotent, and should not fail, unless - * a process is lingering. - * - * On failure, it will print an error to stderr, and try to continue. - */ -void cleanup_cgroup_environment(void) -{ - char cgroup_workdir[PATH_MAX + 1]; - - format_cgroup_path(cgroup_workdir, ""); - join_cgroup_from_top(CGROUP_MOUNT_PATH); - nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); -} - -/** - * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD - * @path: The cgroup path, relative to the workdir, to join - * - * This function creates a cgroup under the top level workdir and returns the - * file descriptor. It is idempotent. - * - * On success, it returns the file descriptor. On failure it returns 0. - * If there is a failure, it prints the error to stderr. - */ -int create_and_get_cgroup(char *path) -{ - char cgroup_path[PATH_MAX + 1]; - int fd; - - format_cgroup_path(cgroup_path, path); - if (mkdir(cgroup_path, 0777) && errno != EEXIST) { - log_err("mkdiring cgroup"); - return 0; - } - - fd = open(cgroup_path, O_RDONLY); - if (fd < 0) { - log_err("Opening Cgroup"); - return 0; - } - - return fd; -} diff --git a/samples/bpf/cgroup_helpers.h b/samples/bpf/cgroup_helpers.h deleted file mode 100644 index 06485e0002b3..000000000000 --- a/samples/bpf/cgroup_helpers.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __CGROUP_HELPERS_H -#define __CGROUP_HELPERS_H -#include <errno.h> -#include <string.h> - -#define clean_errno() (errno == 0 ? "None" : strerror(errno)) -#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ - __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) - - -int create_and_get_cgroup(char *path); -int join_cgroup(char *path); -int setup_cgroup_environment(void); -void cleanup_cgroup_environment(void); - -#endif diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 098c857f1eda..2b2ffb97018b 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -266,7 +266,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getpgrp") +SEC("kprobe/sys_getppid") int stress_array_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index f388254896f6..519d9af4b04a 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -137,6 +137,7 @@ static void do_test_lru(enum test_type test, int cpu) inner_lru_map_fds[cpu] = bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + test_map_names[INNER_LRU_HASH_PREALLOC], sizeof(uint32_t), sizeof(long), inner_lru_hash_size, 0, @@ -282,7 +283,7 @@ static void test_array_lookup(int cpu) start_time = time_get_ns(); for (i = 0; i < max_cnt; i++) - syscall(__NR_getpgrp, 0); + syscall(__NR_getppid, 0); printf("%d:array_lookup %lld lookups per sec\n", cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); } diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index a3cb91ebf4e7..9169d3207f18 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -23,6 +23,13 @@ * This requires kernel CONFIG_FTRACE_SYSCALLS to be set. */ +static void usage(const char *cmd) +{ + printf("USAGE: %s [-i num_progs] [-h]\n", cmd); + printf(" -i num_progs # number of progs of the test\n"); + printf(" -h # help\n"); +} + static void verify_map(int map_id) { __u32 key = 0; @@ -32,22 +39,29 @@ static void verify_map(int map_id) fprintf(stderr, "map_lookup failed: %s\n", strerror(errno)); return; } - if (val == 0) + if (val == 0) { fprintf(stderr, "failed: map #%d returns value 0\n", map_id); + return; + } + val = 0; + if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) { + fprintf(stderr, "map_update failed: %s\n", strerror(errno)); + return; + } } -int main(int argc, char **argv) +static int test(char *filename, int num_progs) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - char filename[256]; - int fd; + int i, fd, map0_fds[num_progs], map1_fds[num_progs]; - setrlimit(RLIMIT_MEMLOCK, &r); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - if (load_bpf_file(filename)) { - fprintf(stderr, "%s", bpf_log_buf); - return 1; + for (i = 0; i < num_progs; i++) { + if (load_bpf_file(filename)) { + fprintf(stderr, "%s", bpf_log_buf); + return 1; + } + printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]); + map0_fds[i] = map_fd[0]; + map1_fds[i] = map_fd[1]; } /* current load_bpf_file has perf_event_open default pid = -1 @@ -64,8 +78,34 @@ int main(int argc, char **argv) close(fd); /* verify the map */ - verify_map(map_fd[0]); - verify_map(map_fd[1]); + for (i = 0; i < num_progs; i++) { + verify_map(map0_fds[i]); + verify_map(map1_fds[i]); + } return 0; } + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int opt, num_progs = 1; + char filename[256]; + + while ((opt = getopt(argc, argv, "i:h")) != -1) { + switch (opt) { + case 'i': + num_progs = atoi(optarg); + break; + case 'h': + default: + usage(argv[0]); + return 0; + } + } + + setrlimit(RLIMIT_MEMLOCK, &r); + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + return test(filename, num_progs); +} diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c new file mode 100644 index 000000000000..4bf4fc597db9 --- /dev/null +++ b/samples/bpf/tcp_basertt_kern.c @@ -0,0 +1,78 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set base_rtt to 80us when host is running TCP-NV and + * both hosts are in the same datacenter (as determined by IPv6 prefix). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_basertt(struct bpf_sock_ops *skops) +{ + char cong[20]; + char nv[] = "nv"; + int rv = 0, n; + int op; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check if both hosts are in the same datacenter. For this + * example they are if the 1st 5.5 bytes in the IPv6 address + * are the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_BASE_RTT: + n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) { + /* Set base_rtt to 80us */ + rv = 80; + } else if (n) { + rv = n; + } else { + rv = -1; + } + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme new file mode 100644 index 000000000000..831fb601e3c9 --- /dev/null +++ b/samples/bpf/tcp_bpf.readme @@ -0,0 +1,26 @@ +This file describes how to run the tcp_*_kern.o tcp_bpf (or socket_ops) +programs. These programs attach to a cgroupv2. The following commands create +a cgroupv2 and attach a bash shell to the group. + + mkdir -p /tmp/cgroupv2 + mount -t cgroup2 none /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2/foo + bash + echo $$ >> /tmp/cgroupv2/foo/cgroup.procs + +Anything that runs under this shell belongs to the foo cgroupv2 To load +(attach) one of the tcp_*_kern.o programs: + + ./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o + +If the "-l" flag is used, the load_sock_ops program will continue to run +printing the BPF log buffer. The tcp_*_kern.o programs use special print +functions to print logging information (if enabled by the ifdef). + +If using netperf/netserver to create traffic, you need to run them under the +cgroupv2 to which the BPF programs are attached (i.e. under bash shell +attached to the cgroupv2). + +To remove (unattach) a socket_ops BPF program from a cgroupv2: + + ./load_sock_ops -r /tmp/cgroupv2/foo diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c index ee83bbabd17c..0566b7fa38a1 100644 --- a/samples/bpf/tcp_bufs_kern.c +++ b/samples/bpf/tcp_bufs_kern.c @@ -41,8 +41,10 @@ int bpf_bufs(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; @@ -61,8 +63,8 @@ int bpf_bufs(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: /* Nothing to do */ @@ -71,8 +73,8 @@ int bpf_bufs(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of passive connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; default: rv = -1; diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c index d68eadd9ca2d..f4225c9d2c0c 100644 --- a/samples/bpf/tcp_clamp_kern.c +++ b/samples/bpf/tcp_clamp_kern.c @@ -41,8 +41,10 @@ int bpf_clamp(struct bpf_sock_ops *skops) /* For testing purposes, only execute rest of BPF program * if neither port numberis 55601 */ - if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) - return -1; + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) { + skops->reply = -1; + return 0; + } op = (int) skops->op; @@ -66,9 +68,9 @@ int bpf_clamp(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_RCVBUF, &bufsize, - sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: rv = bpf_setsockopt(skops, SOL_TCP, @@ -80,12 +82,12 @@ int bpf_clamp(struct bpf_sock_ops *skops) rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SNDCWND_CLAMP, &clamp, sizeof(clamp)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_SNDBUF, &bufsize, - sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_RCVBUF, &bufsize, - sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); break; default: rv = -1; diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c index dac15bce1fa9..ad0f1ba8206a 100644 --- a/samples/bpf/tcp_cong_kern.c +++ b/samples/bpf/tcp_cong_kern.c @@ -39,8 +39,10 @@ int bpf_cong(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tc |
