summaryrefslogtreecommitdiff
path: root/samples/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile29
-rw-r--r--samples/bpf/README.rst10
-rw-r--r--samples/bpf/bpf_load.c16
-rw-r--r--samples/bpf/cgroup_helpers.c178
-rw-r--r--samples/bpf/cgroup_helpers.h17
-rw-r--r--samples/bpf/map_perf_test_kern.c2
-rw-r--r--samples/bpf/map_perf_test_user.c3
-rw-r--r--samples/bpf/syscall_tp_user.c66
-rw-r--r--samples/bpf/tcp_basertt_kern.c78
-rw-r--r--samples/bpf/tcp_bpf.readme26
-rw-r--r--samples/bpf/tcp_bufs_kern.c14
-rw-r--r--samples/bpf/tcp_clamp_kern.c24
-rw-r--r--samples/bpf/tcp_cong_kern.c6
-rw-r--r--samples/bpf/tcp_iw_kern.c14
-rw-r--r--samples/bpf/tcp_rwnd_kern.c6
-rw-r--r--samples/bpf/tcp_synrto_kern.c6
-rw-r--r--samples/bpf/test_cgrp2_attach2.c224
-rw-r--r--samples/bpf/trace_event_kern.c10
-rw-r--r--samples/bpf/trace_event_user.c13
-rw-r--r--samples/bpf/tracex6_kern.c26
-rw-r--r--samples/bpf/tracex6_user.c13
-rw-r--r--samples/bpf/xdp1_user.c8
-rw-r--r--samples/bpf/xdp_monitor_kern.c60
-rw-r--r--samples/bpf/xdp_monitor_user.c123
-rw-r--r--samples/bpf/xdp_redirect_cpu_kern.c609
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c697
-rw-r--r--samples/bpf/xdp_redirect_map_user.c7
-rw-r--r--samples/bpf/xdp_router_ipv4_kern.c186
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c660
29 files changed, 2829 insertions, 302 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 9b4a66e3363e..adeaa1302f34 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -1,7 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# kbuild trick to avoid linker error. Can be omitted if a module is built.
-obj- := dummy.o
-
# List of programs to build
hostprogs-y := test_lru_dist
hostprogs-y += sock_example
@@ -29,6 +26,7 @@ hostprogs-y += test_cgrp2_sock
hostprogs-y += test_cgrp2_sock2
hostprogs-y += xdp1
hostprogs-y += xdp2
+hostprogs-y += xdp_router_ipv4
hostprogs-y += test_current_task_under_cgroup
hostprogs-y += trace_event
hostprogs-y += sampleip
@@ -40,11 +38,13 @@ hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
+hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
hostprogs-y += syscall_tp
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
+CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
sock_example-objs := sock_example.o $(LIBBPF)
@@ -68,13 +68,14 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
-test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o
+test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
+xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
+test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
@@ -85,6 +86,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
+xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
@@ -115,6 +117,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
always += xdp1_kern.o
always += xdp2_kern.o
+always += xdp_router_ipv4_kern.o
always += test_current_task_under_cgroup_kern.o
always += trace_event_kern.o
always += sampleip_kern.o
@@ -128,8 +131,10 @@ always += tcp_bufs_kern.o
always += tcp_cong_kern.o
always += tcp_iw_kern.o
always += tcp_clamp_kern.o
+always += tcp_basertt_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
+always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
always += syscall_tp_kern.o
@@ -161,6 +166,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
HOSTLOADLIBES_test_overhead += -lelf -lrt
HOSTLOADLIBES_xdp1 += -lelf
HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_xdp_router_ipv4 += -lelf
HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
HOSTLOADLIBES_trace_event += -lelf
HOSTLOADLIBES_sampleip += -lelf
@@ -170,6 +176,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
+HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
@@ -178,6 +185,12 @@ HOSTLOADLIBES_syscall_tp += -lelf
LLC ?= llc
CLANG ?= clang
+# Detect that we're cross compiling and use the cross compiler
+ifdef CROSS_COMPILE
+HOSTCC = $(CROSS_COMPILE)gcc
+CLANG_ARCH_ARGS = -target $(ARCH)
+endif
+
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/
@@ -225,9 +238,9 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-I$(srctree)/tools/testing/selftests/bpf/ \
- -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
- -Wno-compare-distinct-pointer-types \
+ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option \
+ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 79f9a58f1872..5f27e4faca50 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -64,3 +64,13 @@ It is also possible to point make to the newly compiled 'llc' or
'clang' command via redefining LLC or CLANG on the make command line::
make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+
+Cross compiling samples
+-----------------------
+In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH
+environment variables before calling make. This will direct make to build
+samples for the cross target.
+
+export ARCH=arm64
+export CROSS_COMPILE="aarch64-linux-gnu-"
+make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 2325d7ad76df..242631aa4ea2 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -193,8 +193,18 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1;
}
event_fd[prog_cnt - 1] = efd;
- ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
- ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+ err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+ if (err < 0) {
+ printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
+ strerror(errno));
+ return -1;
+ }
+ err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+ if (err < 0) {
+ printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n",
+ strerror(errno));
+ return -1;
+ }
return 0;
}
@@ -222,6 +232,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
+ maps[i].name,
maps[i].def.key_size,
inner_map_fd,
maps[i].def.max_entries,
@@ -229,6 +240,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
numa_node);
} else {
map_fd[i] = bpf_create_map_node(maps[i].def.type,
+ maps[i].name,
maps[i].def.key_size,
maps[i].def.value_size,
maps[i].def.max_entries,
diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c
deleted file mode 100644
index 09afaddfc9ba..000000000000
--- a/samples/bpf/cgroup_helpers.c
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <sched.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <linux/limits.h>
-#include <stdio.h>
-#include <linux/sched.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <ftw.h>
-
-
-#include "cgroup_helpers.h"
-
-/*
- * To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
- *
- * We assume /mnt exists.
- */
-
-#define WALK_FD_LIMIT 16
-#define CGROUP_MOUNT_PATH "/mnt"
-#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
-#define format_cgroup_path(buf, path) \
- snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
- CGROUP_WORK_DIR, path)
-
-/**
- * setup_cgroup_environment() - Setup the cgroup environment
- *
- * After calling this function, cleanup_cgroup_environment should be called
- * once testing is complete.
- *
- * This function will print an error to stderr and return 1 if it is unable
- * to setup the cgroup environment. If setup is successful, 0 is returned.
- */
-int setup_cgroup_environment(void)
-{
- char cgroup_workdir[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_workdir, "");
-
- if (unshare(CLONE_NEWNS)) {
- log_err("unshare");
- return 1;
- }
-
- if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
- log_err("mount fakeroot");
- return 1;
- }
-
- if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
- log_err("mount cgroup2");
- return 1;
- }
-
- /* Cleanup existing failed runs, now that the environment is setup */
- cleanup_cgroup_environment();
-
- if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
- log_err("mkdir cgroup work dir");
- return 1;
- }
-
- return 0;
-}
-
-static int nftwfunc(const char *filename, const struct stat *statptr,
- int fileflags, struct FTW *pfwt)
-{
- if ((fileflags & FTW_D) && rmdir(filename))
- log_err("Removing cgroup: %s", filename);
- return 0;
-}
-
-
-static int join_cgroup_from_top(char *cgroup_path)
-{
- char cgroup_procs_path[PATH_MAX + 1];
- pid_t pid = getpid();
- int fd, rc = 0;
-
- snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
- "%s/cgroup.procs", cgroup_path);
-
- fd = open(cgroup_procs_path, O_WRONLY);
- if (fd < 0) {
- log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
- return 1;
- }
-
- if (dprintf(fd, "%d\n", pid) < 0) {
- log_err("Joining Cgroup");
- rc = 1;
- }
-
- close(fd);
- return rc;
-}
-
-/**
- * join_cgroup() - Join a cgroup
- * @path: The cgroup path, relative to the workdir, to join
- *
- * This function expects a cgroup to already be created, relative to the cgroup
- * work dir, and it joins it. For example, passing "/my-cgroup" as the path
- * would actually put the calling process into the cgroup
- * "/cgroup-test-work-dir/my-cgroup"
- *
- * On success, it returns 0, otherwise on failure it returns 1.
- */
-int join_cgroup(char *path)
-{
- char cgroup_path[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_path, path);
- return join_cgroup_from_top(cgroup_path);
-}
-
-/**
- * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
- *
- * This is an idempotent function to delete all temporary cgroups that
- * have been created during the test, including the cgroup testing work
- * directory.
- *
- * At call time, it moves the calling process to the root cgroup, and then
- * runs the deletion process. It is idempotent, and should not fail, unless
- * a process is lingering.
- *
- * On failure, it will print an error to stderr, and try to continue.
- */
-void cleanup_cgroup_environment(void)
-{
- char cgroup_workdir[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_workdir, "");
- join_cgroup_from_top(CGROUP_MOUNT_PATH);
- nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
-}
-
-/**
- * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
- * @path: The cgroup path, relative to the workdir, to join
- *
- * This function creates a cgroup under the top level workdir and returns the
- * file descriptor. It is idempotent.
- *
- * On success, it returns the file descriptor. On failure it returns 0.
- * If there is a failure, it prints the error to stderr.
- */
-int create_and_get_cgroup(char *path)
-{
- char cgroup_path[PATH_MAX + 1];
- int fd;
-
- format_cgroup_path(cgroup_path, path);
- if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
- log_err("mkdiring cgroup");
- return 0;
- }
-
- fd = open(cgroup_path, O_RDONLY);
- if (fd < 0) {
- log_err("Opening Cgroup");
- return 0;
- }
-
- return fd;
-}
diff --git a/samples/bpf/cgroup_helpers.h b/samples/bpf/cgroup_helpers.h
deleted file mode 100644
index 06485e0002b3..000000000000
--- a/samples/bpf/cgroup_helpers.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __CGROUP_HELPERS_H
-#define __CGROUP_HELPERS_H
-#include <errno.h>
-#include <string.h>
-
-#define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
- __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
-
-
-int create_and_get_cgroup(char *path);
-int join_cgroup(char *path);
-int setup_cgroup_environment(void);
-void cleanup_cgroup_environment(void);
-
-#endif
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 098c857f1eda..2b2ffb97018b 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -266,7 +266,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/sys_getpgrp")
+SEC("kprobe/sys_getppid")
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index f388254896f6..519d9af4b04a 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -137,6 +137,7 @@ static void do_test_lru(enum test_type test, int cpu)
inner_lru_map_fds[cpu] =
bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
+ test_map_names[INNER_LRU_HASH_PREALLOC],
sizeof(uint32_t),
sizeof(long),
inner_lru_hash_size, 0,
@@ -282,7 +283,7 @@ static void test_array_lookup(int cpu)
start_time = time_get_ns();
for (i = 0; i < max_cnt; i++)
- syscall(__NR_getpgrp, 0);
+ syscall(__NR_getppid, 0);
printf("%d:array_lookup %lld lookups per sec\n",
cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
}
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index a3cb91ebf4e7..9169d3207f18 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -23,6 +23,13 @@
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
*/
+static void usage(const char *cmd)
+{
+ printf("USAGE: %s [-i num_progs] [-h]\n", cmd);
+ printf(" -i num_progs # number of progs of the test\n");
+ printf(" -h # help\n");
+}
+
static void verify_map(int map_id)
{
__u32 key = 0;
@@ -32,22 +39,29 @@ static void verify_map(int map_id)
fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
return;
}
- if (val == 0)
+ if (val == 0) {
fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
+ return;
+ }
+ val = 0;
+ if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) {
+ fprintf(stderr, "map_update failed: %s\n", strerror(errno));
+ return;
+ }
}
-int main(int argc, char **argv)
+static int test(char *filename, int num_progs)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- char filename[256];
- int fd;
+ int i, fd, map0_fds[num_progs], map1_fds[num_progs];
- setrlimit(RLIMIT_MEMLOCK, &r);
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (load_bpf_file(filename)) {
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ for (i = 0; i < num_progs; i++) {
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "%s", bpf_log_buf);
+ return 1;
+ }
+ printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
+ map0_fds[i] = map_fd[0];
+ map1_fds[i] = map_fd[1];
}
/* current load_bpf_file has perf_event_open default pid = -1
@@ -64,8 +78,34 @@ int main(int argc, char **argv)
close(fd);
/* verify the map */
- verify_map(map_fd[0]);
- verify_map(map_fd[1]);
+ for (i = 0; i < num_progs; i++) {
+ verify_map(map0_fds[i]);
+ verify_map(map1_fds[i]);
+ }
return 0;
}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int opt, num_progs = 1;
+ char filename[256];
+
+ while ((opt = getopt(argc, argv, "i:h")) != -1) {
+ switch (opt) {
+ case 'i':
+ num_progs = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ return 0;
+ }
+ }
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ return test(filename, num_progs);
+}
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
new file mode 100644
index 000000000000..4bf4fc597db9
--- /dev/null
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -0,0 +1,78 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set base_rtt to 80us when host is running TCP-NV and
+ * both hosts are in the same datacenter (as determined by IPv6 prefix).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_basertt(struct bpf_sock_ops *skops)
+{
+ char cong[20];
+ char nv[] = "nv";
+ int rv = 0, n;
+ int op;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check if both hosts are in the same datacenter. For this
+ * example they are if the 1st 5.5 bytes in the IPv6 address
+ * are the same.
+ */
+ if (skops->family == AF_INET6 &&
+ skops->local_ip6[0] == skops->remote_ip6[0] &&
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+ switch (op) {
+ case BPF_SOCK_OPS_BASE_RTT:
+ n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION,
+ cong, sizeof(cong));
+ if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) {
+ /* Set base_rtt to 80us */
+ rv = 80;
+ } else if (n) {
+ rv = n;
+ } else {
+ rv = -1;
+ }
+ break;
+ default:
+ rv = -1;
+ }
+ } else {
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme
new file mode 100644
index 000000000000..831fb601e3c9
--- /dev/null
+++ b/samples/bpf/tcp_bpf.readme
@@ -0,0 +1,26 @@
+This file describes how to run the tcp_*_kern.o tcp_bpf (or socket_ops)
+programs. These programs attach to a cgroupv2. The following commands create
+a cgroupv2 and attach a bash shell to the group.
+
+ mkdir -p /tmp/cgroupv2
+ mount -t cgroup2 none /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2/foo
+ bash
+ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+
+Anything that runs under this shell belongs to the foo cgroupv2 To load
+(attach) one of the tcp_*_kern.o programs:
+
+ ./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o
+
+If the "-l" flag is used, the load_sock_ops program will continue to run
+printing the BPF log buffer. The tcp_*_kern.o programs use special print
+functions to print logging information (if enabled by the ifdef).
+
+If using netperf/netserver to create traffic, you need to run them under the
+cgroupv2 to which the BPF programs are attached (i.e. under bash shell
+attached to the cgroupv2).
+
+To remove (unattach) a socket_ops BPF program from a cgroupv2:
+
+ ./load_sock_ops -r /tmp/cgroupv2/foo
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
index ee83bbabd17c..0566b7fa38a1 100644
--- a/samples/bpf/tcp_bufs_kern.c
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -41,8 +41,10 @@ int bpf_bufs(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
@@ -61,8 +63,8 @@ int bpf_bufs(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of active connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
/* Nothing to do */
@@ -71,8 +73,8 @@ int bpf_bufs(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of passive connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
default:
rv = -1;
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
index d68eadd9ca2d..f4225c9d2c0c 100644
--- a/samples/bpf/tcp_clamp_kern.c
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -41,8 +41,10 @@ int bpf_clamp(struct bpf_sock_ops *skops)
/* For testing purposes, only execute rest of BPF program
* if neither port numberis 55601
*/
- if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601)
- return -1;
+ if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) {
+ skops->reply = -1;
+ return 0;
+ }
op = (int) skops->op;
@@ -66,9 +68,9 @@ int bpf_clamp(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of active connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
&bufsize, sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
- SO_RCVBUF, &bufsize,
- sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET,
+ SO_RCVBUF, &bufsize,
+ sizeof(bufsize));
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
rv = bpf_setsockopt(skops, SOL_TCP,
@@ -80,12 +82,12 @@ int bpf_clamp(struct bpf_sock_ops *skops)
rv = bpf_setsockopt(skops, SOL_TCP,
TCP_BPF_SNDCWND_CLAMP,
&clamp, sizeof(clamp));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
- SO_SNDBUF, &bufsize,
- sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
- SO_RCVBUF, &bufsize,
- sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET,
+ SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET,
+ SO_RCVBUF, &bufsize,
+ sizeof(bufsize));
break;
default:
rv = -1;
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index dac15bce1fa9..ad0f1ba8206a 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c
@@ -39,8 +39,10 @@ int bpf_cong(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
index 23c5122ef819..4ca5ecc9f580 100644
--- a/samples/bpf/tcp_iw_kern.c
+++ b/samples/bpf/tcp_iw_kern.c
@@ -42,8 +42,10 @@ int bpf_iw(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
@@ -62,8 +64,8 @@ int bpf_iw(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of active connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw,
@@ -73,8 +75,8 @@ int bpf_iw(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of passive connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
default:
rv = -1;
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
index 3f2a228f81ce..09ff65b40b31 100644
--- a/samples/bpf/tcp_rwnd_kern.c
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -38,8 +38,10 @@ int bpf_rwnd(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) !=
- 55601 && skops->local_port != 55601)
- return -1;
+ 55601 && skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
index 3c3fc83d81cb..232bb242823e 100644
--- a/samples/bpf/tcp_synrto_kern.c
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -38,8 +38,10 @@ int bpf_synrto(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
diff --git a/samples/bpf/test_cg