From 7bc57950bd41d40685ca45a4079ce74d5d41250b Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 30 May 2017 14:37:51 +0200
Subject: samples/bpf: bpf_load.c order of prog_fd[] should correspond with ELF
 order

An eBPF ELF file generated with LLVM can contain several program
section, which can be used for bpf tail calls.  The bpf prog file
descriptors are accessible via array prog_fd[].

At-least XDP samples assume ordering, and uses prog_fd[0] is the main
XDP program to attach.  The actual order of array prog_fd[] depend on
whether or not a bpf program section is referencing any maps or not.
Not using a map result in being loaded/processed after all other
prog section.  Thus, this can lead to some very strange and hard to
debug situation, as the user can only see a FD and cannot correlated
that with the ELF section name.

The fix is rather simple, and even removes duplicate memcmp code.
Simply load program sections as the last step, instead of
load_and_attach while processing the relocation section.

When working with tail calls, it become even more essential that the
order of prog_fd[] is consistant, like the current dependency of the
map_fd[] order.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/bpf_load.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

(limited to 'samples')

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 74456b3eb89a..a91c57dd8571 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -516,16 +516,18 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		processed_sec[maps_shndx] = true;
 	}
 
-	/* load programs that need map fixup (relocations) */
+	/* process all relo sections, and rewrite bpf insns for maps */
 	for (i = 1; i < ehdr.e_shnum; i++) {
 		if (processed_sec[i])
 			continue;
 
 		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
 			continue;
+
 		if (shdr.sh_type == SHT_REL) {
 			struct bpf_insn *insns;
 
+			/* locate prog sec that need map fixup (relocations) */
 			if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
 				    &shdr_prog, &data_prog))
 				continue;
@@ -535,26 +537,15 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 				continue;
 
 			insns = (struct bpf_insn *) data_prog->d_buf;
-
-			processed_sec[shdr.sh_info] = true;
-			processed_sec[i] = true;
+			processed_sec[i] = true; /* relo section */
 
 			if (parse_relo_and_apply(data, symbols, &shdr, insns,
 						 map_data, nr_maps))
 				continue;
-
-			if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
-			    memcmp(shname_prog, "kretprobe/", 10) == 0 ||
-			    memcmp(shname_prog, "tracepoint/", 11) == 0 ||
-			    memcmp(shname_prog, "xdp", 3) == 0 ||
-			    memcmp(shname_prog, "perf_event", 10) == 0 ||
-			    memcmp(shname_prog, "socket", 6) == 0 ||
-			    memcmp(shname_prog, "cgroup/", 7) == 0)
-				load_and_attach(shname_prog, insns, data_prog->d_size);
 		}
 	}
 
-	/* load programs that don't use maps */
+	/* load programs */
 	for (i = 1; i < ehdr.e_shnum; i++) {
 
 		if (processed_sec[i])
-- 
cgit v1.2.3


From 41e9a8046c92e26a68fdf5a4cb831b7c60113602 Mon Sep 17 00:00:00 2001
From: Teng Qin <qinteng@fb.com>
Date: Fri, 2 Jun 2017 21:03:53 -0700
Subject: samples/bpf: add tests for more perf event types

$ trace_event

tests attaching BPF program to HW_CPU_CYCLES, SW_CPU_CLOCK, HW_CACHE_L1D and other events.
It runs 'dd' in the background while bpf program collects user and kernel
stack trace on counter overflow.
User space expects to see sys_read and sys_write in the kernel stack.

$ tracex6

tests reading of various perf counters from BPF program.

Both tests were refactored to increase coverage and be more accurate.

Signed-off-by: Teng Qin <qinteng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/bpf_helpers.h      |   3 +-
 samples/bpf/trace_event_user.c |  73 ++++++++++++++---
 samples/bpf/tracex6_kern.c     |  28 +++++--
 samples/bpf/tracex6_user.c     | 180 ++++++++++++++++++++++++++++++++---------
 4 files changed, 228 insertions(+), 56 deletions(-)

(limited to 'samples')

diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 9a9c95f2c9fb..51e567bc70fc 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -31,7 +31,8 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) =
 	(void *) BPF_FUNC_get_current_uid_gid;
 static int (*bpf_get_current_comm)(void *buf, int buf_size) =
 	(void *) BPF_FUNC_get_current_comm;
-static int (*bpf_perf_event_read)(void *map, int index) =
+static unsigned long long (*bpf_perf_event_read)(void *map,
+						 unsigned long long flags) =
 	(void *) BPF_FUNC_perf_event_read;
 static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
 	(void *) BPF_FUNC_clone_redirect;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index fa4336423da5..7bd827b84a67 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -75,7 +75,10 @@ static void print_stack(struct key_t *key, __u64 count)
 		for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
 			print_addr(ip[i]);
 	}
-	printf("\n");
+	if (count < 6)
+		printf("\r");
+	else
+		printf("\n");
 
 	if (key->kernstack == -EEXIST && !warned) {
 		printf("stackmap collisions seen. Consider increasing size\n");
@@ -105,7 +108,7 @@ static void print_stacks(void)
 		bpf_map_delete_elem(fd, &next_key);
 		key = next_key;
 	}
-
+	printf("\n");
 	if (!sys_read_seen || !sys_write_seen) {
 		printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
 		int_exit(0);
@@ -122,24 +125,29 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
 {
 	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 	int *pmu_fd = malloc(nr_cpus * sizeof(int));
-	int i;
+	int i, error = 0;
 
 	/* open perf_event on all cpus */
 	for (i = 0; i < nr_cpus; i++) {
 		pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
 		if (pmu_fd[i] < 0) {
 			printf("sys_perf_event_open failed\n");
+			error = 1;
 			goto all_cpu_err;
 		}
 		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
-		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
 	}
-	system("dd if=/dev/zero of=/dev/null count=5000k");
+	system("dd if=/dev/zero of=/dev/null count=5000k status=none");
 	print_stacks();
 all_cpu_err:
-	for (i--; i >= 0; i--)
+	for (i--; i >= 0; i--) {
+		ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
 		close(pmu_fd[i]);
+	}
 	free(pmu_fd);
+	if (error)
+		int_exit(0);
 }
 
 static void test_perf_event_task(struct perf_event_attr *attr)
@@ -150,12 +158,13 @@ static void test_perf_event_task(struct perf_event_attr *attr)
 	pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
 	if (pmu_fd < 0) {
 		printf("sys_perf_event_open failed\n");
-		return;
+		int_exit(0);
 	}
 	assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
-	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0);
-	system("dd if=/dev/zero of=/dev/null count=5000k");
+	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
+	system("dd if=/dev/zero of=/dev/null count=5000k status=none");
 	print_stacks();
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
 	close(pmu_fd);
 }
 
@@ -175,11 +184,56 @@ static void test_bpf_perf_event(void)
 		.config = PERF_COUNT_SW_CPU_CLOCK,
 		.inherit = 1,
 	};
+	struct perf_event_attr attr_hw_cache_l1d = {
+		.sample_freq = SAMPLE_FREQ,
+		.freq = 1,
+		.type = PERF_TYPE_HW_CACHE,
+		.config =
+			PERF_COUNT_HW_CACHE_L1D |
+			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+		.inherit = 1,
+	};
+	struct perf_event_attr attr_hw_cache_branch_miss = {
+		.sample_freq = SAMPLE_FREQ,
+		.freq = 1,
+		.type = PERF_TYPE_HW_CACHE,
+		.config =
+			PERF_COUNT_HW_CACHE_BPU |
+			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+		.inherit = 1,
+	};
+	struct perf_event_attr attr_type_raw = {
+		.sample_freq = SAMPLE_FREQ,
+		.freq = 1,
+		.type = PERF_TYPE_RAW,
+		/* Intel Instruction Retired */
+		.config = 0xc0,
+		.inherit = 1,
+	};
 
+	printf("Test HW_CPU_CYCLES\n");
 	test_perf_event_all_cpu(&attr_type_hw);
 	test_perf_event_task(&attr_type_hw);
+
+	printf("Test SW_CPU_CLOCK\n");
 	test_perf_event_all_cpu(&attr_type_sw);
 	test_perf_event_task(&attr_type_sw);
+
+	printf("Test HW_CACHE_L1D\n");
+	test_perf_event_all_cpu(&attr_hw_cache_l1d);
+	test_perf_event_task(&attr_hw_cache_l1d);
+
+	printf("Test HW_CACHE_BPU\n");
+	test_perf_event_all_cpu(&attr_hw_cache_branch_miss);
+	test_perf_event_task(&attr_hw_cache_branch_miss);
+
+	printf("Test Instruction Retired\n");
+	test_perf_event_all_cpu(&attr_type_raw);
+	test_perf_event_task(&attr_type_raw);
+
+	printf("*** PASS ***\n");
 }
 
 
@@ -209,7 +263,6 @@ int main(int argc, char **argv)
 		return 0;
 	}
 	test_bpf_perf_event();
-
 	int_exit(0);
 	return 0;
 }
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index be479c4af9e2..e7d180305974 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -3,22 +3,36 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") my_map = {
+struct bpf_map_def SEC("maps") counters = {
 	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 	.key_size = sizeof(int),
 	.value_size = sizeof(u32),
-	.max_entries = 32,
+	.max_entries = 64,
+};
+struct bpf_map_def SEC("maps") values = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u64),
+	.max_entries = 64,
 };
 
-SEC("kprobe/sys_write")
+SEC("kprobe/htab_map_get_next_key")
 int bpf_prog1(struct pt_regs *ctx)
 {
-	u64 count;
 	u32 key = bpf_get_smp_processor_id();
-	char fmt[] = "CPU-%d   %llu\n";
+	u64 count, *val;
+	s64 error;
+
+	count = bpf_perf_event_read(&counters, key);
+	error = (s64)count;
+	if (error <= -2 && error >= -22)
+		return 0;
 
-	count = bpf_perf_event_read(&my_map, key);
-	bpf_trace_printk(fmt, sizeof(fmt), key, count);
+	val = bpf_map_lookup_elem(&values, &key);
+	if (val)
+		*val = count;
+	else
+		bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST);
 
 	return 0;
 }
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index ca7874ed77f4..a05a99a0752f 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -1,73 +1,177 @@
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
+#define _GNU_SOURCE
+
+#include <assert.h>
 #include <fcntl.h>
-#include <poll.h>
-#include <sys/ioctl.h>
 #include <linux/perf_event.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
 #include "bpf_load.h"
+#include "libbpf.h"
 #include "perf-sys.h"
 
 #define SAMPLE_PERIOD  0x7fffffffffffffffULL
 
-static void test_bpf_perf_event(void)
+static void check_on_cpu(int cpu, struct perf_event_attr *attr)
 {
-	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
-	int *pmu_fd = malloc(nr_cpus * sizeof(int));
-	int status, i;
+	int pmu_fd, error = 0;
+	cpu_set_t set;
+	__u64 value;
 
-	struct perf_event_attr attr_insn_pmu = {
+	/* Move to target CPU */
+	CPU_ZERO(&set);
+	CPU_SET(cpu, &set);
+	assert(sched_setaffinity(0, sizeof(set), &set) == 0);
+	/* Open perf event and attach to the perf_event_array */
+	pmu_fd = sys_perf_event_open(attr, -1/*pid*/, cpu/*cpu*/, -1/*group_fd*/, 0);
+	if (pmu_fd < 0) {
+		fprintf(stderr, "sys_perf_event_open failed on CPU %d\n", cpu);
+		error = 1;
+		goto on_exit;
+	}
+	assert(bpf_map_update_elem(map_fd[0], &cpu, &pmu_fd, BPF_ANY) == 0);
+	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0);
+	/* Trigger the kprobe */
+	bpf_map_get_next_key(map_fd[1], &cpu, NULL);
+	/* Check the value */
+	if (bpf_map_lookup_elem(map_fd[1], &cpu, &value)) {
+		fprintf(stderr, "Value missing for CPU %d\n", cpu);
+		error = 1;
+		goto on_exit;
+	}
+	fprintf(stderr, "CPU %d: %llu\n", cpu, value);
+
+on_exit:
+	assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error);
+	assert(ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0) == 0 || error);
+	assert(close(pmu_fd) == 0 || error);
+	assert(bpf_map_delete_elem(map_fd[1], &cpu) == 0 || error);
+	exit(error);
+}
+
+static void test_perf_event_array(struct perf_event_attr *attr,
+				  const char *name)
+{
+	int i, status, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	pid_t pid[nr_cpus];
+	int err = 0;
+
+	printf("Test reading %s counters\n", name);
+
+	for (i = 0; i < nr_cpus; i++) {
+		pid[i] = fork();
+		assert(pid[i] >= 0);
+		if (pid[i] == 0) {
+			check_on_cpu(i, attr);
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < nr_cpus; i++) {
+		assert(waitpid(pid[i], &status, 0) == pid[i]);
+		err |= status;
+	}
+
+	if (err)
+		printf("Test: %s FAILED\n", name);
+}
+
+static void test_bpf_perf_event(void)
+{
+	struct perf_event_attr attr_cycles = {
 		.freq = 0,
 		.sample_period = SAMPLE_PERIOD,
 		.inherit = 0,
 		.type = PERF_TYPE_HARDWARE,
 		.read_format = 0,
 		.sample_type = 0,
-		.config = 0,/* PMU: cycles */
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+	struct perf_event_attr attr_clock = {
+		.freq = 0,
+		.sample_period = SAMPLE_PERIOD,
+		.inherit = 0,
+		.type = PERF_TYPE_SOFTWARE,
+		.read_format = 0,
+		.sample_type = 0,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+	};
+	struct perf_event_attr attr_raw = {
+		.freq = 0,
+		.sample_period = SAMPLE_PERIOD,
+		.inherit = 0,
+		.type = PERF_TYPE_RAW,
+		.read_format = 0,
+		.sample_type = 0,
+		/* Intel Instruction Retired */
+		.config = 0xc0,
+	};
+	struct perf_event_attr attr_l1d_load = {
+		.freq = 0,
+		.sample_period = SAMPLE_PERIOD,
+		.inherit = 0,
+		.type = PERF_TYPE_HW_CACHE,
+		.read_format = 0,
+		.sample_type = 0,
+		.config =
+			PERF_COUNT_HW_CACHE_L1D |
+			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+	};
+	struct perf_event_attr attr_llc_miss = {
+		.freq = 0,
+		.sample_period = SAMPLE_PERIOD,
+		.inherit = 0,
+		.type = PERF_TYPE_HW_CACHE,
+		.read_format = 0,
+		.sample_type = 0,
+		.config =
+			PERF_COUNT_HW_CACHE_LL |
+			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+	};
+	struct perf_event_attr attr_msr_tsc = {
+		.freq = 0,
+		.sample_period = 0,
+		.inherit = 0,
+		/* From /sys/bus/event_source/devices/msr/ */
+		.type = 7,
+		.read_format = 0,
+		.sample_type = 0,
+		.config = 0,
 	};
 
-	for (i = 0; i < nr_cpus; i++) {
-		pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
-		if (pmu_fd[i] < 0) {
-			printf("event syscall failed\n");
-			goto exit;
-		}
-
-		bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
-		ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
-	}
+	test_perf_event_array(&attr_cycles, "HARDWARE-cycles");
+	test_perf_event_array(&attr_clock, "SOFTWARE-clock");
+	test_perf_event_array(&attr_raw, "RAW-instruction-retired");
+	test_perf_event_array(&attr_l1d_load, "HW_CACHE-L1D-load");
 
-	status = system("ls > /dev/null");
-	if (status)
-		goto exit;
-	status = system("sleep 2");
-	if (status)
-		goto exit;
-
-exit:
-	for (i = 0; i < nr_cpus; i++)
-		close(pmu_fd[i]);
-	close(map_fd[0]);
-	free(pmu_fd);
+	/* below tests may fail in qemu */
+	test_perf_event_array(&attr_llc_miss, "HW_CACHE-LLC-miss");
+	test_perf_event_array(&attr_msr_tsc, "Dynamic-msr-tsc");
 }
 
 int main(int argc, char **argv)
 {
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	char filename[256];
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
+	setrlimit(RLIMIT_MEMLOCK, &r);
 	if (load_bpf_file(filename)) {
 		printf("%s", bpf_log_buf);
 		return 1;
 	}
 
 	test_bpf_perf_event();
-	read_trace_pipe();
-
 	return 0;
 }
-- 
cgit v1.2.3


From c1932cdb275e65cbef2f230f0df0c490fdd03c62 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 16:49:37 -0700
Subject: bpf: Add MIPS support to samples/bpf.

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/bpf_helpers.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'samples')

diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 51e567bc70fc..f4840b8bb8f9 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -136,6 +136,19 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
 #define PT_REGS_SP(x) ((x)->sp)
 #define PT_REGS_IP(x) ((x)->pc)
 
+#elif defined(__mips__)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
 #elif defined(__powerpc__)
 
 #define PT_REGS_PARM1(x) ((x)->gpr[3])
-- 
cgit v1.2.3


From 4b7190e8416d9238e995da993a7fb851996654e2 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 16:49:38 -0700
Subject: samples/bpf: Fix tracex5 to work with MIPS syscalls.

There are two problems:

1) In MIPS the __NR_* macros expand to an expression, this causes the
   sections of the object file to be named like:

  .
  .
  .
  [ 5] kprobe/(5000 + 1) PROGBITS        0000000000000000 000160 ...
  [ 6] kprobe/(5000 + 0) PROGBITS        0000000000000000 000258 ...
  [ 7] kprobe/(5000 + 9) PROGBITS        0000000000000000 000348 ...
  .
  .
  .

The fix here is to use the "asm_offsets" trick to evaluate the macros
in the C compiler and generate a header file with a usable form of the
macros.

2) MIPS syscall numbers start at 5000, so we need a bigger map to hold
the sub-programs.

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile       | 13 +++++++++++++
 samples/bpf/syscall_nrs.c  | 12 ++++++++++++
 samples/bpf/tracex5_kern.c | 11 ++++++++---
 3 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 samples/bpf/syscall_nrs.c

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6c7468eb3684..a0561dc762fe 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -160,6 +160,17 @@ clean:
 	$(MAKE) -C ../../ M=$(CURDIR) clean
 	@rm -f *~
 
+$(obj)/syscall_nrs.s:	$(src)/syscall_nrs.c
+	$(call if_changed_dep,cc_s_c)
+
+$(obj)/syscall_nrs.h:	$(obj)/syscall_nrs.s FORCE
+	$(call filechk,offsets,__SYSCALL_NRS_H__)
+
+clean-files += syscall_nrs.h
+
+FORCE:
+
+
 # Verify LLVM compiler tools are available and bpf target is supported by llc
 .PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
 
@@ -180,6 +191,8 @@ verify_target_bpf: verify_cmds
 
 $(src)/*.c: verify_target_bpf
 
+$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
+
 # asm/sysreg.h - inline assembly used by it is incompatible with llvm.
 # But, there is no easy way to fix it, so just exclude it since it is
 # useless for BPF samples.
diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c
new file mode 100644
index 000000000000..ce2a30b11248
--- /dev/null
+++ b/samples/bpf/syscall_nrs.c
@@ -0,0 +1,12 @@
+#include <uapi/linux/unistd.h>
+#include <linux/kbuild.h>
+
+#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR)
+
+void syscall_defines(void)
+{
+	COMMENT("Linux system call numbers.");
+	SYSNR(__NR_write);
+	SYSNR(__NR_read);
+	SYSNR(__NR_mmap);
+}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 7e4cf74553ff..f57f4e1ea1ec 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -9,6 +9,7 @@
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/seccomp.h>
 #include <uapi/linux/unistd.h>
+#include "syscall_nrs.h"
 #include "bpf_helpers.h"
 
 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
@@ -17,7 +18,11 @@ struct bpf_map_def SEC("maps") progs = {
 	.type = BPF_MAP_TYPE_PROG_ARRAY,
 	.key_size = sizeof(u32),
 	.value_size = sizeof(u32),
+#ifdef __mips__
+	.max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+#else
 	.max_entries = 1024,
+#endif
 };
 
 SEC("kprobe/__seccomp_filter")
@@ -37,7 +42,7 @@ int bpf_prog1(struct pt_regs *ctx)
 }
 
 /* we jump here when syscall number == __NR_write */
-PROG(__NR_write)(struct pt_regs *ctx)
+PROG(SYS__NR_write)(struct pt_regs *ctx)
 {
 	struct seccomp_data sd;
 
@@ -50,7 +55,7 @@ PROG(__NR_write)(struct pt_regs *ctx)
 	return 0;
 }
 
-PROG(__NR_read)(struct pt_regs *ctx)
+PROG(SYS__NR_read)(struct pt_regs *ctx)
 {
 	struct seccomp_data sd;
 
@@ -63,7 +68,7 @@ PROG(__NR_read)(struct pt_regs *ctx)
 	return 0;
 }
 
-PROG(__NR_mmap)(struct pt_regs *ctx)
+PROG(SYS__NR_mmap)(struct pt_regs *ctx)
 {
 	char fmt[] = "mmap\n";
 	bpf_trace_printk(fmt, sizeof(fmt));
-- 
cgit v1.2.3


From 69137ea60c9dad58773a1918de6c1b00b088520c Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 15 Jun 2017 19:07:21 +0300
Subject: pktgen: Specify num packets per thread

Use -n <num>, to specify the number of packets every
thread sends.
Zero means indefinitely.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/pktgen/README.rst                              | 1 +
 samples/pktgen/parameters.sh                           | 7 ++++++-
 samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh | 2 +-
 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh    | 2 +-
 samples/pktgen/pktgen_sample01_simple.sh               | 2 +-
 samples/pktgen/pktgen_sample02_multiqueue.sh           | 3 ++-
 samples/pktgen/pktgen_sample03_burst_single_flow.sh    | 2 +-
 samples/pktgen/pktgen_sample04_many_flows.sh           | 2 +-
 samples/pktgen/pktgen_sample05_flow_per_thread.sh      | 2 +-
 9 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'samples')

diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index 8365c4e5c513..c018d67da1a1 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -22,6 +22,7 @@ across the sample scripts.  Usage example is printed on errors::
   -m : ($DST_MAC)   destination MAC-addr
   -t : ($THREADS)   threads to start
   -c : ($SKB_CLONE) SKB clones send before alloc new SKB
+  -n : ($COUNT)     num messages to send per thread, 0 means indefinitely
   -b : ($BURST)     HW level bursting of SKBs
   -v : ($VERBOSE)   verbose
   -x : ($DEBUG)     debug
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index f70ea7dd5660..036147594a20 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -11,6 +11,7 @@ function usage() {
     echo "  -m : (\$DST_MAC)   destination MAC-addr"
     echo "  -t : (\$THREADS)   threads to start"
     echo "  -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
+    echo "  -n : (\$COUNT)     num messages to send per thread, 0 means indefinitely"
     echo "  -b : (\$BURST)     HW level bursting of SKBs"
     echo "  -v : (\$VERBOSE)   verbose"
     echo "  -x : (\$DEBUG)     debug"
@@ -20,7 +21,7 @@ function usage() {
 
 ##  --- Parse command line arguments / parameters ---
 ## echo "Commandline options:"
-while getopts "s:i:d:m:t:c:b:vxh6" option; do
+while getopts "s:i:d:m:t:c:n:b:vxh6" option; do
     case $option in
         i) # interface
           export DEV=$OPTARG
@@ -48,6 +49,10 @@ while getopts "s:i:d:m:t:c:b:vxh6" option; do
 	  export CLONE_SKB=$OPTARG
 	  info "CLONE_SKB=$CLONE_SKB"
           ;;
+        n)
+	  export COUNT=$OPTARG
+	  info "COUNT=$COUNT"
+          ;;
         b)
 	  export BURST=$OPTARG
 	  info "SKB bursting: BURST=$BURST"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index f3e1bedfd77f..d2694a12de61 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -39,10 +39,10 @@ if [ -z "$DEST_IP" ]; then
 fi
 [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$BURST" ] && BURST=1024
+[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
 
 # Base Config
 DELAY="0"        # Zero means max speed
-COUNT="10000000" # Zero means indefinitely
 
 # General cleanup everything since last run
 pg_ctrl "reset"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index cc102e923241..43604c2db726 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -22,10 +22,10 @@ fi
 if [[ -n "$BURST" ]]; then
     err 1 "Bursting not supported for this mode"
 fi
+[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
 
 # Base Config
 DELAY="0"        # Zero means max speed
-COUNT="10000000" # Zero means indefinitely
 
 # General cleanup everything since last run
 pg_ctrl "reset"
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index 29ef4ba50796..35b7fe34bda2 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -20,10 +20,10 @@ fi
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 # Example enforce param "-m" for dst_mac
 [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac"
+[ -z "$COUNT" ]   && COUNT="100000" # Zero means indefinitely
 
 # Base Config
 DELAY="0"        # Zero means max speed
-COUNT="100000"   # Zero means indefinitely
 
 # Flow variation random source port between min and max
 UDP_MIN=9
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index c88a161d3e6f..164194d1c79b 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -13,9 +13,10 @@ root_check_run_with_sudo "$@"
 # Required param: -i dev in $DEV
 source ${basedir}/parameters.sh
 
+[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
+
 # Base Config
 DELAY="0"        # Zero means max speed
-COUNT="100000"   # Zero means indefinitely
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
 # Flow variation random source port between min and max
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index 80cf8f5ba6b2..e03dd4cd05ce 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -31,10 +31,10 @@ fi
 [ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$BURST" ]     && BURST=32
 [ -z "$CLONE_SKB" ] && CLONE_SKB="100000"
+[ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
 
 # Base Config
 DELAY="0"  # Zero means max speed
-COUNT="0"  # Zero means indefinitely
 
 # General cleanup everything since last run
 pg_ctrl "reset"
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index f60412e445bb..0fc72d2bcd31 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -15,6 +15,7 @@ source ${basedir}/parameters.sh
 [ -z "$DEST_IP" ]   && DEST_IP="198.18.0.42"
 [ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+[ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
 
 # NOTICE:  Script specific settings
 # =======
@@ -26,7 +27,6 @@ source ${basedir}/parameters.sh
 
 # Base Config
 DELAY="0"  # Zero means max speed
-COUNT="0"  # Zero means indefinitely
 
 if [[ -n "$BURST" ]]; then
     err 1 "Bursting not supported for this mode"
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 32ad818e2829..f4fb79409fd0 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -20,11 +20,11 @@ source ${basedir}/parameters.sh
 [ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 [ -z "$BURST" ]     && BURST=32
+[ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
 
 
 # Base Config
 DELAY="0"  # Zero means max speed
-COUNT="0"  # Zero means indefinitely
 
 # General cleanup everything since last run
 pg_ctrl "reset"
-- 
cgit v1.2.3


From e0e16672ee9e2c31d9601d762cf934196b13a7fc Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 15 Jun 2017 19:07:22 +0300
Subject: pktgen: Specify the index of first thread

Use "-f <num>", to specify the index of the first
sender thread.
In default first thread is #0.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/pktgen/README.rst                            |  1 +
 samples/pktgen/parameters.sh                         | 20 ++++++++++++++------
 .../pktgen/pktgen_bench_xmit_mode_netif_receive.sh   |  4 ++--
 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh  |  4 ++--
 samples/pktgen/pktgen_sample02_multiqueue.sh         |  4 ++--
 samples/pktgen/pktgen_sample03_burst_single_flow.sh  |  4 ++--
 samples/pktgen/pktgen_sample04_many_flows.sh         |  4 ++--
 samples/pktgen/pktgen_sample05_flow_per_thread.sh    |  4 ++--
 8 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'samples')

diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index c018d67da1a1..ff8929da61c5 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -21,6 +21,7 @@ across the sample scripts.  Usage example is printed on errors::
   -d : ($DEST_IP)   destination IP
   -m : ($DST_MAC)   destination MAC-addr
   -t : ($THREADS)   threads to start
+  -f : ($F_THREAD)  index of first thread (zero indexed CPU number)
   -c : ($SKB_CLONE) SKB clones send before alloc new SKB
   -n : ($COUNT)     num messages to send per thread, 0 means indefinitely
   -b : ($BURST)     HW level bursting of SKBs
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index 036147594a20..3a6244d5f47a 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -10,6 +10,7 @@ function usage() {
     echo "  -d : (\$DEST_IP)   destination IP"
     echo "  -m : (\$DST_MAC)   destination MAC-addr"
     echo "  -t : (\$THREADS)   threads to start"
+    echo "  -f : (\$F_THREAD)  index of first thread (zero indexed CPU number)"
     echo "  -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
     echo "  -n : (\$COUNT)     num messages to send per thread, 0 means indefinitely"
     echo "  -b : (\$BURST)     HW level bursting of SKBs"
@@ -21,7 +22,7 @@ function usage() {
 
 ##  --- Parse command line arguments / parameters ---
 ## echo "Commandline options:"
-while getopts "s:i:d:m:t:c:n:b:vxh6" option; do
+while getopts "s:i:d:m:f:t:c:n:b:vxh6" option; do
     case $option in
         i) # interface
           export DEV=$OPTARG
@@ -39,11 +40,13 @@ while getopts "s:i:d:m:t:c:n:b:vxh6" option; do
           export DST_MAC=$OPTARG
 	  info "Destination MAC set to: DST_MAC=$DST_MAC"
           ;;
+        f)
+	  export F_THREAD=$OPTARG
+	  info "Index of first thread (zero indexed CPU number): $F_THREAD"
+          ;;
         t)
 	  export THREADS=$OPTARG
-          export CPU_THREADS=$OPTARG
-	  let "CPU_THREADS -= 1"
-	  info "Number of threads to start: $THREADS (0 to $CPU_THREADS)"
+	  info "Number of threads to start: $THREADS"
           ;;
         c)
 	  export CLONE_SKB=$OPTARG
@@ -82,12 +85,17 @@ if [ -z "$PKT_SIZE" ]; then
     info "Default packet size set to: set to: $PKT_SIZE bytes"
 fi
 
+if [ -z "$F_THREAD" ]; then
+    # First thread (F_THREAD) reference the zero indexed CPU number
+    export F_THREAD=0
+fi
+
 if [ -z "$THREADS" ]; then
-    # Zero CPU threads means one thread, because CPU numbers are zero indexed
-    export CPU_THREADS=0
     export THREADS=1
 fi
 
+export L_THREAD=$(( THREADS + F_THREAD - 1 ))
+
 if [ -z "$DEV" ]; then
     usage
     err 2 "Please specify output device"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index d2694a12de61..e5bfe759a0fb 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -48,7 +48,7 @@ DELAY="0"        # Zero means max speed
 pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     # The device name is extended with @name, using thread number to
     # make then unique, but any name will do.
     dev=${DEV}@${thread}
@@ -81,7 +81,7 @@ pg_ctrl "start"
 echo "Done" >&2
 
 # Print results
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
     echo "Device: $dev"
     cat /proc/net/pktgen/$dev | grep -A2 "Result:"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index 43604c2db726..1ad878e95539 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -31,7 +31,7 @@ DELAY="0"        # Zero means max speed
 pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     # The device name is extended with @name, using thread number to
     # make then unique, but any name will do.
     dev=${DEV}@${thread}
@@ -61,7 +61,7 @@ pg_ctrl "start"
 echo "Done" >&2
 
 # Print results
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
     echo "Device: $dev"
     cat /proc/net/pktgen/$dev | grep -A2 "Result:"
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index 164194d1c79b..cbdd3e2bceff 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -33,7 +33,7 @@ fi
 pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     # The device name is extended with @name, using thread number to
     # make then unique, but any name will do.
     dev=${DEV}@${thread}
@@ -71,7 +71,7 @@ pg_ctrl "start"
 echo "Done" >&2
 
 # Print results
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
     echo "Device: $dev"
     cat /proc/net/pktgen/$dev | grep -A2 "Result:"
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index e03dd4cd05ce..8d26e0ca683d 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -40,7 +40,7 @@ DELAY="0"  # Zero means max speed
 pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
@@ -71,7 +71,7 @@ done
 # Run if user hits control-c
 function control_c() {
     # Print results
-    for ((thread = 0; thread < $THREADS; thread++)); do
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	dev=${DEV}@${thread}
 	echo "Device: $dev"
 	cat /proc/net/pktgen/$dev | grep -A2 "Result:"
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 0fc72d2bcd31..497fb7520464 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -36,7 +36,7 @@ fi
 pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
@@ -78,7 +78,7 @@ done
 # Run if user hits control-c
 function print_result() {
     # Print results
-    for ((thread = 0; thread < $THREADS; thread++)); do
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	dev=${DEV}@${thread}
 	echo "Device: $dev"
 	cat /proc/net/pktgen/$dev | grep -A2 "Result:"
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index f4fb79409fd0..ac9cfd6b2c0a 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -30,7 +30,7 @@ DELAY="0"  # Zero means max speed
 pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
-for ((thread = 0; thread < $THREADS; thread++)); do
+for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
@@ -66,7 +66,7 @@ done
 # Run if user hits control-c
 function print_result() {
     # Print results
-    for ((thread = 0; thread < $THREADS; thread++)); do
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	dev=${DEV}@${thread}
 	echo "Device: $dev"
 	cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-- 
cgit v1.2.3


From 00a3855d68bd1ae633c94b13b8b404a5a238b135 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 21 Jun 2017 13:48:27 -0700
Subject: samples/bpf: fix a build problem

tracex5_kern.c build failed with the following error message:
  ../samples/bpf/tracex5_kern.c:12:10: fatal error: 'syscall_nrs.h' file not found
  #include "syscall_nrs.h"
The generated file syscall_nrs.h is put in build/samples/bpf directory,
but this directory is not in include path, hence build failed.

The fix is to add $(obj) into the clang compilation path.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index a0561dc762fe..e7ec9b8539a5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -197,7 +197,7 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
 # But, there is no easy way to fix it, so just exclude it since it is
 # useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
-	$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+	$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
 		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-Wno-compare-distinct-pointer-types \
 		-Wno-gnu-variable-sized-type-not-at-end \
-- 
cgit v1.2.3


From a8744f2528c14e4545c6071b6681ab17607be2fa Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 27 Jun 2017 23:08:35 -0700
Subject: bpf: Add test for syscall on fd array/htab lookup

Checks are added to the existing sockex3 and test_map_in_map test.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/sockex3_user.c         | 15 ++++++++++++++-
 samples/bpf/test_map_in_map_user.c | 17 +++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'samples')

diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index b5524d417eb5..877ecf8fc5ac 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -8,6 +8,10 @@
 #include <arpa/inet.h>
 #include <sys/resource.h>
 
+#define PARSE_IP 3
+#define PARSE_IP_PROG_FD (prog_fd[0])
+#define PROG_ARRAY_FD (map_fd[0])
+
 struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
@@ -28,7 +32,9 @@ int main(int argc, char **argv)
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	char filename[256];
 	FILE *f;
-	int i, sock;
+	int i, sock, err, id, key = PARSE_IP;
+	struct bpf_prog_info info = {};
+	uint32_t info_len = sizeof(info);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	setrlimit(RLIMIT_MEMLOCK, &r);
@@ -38,6 +44,13 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	/* Test fd array lookup which returns the id of the bpf_prog */
+	err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
+	assert(!err);
+	err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
+	assert(!err);
+	assert(id == info.id);
+
 	sock = open_raw_sock("lo");
 
 	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index f62fdc2bd428..1aca18539d8d 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -32,6 +32,20 @@ static const char * const test_names[] = {
 
 #define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
 
+static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
+{
+	struct bpf_map_info info = {};
+	uint32_t info_len = sizeof(info);
+	int ret, id;
+
+	ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len);
+	assert(!ret);
+
+	ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id);
+	assert(!ret);
+	assert(id == info.id);
+}
+
 static void populate_map(uint32_t port_key, int magic_result)
 {
 	int ret;
@@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result)
 
 	ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY);
 	assert(!ret);
+	check_map_id(PORT_A, A_OF_PORT_A, port_key);
 
 	ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST);
 	assert(!ret);
+	check_map_id(PORT_A, H_OF_PORT_A, port_key);
 
 	ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST);
 	assert(!ret);
+	check_map_id(PORT_H, H_OF_PORT_H, port_key);
 }
 
 static void test_map_in_map(void)
-- 
cgit v1.2.3


From 40304b2a1567fecc321f640ee4239556dd0f3ee0 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:40 -0700
Subject: bpf: BPF support for sock_ops

Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding
struct that allows BPF programs of this type to access some of the
socket's fields (such as IP addresses, ports, etc.). It uses the
existing bpf cgroups infrastructure so the programs can be attached per
cgroup with full inheritance support. The program will be called at
appropriate times to set relevant connections parameters such as buffer
sizes, SYN and SYN-ACK RTOs, etc., based on connection information such
as IP addresses, port numbers, etc.

Alghough there are already 3 mechanisms to set parameters (sysctls,
route metrics and setsockopts), this new mechanism provides some
distinct advantages. Unlike sysctls, it can set parameters per
connection. In contrast to route metrics, it can also use port numbers
and information provided by a user level program. In addition, it could
set parameters probabilistically for evaluation purposes (i.e. do
something different on 10% of the flows and compare results with the
other 90% of the flows). Also, in cases where IPv6 addresses contain
geographic information, the rules to make changes based on the distance
(or RTT) between the hosts are much easier than route metric rules and
can be global. Finally, unlike setsockopt, it oes not require
application changes and it can be updated easily at any time.

Although the bpf cgroup framework already contains a sock related
program type (BPF_PROG_TYPE_CGROUP_SOCK), I created the new type
(BPF_PROG_TYPE_SOCK_OPS) beccause the existing type expects to be called
only once during the connections's lifetime. In contrast, the new
program type will be called multiple times from different places in the
network stack code.  For example, before sending SYN and SYN-ACKs to set
an appropriate timeout, when the connection is established to set
congestion control, etc. As a result it has "op" field to specify the
type of operation requested.

The purpose of this new program type is to simplify setting connection
parameters, such as buffer sizes, TCP's SYN RTO, etc. For example, it is
easy to use facebook's internal IPv6 addresses to determine if both hosts
of a connection are in the same datacenter. Therefore, it is easy to
write a BPF program to choose a small SYN RTO value when both hosts are
in the same datacenter.

This patch only contains the framework to support the new BPF program
type, following patches add the functionality to set various connection
parameters.

This patch defines a new BPF program type: BPF_PROG_TYPE_SOCKET_OPS
and a new bpf syscall command to load a new program of this type:
BPF_PROG_LOAD_SOCKET_OPS.

Two new corresponding structs (one for the kernel one for the user/BPF
program):

/* kernel version */
struct bpf_sock_ops_kern {
        struct sock *sk;
        __u32  op;
        union {
                __u32 reply;
                __u32 replylong[4];
        };
};

/* user version
 * Some fields are in network byte order reflecting the sock struct
 * Use the bpf_ntohl helper macro in samples/bpf/bpf_endian.h to
 * convert them to host byte order.
 */
struct bpf_sock_ops {
        __u32 op;
        union {
                __u32 reply;
                __u32 replylong[4];
        };
        __u32 family;
        __u32 remote_ip4;     /* In network byte order */
        __u32 local_ip4;      /* In network byte order */
        __u32 remote_ip6[4];  /* In network byte order */
        __u32 local_ip6[4];   /* In network byte order */
        __u32 remote_port;    /* In network byte order */
        __u32 local_port;     /* In host byte horder */
};

Currently there are two types of ops. The first type expects the BPF
program to return a value which is then used by the caller (or a
negative value to indicate the operation is not supported). The second
type expects state changes to be done by the BPF program, for example
through a setsockopt BPF helper function, and they ignore the return
value.

The reply fields of the bpf_sockt_ops struct are there in case a bpf
program needs to return a value larger than an integer.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/bpf_load.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'samples')

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index a91c57dd8571..a4be7cfa6519 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
 	bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
 	bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
+	bool is_sockops = strncmp(event, "sockops", 7) == 0;
 	size_t insns_cnt = size / sizeof(struct bpf_insn);
 	enum bpf_prog_type prog_type;
 	char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_CGROUP_SKB;
 	} else if (is_cgroup_sk) {
 		prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	} else if (is_sockops) {
+		prog_type = BPF_PROG_TYPE_SOCK_OPS;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
 		return 0;
 
-	if (is_socket) {
-		event += 6;
+	if (is_socket || is_sockops) {
+		if (is_socket)
+			event += 6;
+		else
+			event += 7;
 		if (*event != '/')
 			return 0;
 		event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		    memcmp(shname, "xdp", 3) == 0 ||
 		    memcmp(shname, "perf_event", 10) == 0 ||
 		    memcmp(shname, "socket", 6) == 0 ||
-		    memcmp(shname, "cgroup/", 7) == 0)
+		    memcmp(shname, "cgroup/", 7) == 0 ||
+		    memcmp(shname, "sockops", 7) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
 
-- 
cgit v1.2.3


From ae16189efb0fe2bdc5e702bd6221ed6d0ff5babd Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:41 -0700
Subject: bpf: program to load and attach sock_ops BPF progs

The program load_sock_ops can be used to load sock_ops bpf programs and
to attach it to an existing (v2) cgroup. It can also be used to detach
sock_ops programs.

Examples:
    load_sock_ops [-l] <cg-path> <prog filename>
	Load and attaches a sock_ops program at the specified cgroup.
	If "-l" is used, the program will continue to run to output the
	BPF log buffer.
	If the specified filename does not end in ".o", it appends
	"_kern.o" to the name.

    load_sock_ops -r <cg-path>
	Detaches the currently attached sock_ops program from the
	specified cgroup.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile        |  3 ++
 samples/bpf/load_sock_ops.c | 97 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 samples/bpf/load_sock_ops.c

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index e7ec9b8539a5..015589b50c37 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist
 hostprogs-y += xdp_tx_iptunnel
 hostprogs-y += test_map_in_map
 hostprogs-y += per_socket_stats_example
+hostprogs-y += load_sock_ops
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
 tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
 tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
 tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
+load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
 test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
 trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
 lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
@@ -130,6 +132,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
 HOSTLOADLIBES_tracex6 += -lelf
 HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
+HOSTLOADLIBES_load_sock_ops += -lelf
 HOSTLOADLIBES_test_probe_write_user += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c
new file mode 100644
index 000000000000..e5da6cf71a3e
--- /dev/null
+++ b/samples/bpf/load_sock_ops.c
@@ -0,0 +1,97 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+
+static void usage(char *pname)
+{
+	printf("USAGE:\n  %s [-l] <cg-path> <prog filename>\n", pname);
+	printf("\tLoad and attach a sock_ops program to the specified "
+	       "cgroup\n");
+	printf("\tIf \"-l\" is used, the program will continue to run\n");
+	printf("\tprinting the BPF log buffer\n");
+	printf("\tIf the specified filename does not end in \".o\", it\n");
+	printf("\tappends \"_kern.o\" to the name\n");
+	printf("\n");
+	printf("  %s -r <cg-path>\n", pname);
+	printf("\tDetaches the currently attached sock_ops program\n");
+	printf("\tfrom the specified cgroup\n");
+	printf("\n");
+	exit(1);
+}
+
+int main(int argc, char **argv)
+{
+	int logFlag = 0;
+	int error = 0;
+	char *cg_path;
+	char fn[500];
+	char *prog;
+	int cg_fd;
+
+	if (argc < 3)
+		usage(argv[0]);
+
+	if (!strcmp(argv[1], "-r")) {
+		cg_path = argv[2];
+		cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
+		error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+		if (error) {
+			printf("ERROR: bpf_prog_detach: %d (%s)\n",
+			       error, strerror(errno));
+			return 2;
+		}
+		return 0;
+	} else if (!strcmp(argv[1], "-h")) {
+		usage(argv[0]);
+	} else if (!strcmp(argv[1], "-l")) {
+		logFlag = 1;
+		if (argc < 4)
+			usage(argv[0]);
+	}
+
+	prog = argv[argc - 1];
+	cg_path = argv[argc - 2];
+	if (strlen(prog) > 480) {
+		fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
+		return 3;
+	}
+	cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
+
+	if (!strcmp(prog + strlen(prog)-2, ".o"))
+		strcpy(fn, prog);
+	else
+		sprintf(fn, "%s_kern.o", prog);
+	if (logFlag)
+		printf("loading bpf file:%s\n", fn);
+	if (load_bpf_file(fn)) {
+		printf("ERROR: load_bpf_file failed for: %s\n", fn);
+		printf("%s", bpf_log_buf);
+		return 4;
+	}
+	if (logFlag)
+		printf("TCP BPF Loaded %s\n", fn);
+
+	error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (error) {
+		printf("ERROR: bpf_prog_attach: %d (%s)\n",
+		       error, strerror(errno));
+		return 5;
+	} else if (logFlag) {
+		read_trace_pipe();
+	}
+
+	return error;
+}
-- 
cgit v1.2.3


From 61bc4d8daa7af018d7ea084ea38648828a5a90d5 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:43 -0700
Subject: bpf: Sample bpf program to set SYN/SYN-ACK RTOs

The sample BPF program, tcp_synrto_kern.c, sets the SYN and SYN-ACK
RTOs to 10ms when both hosts are within the same datacenter (i.e.
small RTTs) in an environment where common IPv6 prefixes indicate
both hosts are in the same data center.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile          |  1 +
 samples/bpf/tcp_synrto_kern.c | 69 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 samples/bpf/tcp_synrto_kern.c

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 015589b50c37..e29370a19957 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -113,6 +113,7 @@ always += lwt_len_hist_kern.o
 always += xdp_tx_iptunnel_kern.o
 always += test_map_in_map_kern.o
 always += cookie_uid_helper_example.o
+always += tcp_synrto_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
new file mode 100644
index 000000000000..3c3fc83d81cb
--- /dev/null
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses
+ * and the first 5.5 bytes of the IPv6 addresses are the same (in this example
+ * that means both hosts are in the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_synrto(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 &&
+	    skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Check for TIMEOUT_INIT operation and IPv6 addresses */
+	if (op == BPF_SOCK_OPS_TIMEOUT_INIT &&
+		skops->family == AF_INET6) {
+
+		/* If the first 5.5 bytes of the IPv6 address are the same
+		 * then both hosts are in the same datacenter
+		 * so use an RTO of 10ms
+		 */
+		if (skops->local_ip6[0] == skops->remote_ip6[0] &&
+		    (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+		    (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
+			rv = 10;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From c400296bf63d234899cf2448cd3d17199676f188 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:45 -0700
Subject: bpf: Sample bpf program to set initial window

The sample bpf program, tcp_rwnd_kern.c, sets the initial
advertized window to 40 packets in an environment where
distinct IPv6 prefixes indicate that both hosts are not
in the same data center.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile        |  1 +
 samples/bpf/tcp_rwnd_kern.c | 69 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 samples/bpf/tcp_rwnd_kern.c

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index e29370a19957..ca955280cbeb 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -114,6 +114,7 @@ always += xdp_tx_iptunnel_kern.o
 always += test_map_in_map_kern.o
 always += cookie_uid_helper_example.o
 always += tcp_synrto_kern.o
+always += tcp_rwnd_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
new file mode 100644
index 000000000000..3f2a228f81ce
--- /dev/null
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets when using IPv6
+ * and the first 5.5 bytes of the IPv6 addresses are not the same (in this
+ * example that means both hosts are not the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_rwnd(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) !=
+	    55601 && skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Check for RWND_INIT operation and IPv6 addresses */
+	if (op == BPF_SOCK_OPS_RWND_INIT &&
+		skops->family == AF_INET6) {
+
+		/* If the first 5.5 bytes of the IPv6 address are not the same
+		 * then both hosts are not in the same datacenter
+		 * so use a larger initial advertized window (40 packets)
+		 */
+		if (skops->local_ip6[0] != skops->remote_ip6[0] ||
+		    (bpf_ntohl(skops->local_ip6[1]) & 0xfffff000) !=
+		    (bpf_ntohl(skops->remote_ip6[1]) & 0xfffff000))
+			rv = 40;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 8c4b4c7e9ff0447995750d9329949fa082520269 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:46 -0700
Subject: bpf: Add setsockopt helper function to bpf

Added support for calling a subset of socket setsockopts from
BPF_PROG_TYPE_SOCK_OPS programs. The code was duplicated rather
than making the changes to call the socket setsockopt function because
the changes required would have been larger.

The ops supported are:
  SO_RCVBUF
  SO_SNDBUF
  SO_MAX_PACING_RATE
  SO_PRIORITY
  SO_RCVLOWAT
  SO_MARK

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/bpf_helpers.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'samples')

diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f4840b8bb8f9..d50ac342dc92 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -60,6 +60,9 @@ static unsigned long long (*bpf_get_prandom_u32)(void) =
 	(void *) BPF_FUNC_get_prandom_u32;
 static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
 	(void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+			     int optlen) =
+	(void *) BPF_FUNC_setsockopt;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
cgit v1.2.3


From d9925368a641391f38cd281e67b948e6b6f3bcca Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:48 -0700
Subject: bpf: Sample BPF program to set buffer sizes

This patch contains a BPF program to set initial receive window to
40 packets and send and receive buffers to 1.5MB. This would usually
be done after doing appropriate checks that indicate the hosts are
far enough away (i.e. large RTT).

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile        |  1 +
 samples/bpf/tcp_bufs_kern.c | 86 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 samples/bpf/tcp_bufs_kern.c

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ca955280cbeb..3b300db3d74d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -115,6 +115,7 @@ always += test_map_in_map_kern.o
 always += cookie_uid_helper_example.o
 always += tcp_synrto_kern.o
 always += tcp_rwnd_kern.o
+always += tcp_bufs_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
new file mode 100644
index 000000000000..ee83bbabd17c
--- /dev/null
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets and send
+ * and receive buffers to 1.5MB. This would usually be done after
+ * doing appropriate checks that indicate the hosts are far enough
+ * away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_bufs(struct bpf_sock_ops *skops)
+{
+	int bufsize = 1500000;
+	int rwnd_init = 40;
+	int rv = 0;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 &&
+	    skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+
+	/* Usually there would be a check to insure the hosts are far
+	 * from each other so it makes sense to increase buffer sizes
+	 */
+	switch (op) {
+	case BPF_SOCK_OPS_RWND_INIT:
+		rv = rwnd_init;
+		break;
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		/* Set sndbuf and rcvbuf of active connections */
+		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+				    sizeof(bufsize));
+		rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+					     &bufsize,