diff options
| author | Ingo Molnar <mingo@kernel.org> | 2017-07-30 11:15:13 +0200 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2017-07-30 11:15:13 +0200 |
| commit | f5db340f19f14a8df9dfd22d71fba1513e9f1f7e (patch) | |
| tree | 131d3345bc987aee3c922624de816492e7f323a4 /samples | |
| parent | ee438ec8f33c5af0d4a4ffb935c5b9272e8c2680 (diff) | |
| parent | 38115f2f8cec8087d558c062e779c443a01f87d6 (diff) | |
| download | linux-f5db340f19f14a8df9dfd22d71fba1513e9f1f7e.tar.gz linux-f5db340f19f14a8df9dfd22d71fba1513e9f1f7e.tar.bz2 linux-f5db340f19f14a8df9dfd22d71fba1513e9f1f7e.zip | |
Merge branch 'perf/urgent' into perf/core, to pick up latest fixes and refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'samples')
27 files changed, 960 insertions, 288 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6c7468eb3684..87246be6feb8 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example +hostprogs-y += load_sock_ops # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o @@ -111,6 +113,12 @@ always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o always += test_map_in_map_kern.o always += cookie_uid_helper_example.o +always += tcp_synrto_kern.o +always += tcp_rwnd_kern.o +always += tcp_bufs_kern.o +always += tcp_cong_kern.o +always += tcp_iw_kern.o +always += tcp_clamp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -130,6 +138,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf +HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf @@ -160,6 +169,17 @@ clean: $(MAKE) -C ../../ M=$(CURDIR) clean @rm -f *~ +$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c + $(call if_changed_dep,cc_s_c) + +$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE + $(call filechk,offsets,__SYSCALL_NRS_H__) + +clean-files += syscall_nrs.h + +FORCE: + + # Verify LLVM compiler tools are available and bpf target is supported by llc .PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) @@ -180,11 +200,14 @@ verify_target_bpf: verify_cmds $(src)/*.c: verify_target_bpf +$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. $(obj)/%.o: $(src)/%.c - $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ + -I$(srctree)/tools/testing/selftests/bpf/ \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h deleted file mode 100644 index 9a9c95f2c9fb..000000000000 --- a/samples/bpf/bpf_helpers.h +++ /dev/null @@ -1,181 +0,0 @@ -#ifndef __BPF_HELPERS_H -#define __BPF_HELPERS_H - -/* helper macro to place programs, maps, license in - * different sections in elf_bpf file. Section names - * are interpreted by elf_bpf loader - */ -#define SEC(NAME) __attribute__((section(NAME), used)) - -/* helper functions called from eBPF programs written in C */ -static void *(*bpf_map_lookup_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_lookup_elem; -static int (*bpf_map_update_elem)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_update_elem; -static int (*bpf_map_delete_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_delete_elem; -static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read; -static unsigned long long (*bpf_ktime_get_ns)(void) = - (void *) BPF_FUNC_ktime_get_ns; -static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = - (void *) BPF_FUNC_trace_printk; -static void (*bpf_tail_call)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_tail_call; -static unsigned long long (*bpf_get_smp_processor_id)(void) = - (void *) BPF_FUNC_get_smp_processor_id; -static unsigned long long (*bpf_get_current_pid_tgid)(void) = - (void *) BPF_FUNC_get_current_pid_tgid; -static unsigned long long (*bpf_get_current_uid_gid)(void) = - (void *) BPF_FUNC_get_current_uid_gid; -static int (*bpf_get_current_comm)(void *buf, int buf_size) = - (void *) BPF_FUNC_get_current_comm; -static int (*bpf_perf_event_read)(void *map, int index) = - (void *) BPF_FUNC_perf_event_read; -static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = - (void *) BPF_FUNC_clone_redirect; -static int (*bpf_redirect)(int ifindex, int flags) = - (void *) BPF_FUNC_redirect; -static int (*bpf_perf_event_output)(void *ctx, void *map, - unsigned long long flags, void *data, - int size) = - (void *) BPF_FUNC_perf_event_output; -static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = - (void *) BPF_FUNC_get_stackid; -static int (*bpf_probe_write_user)(void *dst, void *src, int size) = - (void *) BPF_FUNC_probe_write_user; -static int (*bpf_current_task_under_cgroup)(void *map, int index) = - (void *) BPF_FUNC_current_task_under_cgroup; -static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_get_tunnel_key; -static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_set_tunnel_key; -static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_get_tunnel_opt; -static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_set_tunnel_opt; -static unsigned long long (*bpf_get_prandom_u32)(void) = - (void *) BPF_FUNC_get_prandom_u32; -static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_head; - -/* llvm builtin functions that eBPF C program may use to - * emit BPF_LD_ABS and BPF_LD_IND instructions - */ -struct sk_buff; -unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); -unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); -unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); - -/* a helper structure used by eBPF C program - * to describe map attributes to elf_bpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; -}; - -static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = - (void *) BPF_FUNC_skb_load_bytes; -static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = - (void *) BPF_FUNC_skb_store_bytes; -static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l3_csum_replace; -static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l4_csum_replace; -static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_skb_under_cgroup; -static int (*bpf_skb_change_head)(void *, int len, int flags) = - (void *) BPF_FUNC_skb_change_head; - -#if defined(__x86_64__) - -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) - -#elif defined(__s390x__) - -#define PT_REGS_PARM1(x) ((x)->gprs[2]) -#define PT_REGS_PARM2(x) ((x)->gprs[3]) -#define PT_REGS_PARM3(x) ((x)->gprs[4]) -#define PT_REGS_PARM4(x) ((x)->gprs[5]) -#define PT_REGS_PARM5(x) ((x)->gprs[6]) -#define PT_REGS_RET(x) ((x)->gprs[14]) -#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->gprs[2]) -#define PT_REGS_SP(x) ((x)->gprs[15]) -#define PT_REGS_IP(x) ((x)->psw.addr) - -#elif defined(__aarch64__) - -#define PT_REGS_PARM1(x) ((x)->regs[0]) -#define PT_REGS_PARM2(x) ((x)->regs[1]) -#define PT_REGS_PARM3(x) ((x)->regs[2]) -#define PT_REGS_PARM4(x) ((x)->regs[3]) -#define PT_REGS_PARM5(x) ((x)->regs[4]) -#define PT_REGS_RET(x) ((x)->regs[30]) -#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[0]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->pc) - -#elif defined(__powerpc__) - -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#elif defined(__sparc__) - -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) -#if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#else -#define PT_REGS_IP(x) ((x)->pc) -#endif - -#endif - -#ifdef __powerpc__ -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(__sparc__) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#else -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) -#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) -#endif - -#endif diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 74456b3eb89a..899f40310bc3 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + bool is_sockops = strncmp(event, "sockops", 7) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_CGROUP_SKB; } else if (is_cgroup_sk) { prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + } else if (is_sockops) { + prog_type = BPF_PROG_TYPE_SOCK_OPS; } else { printf("Unknown event '%s'\n", event); return -1; @@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; - if (is_socket) { - event += 6; + if (is_socket || is_sockops) { + if (is_socket) + event += 6; + else + event += 7; if (*event != '/') return 0; event++; @@ -516,16 +522,18 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) processed_sec[maps_shndx] = true; } - /* load programs that need map fixup (relocations) */ + /* process all relo sections, and rewrite bpf insns for maps */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) continue; if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) continue; + if (shdr.sh_type == SHT_REL) { struct bpf_insn *insns; + /* locate prog sec that need map fixup (relocations) */ if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, &shdr_prog, &data_prog)) continue; @@ -535,26 +543,15 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) continue; insns = (struct bpf_insn *) data_prog->d_buf; - - processed_sec[shdr.sh_info] = true; - processed_sec[i] = true; + processed_sec[i] = true; /* relo section */ if (parse_relo_and_apply(data, symbols, &shdr, insns, map_data, nr_maps)) continue; - - if (memcmp(shname_prog, "kprobe/", 7) == 0 || - memcmp(shname_prog, "kretprobe/", 10) == 0 || - memcmp(shname_prog, "tracepoint/", 11) == 0 || - memcmp(shname_prog, "xdp", 3) == 0 || - memcmp(shname_prog, "perf_event", 10) == 0 || - memcmp(shname_prog, "socket", 6) == 0 || - memcmp(shname_prog, "cgroup/", 7) == 0) - load_and_attach(shname_prog, insns, data_prog->d_size); } } - /* load programs that don't use maps */ + /* load programs */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) @@ -569,8 +566,13 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0 || - memcmp(shname, "cgroup/", 7) == 0) - load_and_attach(shname, data->d_buf, data->d_size); + memcmp(shname, "cgroup/", 7) == 0 || + memcmp(shname, "sockops", 7) == 0) { + ret = load_and_attach(shname, data->d_buf, + data->d_size); + if (ret != 0) + goto done; + } } ret = 0; diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c new file mode 100644 index 000000000000..e5da6cf71a3e --- /dev/null +++ b/samples/bpf/load_sock_ops.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> + +static void usage(char *pname) +{ + printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname); + printf("\tLoad and attach a sock_ops program to the specified " + "cgroup\n"); + printf("\tIf \"-l\" is used, the program will continue to run\n"); + printf("\tprinting the BPF log buffer\n"); + printf("\tIf the specified filename does not end in \".o\", it\n"); + printf("\tappends \"_kern.o\" to the name\n"); + printf("\n"); + printf(" %s -r <cg-path>\n", pname); + printf("\tDetaches the currently attached sock_ops program\n"); + printf("\tfrom the specified cgroup\n"); + printf("\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int logFlag = 0; + int error = 0; + char *cg_path; + char fn[500]; + char *prog; + int cg_fd; + + if (argc < 3) + usage(argv[0]); + + if (!strcmp(argv[1], "-r")) { + cg_path = argv[2]; + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + if (error) { + printf("ERROR: bpf_prog_detach: %d (%s)\n", + error, strerror(errno)); + return 2; + } + return 0; + } else if (!strcmp(argv[1], "-h")) { + usage(argv[0]); + } else if (!strcmp(argv[1], "-l")) { + logFlag = 1; + if (argc < 4) + usage(argv[0]); + } + + prog = argv[argc - 1]; + cg_path = argv[argc - 2]; + if (strlen(prog) > 480) { + fprintf(stderr, "ERROR: program name too long (> 480 chars)\n"); + return 3; + } + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + + if (!strcmp(prog + strlen(prog)-2, ".o")) + strcpy(fn, prog); + else + sprintf(fn, "%s_kern.o", prog); + if (logFlag) + printf("loading bpf file:%s\n", fn); + if (load_bpf_file(fn)) { + printf("ERROR: load_bpf_file failed for: %s\n", fn); + printf("%s", bpf_log_buf); + return 4; + } + if (logFlag) + printf("TCP BPF Loaded %s\n", fn); + + error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (error) { + printf("ERROR: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + return 5; + } else if (logFlag) { + read_trace_pipe(); + } + + return error; +} diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index b5524d417eb5..877ecf8fc5ac 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -8,6 +8,10 @@ #include <arpa/inet.h> #include <sys/resource.h> +#define PARSE_IP 3 +#define PARSE_IP_PROG_FD (prog_fd[0]) +#define PROG_ARRAY_FD (map_fd[0]) + struct bpf_flow_keys { __be32 src; __be32 dst; @@ -28,7 +32,9 @@ int main(int argc, char **argv) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; - int i, sock; + int i, sock, err, id, key = PARSE_IP; + struct bpf_prog_info info = {}; + uint32_t info_len = sizeof(info); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); setrlimit(RLIMIT_MEMLOCK, &r); @@ -38,6 +44,13 @@ int main(int argc, char **argv) return 1; } + /* Test fd array lookup which returns the id of the bpf_prog */ + err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len); + assert(!err); + err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id); + assert(!err); + assert(id == info.id); + sock = open_raw_sock("lo"); assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4], diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c new file mode 100644 index 000000000000..ce2a30b11248 --- /dev/null +++ b/samples/bpf/syscall_nrs.c @@ -0,0 +1,12 @@ +#include <uapi/linux/unistd.h> +#include <linux/kbuild.h> + +#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR) + +void syscall_defines(void) +{ + COMMENT("Linux system call numbers."); + SYSNR(__NR_write); + SYSNR(__NR_read); + SYSNR(__NR_mmap); +} diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c new file mode 100644 index 000000000000..ee83bbabd17c --- /dev/null +++ b/samples/bpf/tcp_bufs_kern.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets and send + * and receive buffers to 1.5MB. This would usually be done after + * doing appropriate checks that indicate the hosts are far enough + * away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_bufs(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Nothing to do */ + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c new file mode 100644 index 000000000000..d68eadd9ca2d --- /dev/null +++ b/samples/bpf/tcp_clamp_kern.c @@ -0,0 +1,102 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp + * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within + * the same datacenter. For his example, we assume they are within the same + * datacenter when the first 5.5 bytes of their IPv6 addresses are the same. + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_clamp(struct bpf_sock_ops *skops) +{ + int bufsize = 150000; + int to_init = 10; + int clamp = 100; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check that both hosts are within same datacenter. For this example + * it is the case when the first 5.5 bytes of their IPv6 addresses are + * the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + rv = to_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, + &bufsize, sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c new file mode 100644 index 000000000000..dac15bce1fa9 --- /dev/null +++ b/samples/bpf/tcp_cong_kern.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set congestion control to dctcp when both hosts are + * in the same datacenter (as deteremined by IPv6 prefix). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_cong(struct bpf_sock_ops *skops) +{ + char cong[] = "dctcp"; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check if both hosts are in the same datacenter. For this + * example they are if the 1st 5.5 bytes in the IPv6 address + * are the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_NEEDS_ECN: + rv = 1; + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c new file mode 100644 index 000000000000..23c5122ef819 --- /dev/null +++ b/samples/bpf/tcp_iw_kern.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial congestion window and initial receive + * window to 40 packets and send and receive buffers to 1.5MB. This + * would usually be done after doing appropriate checks that indicate + * the hosts are far enough away (i.e. large RTT). + * |
