diff options
Diffstat (limited to 'samples')
39 files changed, 2216 insertions, 971 deletions
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 8ae4940025f8..dbb817dbacfc 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -1,7 +1,6 @@ cpustat fds_example lathist -load_sock_ops lwt_len_hist map_perf_test offwaketime diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index db1a91dfa702..65e667bdf979 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -40,7 +40,6 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example -hostprogs-y += load_sock_ops hostprogs-y += xdp_redirect hostprogs-y += xdp_redirect_map hostprogs-y += xdp_redirect_cpu @@ -53,6 +52,7 @@ hostprogs-y += xdpsock hostprogs-y += xdp_fwd hostprogs-y += task_fd_query hostprogs-y += xdp_sample_pkts +hostprogs-y += hbm # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -60,9 +60,9 @@ LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o -fds_example-objs := bpf_load.o fds_example.o -sockex1-objs := bpf_load.o sockex1_user.o -sockex2-objs := bpf_load.o sockex2_user.o +fds_example-objs := fds_example.o +sockex1-objs := sockex1_user.o +sockex2-objs := sockex2_user.o sockex3-objs := bpf_load.o sockex3_user.o tracex1-objs := bpf_load.o tracex1_user.o tracex2-objs := bpf_load.o tracex2_user.o @@ -71,7 +71,6 @@ tracex4-objs := bpf_load.o tracex4_user.o tracex5-objs := bpf_load.o tracex5_user.o tracex6-objs := bpf_load.o tracex6_user.o tracex7-objs := bpf_load.o tracex7_user.o -load_sock_ops-objs := bpf_load.o load_sock_ops.o test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS) lathist-objs := bpf_load.o lathist_user.o @@ -87,18 +86,18 @@ test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o -xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o +xdp_router_ipv4-objs := xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS) sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o +xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect-objs := bpf_load.o xdp_redirect_user.o -xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o +xdp_redirect-objs := xdp_redirect_user.o +xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o @@ -109,6 +108,7 @@ xdpsock-objs := xdpsock_user.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) +hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -163,10 +163,10 @@ always += xdp2skb_meta_kern.o always += syscall_tp_kern.o always += cpustat_kern.o always += xdp_adjust_tail_kern.o -always += xdpsock_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o +always += hbm_out_kern.o KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -266,6 +266,8 @@ $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h +$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h +$(obj)/hbm.o: $(src)/hbm.h # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h index 20dc5cefec84..544237980582 100644 --- a/samples/bpf/bpf_insn.h +++ b/samples/bpf/bpf_insn.h @@ -164,6 +164,16 @@ struct bpf_insn; .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -174,6 +184,16 @@ struct bpf_insn; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh new file mode 100755 index 000000000000..56c8b4115c95 --- /dev/null +++ b/samples/bpf/do_hbm_test.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019 Facebook +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. + +Usage() { + echo "Script for testing HBM (Host Bandwidth Manager) framework." + echo "It creates a cgroup to use for testing and load a BPF program to limit" + echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create" + echo "loads. The output is the goodput in Mbps (unless -D was used)." + echo "" + echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]" + echo " [-d=<delay>|--delay=<delay>] [--debug] [-E]" + echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]" + echo " [-l] [-N] [-p=<port>|--port=<port>] [-P]" + echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]" + echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]" + echo " Where:" + echo " out egress (default)" + echo " -b or --bpf BPF program filename to load and attach." + echo " Default is hbm_out_kern.o for egress," + echo " -c or -cc TCP congestion control (cubic or dctcp)" + echo " --debug print BPF trace buffer" + echo " -d or --delay add a delay in ms using netem" + echo " -D In addition to the goodput in Mbps, it also outputs" + echo " other detailed information. This information is" + echo " test dependent (i.e. iperf3 or netperf)." + echo " -E enable ECN (not required for dctcp)" + echo " -f or --flows number of concurrent flows (default=1)" + echo " -i or --id cgroup id (an integer, default is 1)" + echo " -N use netperf instead of iperf3" + echo " -l do not limit flows using loopback" + echo " -h Help" + echo " -p or --port iperf3 port (default is 5201)" + echo " -P use an iperf3 instance for each flow" + echo " -q use the specified qdisc" + echo " -r or --rate rate in Mbps (default 1s 1Gbps)" + echo " -R Use TCP_RR for netperf. 1st flow has req" + echo " size of 10KB, rest of 1MB. Reply in all" + echo " cases is 1 byte." + echo " More detailed output for each flow can be found" + echo " in the files netperf.<cg>.<flow>, where <cg> is the" + echo " cgroup id as specified with the -i flag, and <flow>" + echo " is the flow id starting at 1 and increasing by 1 for" + echo " flow (as specified by -f)." + echo " -s or --server hostname of netperf server. Used to create netperf" + echo " test traffic between to hosts (default is within host)" + echo " netserver must be running on the host." + echo " -S or --stats whether to update hbm stats (default is yes)." + echo " -t or --time duration of iperf3 in seconds (default=5)" + echo " -w Work conserving flag. cgroup can increase its" + echo " bandwidth beyond the rate limit specified" + echo " while there is available bandwidth. Current" + echo " implementation assumes there is only one NIC" + echo " (eth0), but can be extended to support multiple" + echo " NICs." + echo " cubic or dctcp specify which TCP CC to use" + echo " " + exit +} + +#set -x + +debug_flag=0 +args="$@" +name="$0" +netem=0 +cc=x +dir="-o" +dir_name="out" +dur=5 +flows=1 +id=1 +prog="" +port=5201 +rate=1000 +multi_iperf=0 +flow_cnt=1 +use_netperf=0 +rr=0 +ecn=0 +details=0 +server="" +qdisc="" +flags="" +do_stats=0 + +function start_hbm () { + rm -f hbm.out + echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out + echo " " >> hbm.out + ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 & + echo $! +} + +processArgs () { + for i in $args ; do + case $i in + # Support for upcomming ingress rate limiting + #in) # support for upcoming ingress rate limiting + # dir="-i" + # dir_name="in" + # ;; + out) + dir="-o" + dir_name="out" + ;; + -b=*|--bpf=*) + prog="${i#*=}" + ;; + -c=*|--cc=*) + cc="${i#*=}" + ;; + --debug) + flags="$flags -d" + debug_flag=1 + ;; + -d=*|--delay=*) + netem="${i#*=}" + ;; + -D) + details=1 + ;; + -E) + ecn=1 + ;; + # Support for upcomming fq Early Departure Time egress rate limiting + #--edt) + # prog="hbm_out_edt_kern.o" + # qdisc="fq" + # ;; + -f=*|--flows=*) + flows="${i#*=}" + ;; + -i=*|--id=*) + id="${i#*=}" + ;; + -l) + flags="$flags -l" + ;; + -N) + use_netperf=1 + ;; + -p=*|--port=*) + port="${i#*=}" + ;; + -P) + multi_iperf=1 + ;; + -q=*) + qdisc="${i#*=}" + ;; + -r=*|--rate=*) + rate="${i#*=}" + ;; + -R) + rr=1 + ;; + -s=*|--server=*) + server="${i#*=}" + ;; + -S|--stats) + flags="$flags -s" + do_stats=1 + ;; + -t=*|--time=*) + dur="${i#*=}" + ;; + -w) + flags="$flags -w" + ;; + cubic) + cc=cubic + ;; + dctcp) + cc=dctcp + ;; + *) + echo "Unknown arg:$i" + Usage + ;; + esac + done +} + +processArgs + +if [ $debug_flag -eq 1 ] ; then + rm -f hbm_out.log +fi + +hbm_pid=$(start_hbm) +usleep 100000 + +host=`hostname` +cg_base_dir=/sys/fs/cgroup +cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id" + +echo $$ >> $cg_dir/cgroup.procs + +ulimit -l unlimited + +rm -f ss.out +rm -f hbm.[0-9]*.$dir_name +if [ $ecn -ne 0 ] ; then + sysctl -w -q -n net.ipv4.tcp_ecn=1 +fi + +if [ $use_netperf -eq 0 ] ; then + cur_cc=`sysctl -n net.ipv4.tcp_congestion_control` + if [ "$cc" != "x" ] ; then + sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc + fi +fi + +if [ "$netem" -ne "0" ] ; then + if [ "$qdisc" != "" ] ; then + echo "WARNING: Ignoring -q options because -d option used" + fi + tc qdisc del dev lo root > /dev/null 2>&1 + tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1 +elif [ "$qdisc" != "" ] ; then + tc qdisc del dev lo root > /dev/null 2>&1 + tc qdisc add dev lo root $qdisc > /dev/null 2>&1 +fi + +n=0 +m=$[$dur * 5] +hn="::1" +if [ $use_netperf -ne 0 ] ; then + if [ "$server" != "" ] ; then + hn=$server + fi +fi + +( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) & + +if [ $use_netperf -ne 0 ] ; then + begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \ + awk '{ print $1 }'` + if [ "$begNetserverPid" == "" ] ; then + if [ "$server" == "" ] ; then + ( ./netserver > /dev/null 2>&1) & + usleep 100000 + fi + fi + flow_cnt=1 + if [ "$server" == "" ] ; then + np_server=$host + else + np_server=$server + fi + if [ "$cc" == "x" ] ; then + np_cc="" + else + np_cc="-K $cc,$cc" + fi + replySize=1 + while [ $flow_cnt -le $flows ] ; do + if [ $rr -ne 0 ] ; then + reqSize=1M + if [ $flow_cnt -eq 1 ] ; then + reqSize=10K + fi + if [ "$dir" == "-i" ] ; then + replySize=$reqSize + reqSize=1 + fi + ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) & + else + if [ "$dir" == "-i" ] ; then + ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) & + else + ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) & + fi + fi + flow_cnt=$[flow_cnt+1] + done + +# sleep for duration of test (plus some buffer) + n=$[dur+2] + sleep $n + +# force graceful termination of netperf + pids=`pgrep netperf` + for p in $pids ; do + kill -SIGALRM $p + done + + flow_cnt=1 + rate=0 + if [ $details -ne 0 ] ; then + echo "" + echo "Details for HBM in cgroup $id" + if [ $do_stats -eq 1 ] ; then + if [ -e hbm.$id.$dir_name ] ; then + cat hbm.$id.$dir_name + fi + fi + fi + while [ $flow_cnt -le $flows ] ; do + if [ "$dir" == "-i" ] ; then + r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"` + else + r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"` + fi + echo "rate for flow $flow_cnt: $r" + rate=$[rate+r] + if [ $details -ne 0 ] ; then + echo "-----" + echo "Details for cgroup $id, flow $flow_cnt" + cat netperf.$id.$flow_cnt + fi + flow_cnt=$[flow_cnt+1] + done + if [ $details -ne 0 ] ; then + echo "" + delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"` + echo "PING AVG DELAY:$delay" + echo "AGGREGATE_GOODPUT:$rate" + else + echo $rate + fi +elif [ $multi_iperf -eq 0 ] ; then + (iperf3 -s -p $port -1 > /dev/null 2>&1) & + usleep 100000 + iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id + rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"` + rate=`echo $rates | grep -o "[0-9]*$"` + + if [ $details -ne 0 ] ; then + echo "" + echo "Details for HBM in cgroup $id" + if [ $do_stats -eq 1 ] ; then + if [ -e hbm.$id.$dir_name ] ; then + cat hbm.$id.$dir_name + fi + fi + delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"` + echo "PING AVG DELAY:$delay" + echo "AGGREGATE_GOODPUT:$rate" + else + echo $rate + fi +else + flow_cnt=1 + while [ $flow_cnt -le $flows ] ; do + (iperf3 -s -p $port -1 > /dev/null 2>&1) & + ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) & + port=$[port+1] + flow_cnt=$[flow_cnt+1] + done + n=$[dur+1] + sleep $n + flow_cnt=1 + rate=0 + if [ $details -ne 0 ] ; then + echo "" + echo "Details for HBM in cgroup $id" + if [ $do_stats -eq 1 ] ; then + if [ -e hbm.$id.$dir_name ] ; then + cat hbm.$id.$dir_name + fi + fi + fi + + while [ $flow_cnt -le $flows ] ; do + r=`cat iperf3.$id.$flow_cnt` +# echo "rate for flow $flow_cnt: $r" + if [ $details -ne 0 ] ; then + echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r" + fi + rate=$[rate+r] + flow_cnt=$[flow_cnt+1] + done + if [ $details -ne 0 ] ; then + delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"` + echo "PING AVG DELAY:$delay" + echo "AGGREGATE_GOODPUT:$rate" + else + echo $rate + fi +fi + +if [ $use_netperf -eq 0 ] ; then + sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc +fi +if [ $ecn -ne 0 ] ; then + sysctl -w -q -n net.ipv4.tcp_ecn=0 +fi +if [ "$netem" -ne "0" ] ; then + tc qdisc del dev lo root > /dev/null 2>&1 +fi + +sleep 2 + +hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'` +if [ "$hbmPid" == "$hbm_pid" ] ; then + kill $hbm_pid +fi + +sleep 1 + +# Detach any BPF programs that may have lingered +ttx=`bpftool cgroup tree | grep hbm` +v=2 +for x in $ttx ; do + if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then + cg=$x ; v=0 + else + if [ $v -eq 0 ] ; then + id=$x ; v=1 + else + if [ $v -eq 1 ] ; then + type=$x ; bpftool cgroup detach $cg $type id $id + v=0 + fi + fi + fi +done + +if [ $use_netperf -ne 0 ] ; then + if [ "$server" == "" ] ; then + if [ "$begNetserverPid" == "" ] ; then + netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'` + if [ "$netserverPid" != "" ] ; then + kill $netserverPid + fi + fi + fi +fi +exit diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 9854854f05d1..e51eb060244e 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -14,8 +14,8 @@ #include <bpf/bpf.h> +#include "bpf/libbpf.h" #include "bpf_insn.h" -#include "bpf_load.h" #include "sock_example.h" #define BPF_F_PIN (1 << 0) @@ -57,10 +57,14 @@ static int bpf_prog_create(const char *object) BPF_EXIT_INSN(), }; size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); + char bpf_log_buf[BPF_LOG_BUF_SIZE]; + struct bpf_object *obj; + int prog_fd; if (object) { - assert(!load_bpf_file((char *)object)); - return prog_fd[0]; + assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC, + &obj, &prog_fd)); + return prog_fd; } else { return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, insns, insns_cnt, "GPL", 0, diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c new file mode 100644 index 000000000000..8408ccb7409f --- /dev/null +++ b/samples/bpf/hbm.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Example program for Host Bandwidth Managment + * + * This program loads a cgroup skb BPF program to enforce cgroup output + * (egress) or input (ingress) bandwidth limits. + * + * USAGE: hbm [-d] [-l] [-n <id>] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog] + * Where: + * -d Print BPF trace debug buffer + * -l Also limit flows doing loopback + * -n <#> To create cgroup \"/hbm#\" and attach prog + * Default is /hbm1 + * -r <rate> Rate limit in Mbps + * -s Get HBM stats (marked, dropped, etc.) + * -t <time> Exit after specified seconds (deault is 0) + * -w Work conserving flag. cgroup can increase its bandwidth + * beyond the rate limit specified while there is available + * bandwidth. Current implementation assumes there is only + * NIC (eth0), but can be extended to support multiple NICs. + * Currrently only supported for egress. + * -h Print this info + * prog BPF program file name. Name defaults to hbm_out_kern.o + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> + +#include "bpf_load.h" +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" +#include "hbm.h" +#include "bpf_util.h" +#include "bpf/bpf.h" +#include "bpf/libbpf.h" + +bool outFlag = true; +int minRate = 1000; /* cgroup rate limit in Mbps */ +int rate = 1000; /* can grow if rate conserving is enabled */ +int dur = 1; +bool stats_flag; +bool loopback_flag; +bool debugFlag; +bool work_conserving_flag; + +static void Usage(void); +static void read_trace_pipe2(void); +static void do_error(char *msg, bool errno_flag); + +#define DEBUGFS "/sys/kernel/debug/tracing/" + +struct bpf_object *obj; +int bpfprog_fd; +int cgroup_storage_fd; + +static void read_trace_pipe2(void) +{ + int trace_fd; + FILE *outf; + char *outFname = "hbm_out.log"; + + trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) { + printf("Error opening trace_pipe\n"); + return; + } + +// Future support of ingress +// if (!outFlag) +// outFname = "hbm_in.log"; + outf = fopen(outFname, "w"); + + if (outf == NULL) + printf("Error creating %s\n", outFname); + + while (1) { + static char buf[4097]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf) - 1); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + if (outf != NULL) { + fprintf(outf, "%s\n", buf); + fflush(outf); + } + } + } +} + +static void do_error(char *msg, bool errno_flag) +{ + if (errno_flag) + printf("ERROR: %s, errno: %d\n", msg, errno); + else + printf("ERROR: %s\n", msg); + exit(1); +} + +static int prog_load(char *prog) +{ + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .file = prog, + .expected_attach_type = BPF_CGROUP_INET_EGRESS, + }; + int map_fd; + struct bpf_map *map; + + int ret = 0; + + if (access(prog, O_RDONLY) < 0) { + printf("Error accessing file %s: %s\n", prog, strerror(errno)); + return 1; + } + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd)) + ret = 1; + if (!ret) { + map = bpf_object__find_map_by_name(obj, "queue_stats"); + map_fd = bpf_map__fd(map); + if (map_fd < 0) { + printf("Map not found: %s\n", strerror(map_fd)); + ret = 1; + } + } + + if (ret) { + printf("ERROR: load_bpf_file failed for: %s\n", prog); + printf(" Output from verifier:\n%s\n------\n", bpf_log_buf); + ret = -1; + } else { + ret = map_fd; + } + + return ret; +} + +static int run_bpf_prog(char *prog, int c |
