summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/cgroup.c1
-rw-r--r--kernel/bpf/map_in_map.c17
-rw-r--r--kernel/bpf/stackmap.c9
-rw-r--r--net/core/filter.c32
-rw-r--r--net/core/lwt_bpf.c1
-rw-r--r--net/xdp/xdp_umem.c16
-rw-r--r--samples/bpf/Makefile1
-rw-r--r--samples/bpf/asm_goto_workaround.h16
-rw-r--r--tools/bpf/bpftool/Makefile9
-rw-r--r--tools/bpf/bpftool/json_writer.c7
-rw-r--r--tools/bpf/bpftool/json_writer.h5
-rw-r--r--tools/include/uapi/linux/pkt_sched.h1163
-rw-r--r--tools/lib/bpf/bpf.c19
-rw-r--r--tools/testing/selftests/bpf/Makefile1
-rw-r--r--tools/testing/selftests/bpf/test_progs.c30
16 files changed, 1293 insertions, 36 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a2f53642592b..befe570be5ba 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
return kind_ops[BTF_INFO_KIND(t->info)];
}
-bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
return BTF_STR_OFFSET_VALID(offset) &&
offset < btf->hdr.str_len;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..ab612fe9862f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
+ /* fall through */
default:
return NULL;
}
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@
struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
{
struct bpf_map *inner_map, *inner_map_meta;
+ u32 inner_map_meta_size;
struct fd f;
f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
return ERR_PTR(-EINVAL);
}
- inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+ inner_map_meta_size = sizeof(*inner_map_meta);
+ /* In some cases verifier needs to access beyond just base map. */
+ if (inner_map->ops == &array_map_ops)
+ inner_map_meta_size = sizeof(struct bpf_array);
+
+ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
if (!inner_map_meta) {
fdput(f);
return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->key_size = inner_map->key_size;
inner_map_meta->value_size = inner_map->value_size;
inner_map_meta->map_flags = inner_map->map_flags;
- inner_map_meta->ops = inner_map->ops;
inner_map_meta->max_entries = inner_map->max_entries;
+ /* Misc members not needed in bpf_map_meta_equal() check. */
+ inner_map_meta->ops = inner_map->ops;
+ if (inner_map->ops == &array_map_ops) {
+ inner_map_meta->unpriv_array = inner_map->unpriv_array;
+ container_of(inner_map_meta, struct bpf_array, map)->index_mask =
+ container_of(inner_map, struct bpf_array, map)->index_mask;
+ }
+
fdput(f);
return inner_map_meta;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index d9e2483669d0..d43b14535827 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
if (nhdr->n_type == BPF_BUILD_ID &&
nhdr->n_namesz == sizeof("GNU") &&
- nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+ nhdr->n_descsz > 0 &&
+ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
memcpy(build_id,
note_start + note_offs +
ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
- BPF_BUILD_ID_SIZE);
+ nhdr->n_descsz);
+ memset(build_id + nhdr->n_descsz, 0,
+ BPF_BUILD_ID_SIZE - nhdr->n_descsz);
return 0;
}
new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -311,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
+ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
}
return;
}
@@ -321,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
+ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
continue;
}
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
diff --git a/net/core/filter.c b/net/core/filter.c
index 2b3b436ef545..7559d6835ecb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2020,18 +2020,19 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
u32 flags)
{
- /* skb->mac_len is not set on normal egress */
- unsigned int mlen = skb->network_header - skb->mac_header;
+ unsigned int mlen = skb_network_offset(skb);
- __skb_pull(skb, mlen);
+ if (mlen) {
+ __skb_pull(skb, mlen);
- /* At ingress, the mac header has already been pulled once.
- * At egress, skb_pospull_rcsum has to be done in case that
- * the skb is originated from ingress (i.e. a forwarded skb)
- * to ensure that rcsum starts at net header.
- */
- if (!skb_at_tc_ingress(skb))
- skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ }
skb_pop_mac_header(skb);
skb_reset_mac_len(skb);
return flags & BPF_F_INGRESS ?
@@ -4119,6 +4120,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
break;
case SO_MAX_PACING_RATE: /* 32bit version */
+ if (val != ~0U)
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
sk->sk_pacing_rate = min(sk->sk_pacing_rate,
sk->sk_max_pacing_rate);
@@ -4132,7 +4137,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
sk->sk_rcvlowat = val ? : 1;
break;
case SO_MARK:
- sk->sk_mark = val;
+ if (sk->sk_mark != val) {
+ sk->sk_mark = val;
+ sk_dst_reset(sk);
+ }
break;
default:
ret = -EINVAL;
@@ -5309,7 +5317,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
- /* else: fall through */
+ /* else, fall through */
default:
return NULL;
}
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 3e85437f7106..a648568c5e8f 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -63,6 +63,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
lwt->name ? : "<unknown>");
ret = BPF_OK;
} else {
+ skb_reset_mac_header(skb);
ret = skb_do_redirect(skb);
if (ret == 0)
ret = BPF_REDIRECT;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index a264cf2accd0..d4de871e7d4d 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -41,13 +41,20 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
* not know if the device has more tx queues than rx, or the opposite.
* This might also change during run time.
*/
-static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
- u16 queue_id)
+static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
+ u16 queue_id)
{
+ if (queue_id >= max_t(unsigned int,
+ dev->real_num_rx_queues,
+ dev->real_num_tx_queues))
+ return -EINVAL;
+
if (queue_id < dev->real_num_rx_queues)
dev->_rx[queue_id].umem = umem;
if (queue_id < dev->real_num_tx_queues)
dev->_tx[queue_id].umem = umem;
+
+ return 0;
}
struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
@@ -88,7 +95,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
goto out_rtnl_unlock;
}
- xdp_reg_umem_at_qid(dev, umem, queue_id);
+ err = xdp_reg_umem_at_qid(dev, umem, queue_id);
+ if (err)
+ goto out_rtnl_unlock;
+
umem->dev = dev;
umem->queue_id = queue_id;
if (force_copy)
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 66ae15f27c70..db1a91dfa702 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -279,6 +279,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
+ -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
new file mode 100644
index 000000000000..5cd7c1d1a5d5
--- /dev/null
+++ b/samples/bpf/asm_goto_workaround.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Facebook */
+#ifndef __ASM_GOTO_WORKAROUND_H
+#define __ASM_GOTO_WORKAROUND_H
+
+/* this will bring in asm_volatile_goto macro definition
+ * if enabled by compiler and config options.
+ */
+#include <linux/types.h>
+
+#ifdef asm_volatile_goto
+#undef asm_volatile_goto
+#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#endif
+
+#endif
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 492f0f24e2d3..4ad1f0894d53 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -93,9 +93,16 @@ BFD_SRCS = jit_disasm.c
SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c))
ifeq ($(feature-libbfd),1)
+ LIBS += -lbfd -ldl -lopcodes
+else ifeq ($(feature-libbfd-liberty),1)
+ LIBS += -lbfd -ldl -lopcodes -liberty
+else ifeq ($(feature-libbfd-liberty-z),1)
+ LIBS += -lbfd -ldl -lopcodes -liberty -lz
+endif
+
+ifneq ($(filter -lbfd,$(LIBS)),)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
SRCS += $(BFD_SRCS)
-LIBS += -lbfd -lopcodes
endif
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index bff7ee026680..6046dcab51cc 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -1,15 +1,10 @@
-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
/*
* Simple streaming JSON writer
*
* This takes care of the annoying bits of JSON syntax like the commas
* after elements
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Stephen Hemminger <stephen@networkplumber.org>
*/
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
index c1ab51aed99c..cb9a1993681c 100644
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -5,11 +5,6 @@
* This takes care of the annoying bits of JSON syntax like the commas
* after elements
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Stephen Hemminger <stephen@networkplumber.org>
*/
diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h
new file mode 100644
index 000000000000..0d18b1d1fbbc
--- /dev/null
+++ b/tools/include/uapi/linux/pkt_sched.h
@@ -0,0 +1,1163 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_SCHED_H
+#define __LINUX_PKT_SCHED_H
+
+#include <linux/types.h>
+
+/* Logical priority bands not depending on specific packet scheduler.
+ Every scheduler will map them to real traffic classes, if it has
+ no more precise mechanism to classify packets.
+
+ These numbers have no special meaning, though their coincidence
+ with obsolete IPv6 values is not occasional :-). New IPv6 drafts
+ preferred full anarchy inspired by diffserv group.
+
+ Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
+ class, actually, as rule it will be handled with more care than
+ filler or even bulk.
+ */
+
+#define TC_PRIO_BESTEFFORT 0
+#define TC_PRIO_FILLER 1
+#define TC_PRIO_BULK 2
+#define TC_PRIO_INTERACTIVE_BULK 4
+#define TC_PRIO_INTERACTIVE 6
+#define TC_PRIO_CONTROL 7
+
+#define TC_PRIO_MAX 15
+
+/* Generic queue statistics, available for all the elements.
+ Particular schedulers may have also their private records.
+ */
+
+struct tc_stats {
+ __u64 bytes; /* Number of enqueued bytes */
+ __u32 packets; /* Number of enqueued packets */
+ __u32 drops; /* Packets dropped because of lack of resources */
+ __u32 overlimits; /* Number of throttle events when this
+ * flow goes out of allocated bandwidth */
+ __u32 bps; /* Current flow byte rate */
+ __u32 pps; /* Current flow packet rate */
+ __u32 qlen;
+ __u32 backlog;
+};
+
+struct tc_estimator {
+ signed char interval;
+ unsigned char ewma_log;
+};
+
+/* "Handles"
+ ---------
+
+ All the traffic control objects have 32bit identifiers, or "handles".
+
+ They can be considered as opaque numbers from user API viewpoint,
+ but actually they always consist of two fields: major and
+ minor numbers, which are interpreted by kernel specially,
+ that may be used by applications, though not recommended.
+
+ F.e. qdisc handles always have minor number equal to zero,
+ classes (or flows) have major equal to parent qdisc major, and
+ minor uniquely identifying class inside qdisc.
+
+ Macros to manipulate handles:
+ */
+
+#define TC_H_MAJ_MASK (0xFFFF0000U)
+#define TC_H_MIN_MASK (0x0000FFFFU)
+#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
+#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
+#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))
+
+#define TC_H_UNSPEC (0U)
+#define TC_H_ROOT (0xFFFFFFFFU)
+#define TC_H_INGRESS (0xFFFFFFF1U)
+#define TC_H_CLSACT TC_H_INGRESS
+
+#define TC_H_MIN_PRIORITY 0xFFE0U
+#define TC_H_MIN_INGRESS 0xFFF2U
+#define TC_H_MIN_EGRESS 0xFFF3U
+
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+ TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+ TC_LINKLAYER_ETHERNET,
+ TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
+struct tc_ratespec {
+ unsigned char cell_log;
+ __u8 linklayer; /* lower 4 bits */
+ unsigned short overhead;
+ short cell_align;
+ unsigned short mpu;
+ __u32 rate;
+};
+
+#define TC_RTAB_SIZE 1024
+
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned int linklayer;
+ unsigned int mpu;
+ unsigned int mtu;
+ unsigned int tsize;
+};
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
+/* FIFO section */
+
+struct tc_fifo_qopt {
+ __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */
+};
+
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+ __u32 limit; /* Queue length in packets. */
+};
+
+/* PRIO section */
+
+#define TCQ_PRIO_BANDS 16
+#define TCQ_MIN_PRIO_BANDS 2
+
+struct tc_prio_qopt {
+ int bands; /* Number of bands */
+ __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+};
+
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+ __u16 bands; /* Number of bands */
+ __u16 max_bands; /* Maximum number of queues */
+};
+
+/* PLUG section */
+
+#define TCQ_PLUG_BUFFER 0
+#define TCQ_PLUG_RELEASE_ONE 1
+#define TCQ_PLUG_RELEASE_INDEFINITE 2
+#define TCQ_PLUG_LIMIT 3
+
+struct tc_plug_qopt {
+ /* TCQ_PLUG_BUFFER: Inset a plug into the queue and
+ * buffer any incoming packets
+ * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
+ * to beginning of the next plug.
+ * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
+ * Stop buffering packets until the next TCQ_PLUG_BUFFER
+ * command is received (just act as a pass-thru queue).
+ * TCQ_PLUG_LIMIT: Increase/decrease queue size
+ */
+ int action;
+ __u32 limit;
+};
+
+/* TBF section */
+
+struct tc_tbf_qopt {
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+ __u32 limit;
+ __u32 buffer;
+ __u32 mtu;
+};
+
+enum {
+ TCA_TBF_UNSPEC,
+ TCA_TBF_PARMS,
+ TCA_TBF_RTAB,
+ TCA_TBF_PTAB,
+ TCA_TBF_RATE64,
+ TCA_TBF_PRATE64,
+ TCA_TBF_BURST,
+ TCA_TBF_PBURST,
+ TCA_TBF_PAD,
+ __TCA_TBF_MAX,
+};
+
+#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
+
+
+/* TEQL section */
+
+/* TEQL does not require any parameters */
+
+/* SFQ section */
+
+struct tc_sfq_qopt {
+ unsigned quantum; /* Bytes per round allocated to flow */
+ int perturb_period; /* Period of hash perturbation */
+ __u32 limit; /* Maximal packets in queue */
+ unsigned divisor; /* Hash divisor */
+ unsigned flows; /* Maximal number of flows */
+};
+
+struct tc_sfqred_stats {
+ __u32 prob_drop; /* Early drops, below max threshold */
+ __u32 forced_drop; /* Early drops, after max threshold */
+ __u32 prob_mark; /* Marked packets, below max threshold */
+ __u32 forced_mark; /* Marked packets, after max threshold */
+ __u32 prob_mark_head; /* Marked packets, below max threshold */
+ __u32 forced_mark_head;/* Marked packets, after max threshold */
+};
+
+struct tc_sfq_qopt_v1 {
+ struct tc_sfq_qopt v0;
+ unsigned int depth; /* max number of packets per flow */
+ unsigned int headdrop;
+/* SFQRED parameters */
+ __u32 limit; /* HARD maximal flow queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+ __u32 max_P; /* probability, high resolution */
+/* SFQRED stats */
+ struct tc_sfqred_stats stats;
+};
+
+
+struct tc_sfq_xstats {
+ __s32 allot;
+};
+
+/* RED section */
+
+enum {
+ TCA_RED_UNSPEC,
+ TCA_RED_PARMS,
+ TCA_RED_STAB,
+ TCA_RED_MAX_P,
+ __TCA_RED_MAX,
+};
+
+#define TCA_RED_MAX (__TCA_RED_MAX - 1)
+
+struct tc_red_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+#define TC_RED_ECN 1
+#define TC_RED_HARDDROP 2
+#define TC_RED_ADAPTATIVE 4
+};
+
+struct tc_red_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+};
+
+/* GRED section */
+
+#define MAX_DPs 16
+
+enum {
+ TCA_GRED_UNSPEC,
+ TCA_GRED_PARMS,
+ TCA_GRED_STAB,
+ TCA_GRED_DPS,
+ TCA_GRED_MAX_P,
+ TCA_GRED_LIMIT,
+ TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */
+ __TCA_GRED_MAX,
+};
+
+#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_ENTRY_UNSPEC,
+ TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */
+ __TCA_GRED_VQ_ENTRY_MAX,
+};
+#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_UNSPEC,
+ TCA_GRED_VQ_PAD,
+ TCA_GRED_VQ_DP, /* u32 */
+ TCA_GRED_VQ_STAT_BYTES, /* u64 */
+ TCA_GRED_VQ_STAT_PACKETS, /* u32 */
+ TCA_GRED_VQ_STAT_BACKLOG, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_PDROP, /* u32 */
+ TCA_GRED_VQ_STAT_OTHER, /* u32 */
+ TCA_GRED_VQ_FLAGS, /* u32 */
+ __TCA_GRED_VQ_MAX
+};
+
+#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1)
+
+struct tc_gred_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ __u32 DP; /* up to 2^32 DPs */
+ __u32 backlog;
+ __u32 qave;
+ __u32 forced;
+ __u32 early;
+ __u32 other;
+ __u32 pdrop;
+ __u8 Wlog; /* log(W) */
+ __u8 Plog; /* log(P_max/(qth_max-qth_min)) */
+ __u8 Scell_log; /* cell size for idle damping */
+ __u8 prio; /* prio of this VQ */
+ __u32 packets;
+ __u32 bytesin;
+};
+
+/* gred setup */
+struct tc_gred_sopt {
+ __u32 DPs;
+ __u32 def_DP;
+ __u8 grio;
+ __u8 flags;
+ __u16 pad1;
+};
+
+/* CHOKe section */
+
+enum {
+ TCA_CHOKE_UNSPEC,
+ TCA_CHOKE_PARMS,
+ TCA_CHOKE_STAB,
+ TCA_CHOKE_MAX_P,
+ __TCA_CHOKE_MAX,
+};
+
+#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
+
+struct tc_choke_qopt {
+ __u32 limit; /* Hard queue length (packets) */
+ __u32 qth_min; /* Min average threshold (packets) */
+ __u32 qth_max; /* Max average threshold (packets) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags; /* see RED flags */
+};
+
+struct tc_choke_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+ __u32 matched; /* Drops due to flow match */
+};
+
+/* HTB section */
+#define TC_HTB_NUMPRIO 8
+#define TC_HTB_MAXDEPTH 8
+#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */
+
+struct tc_htb_opt {
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 buffer;
+ __u32 cbuffer;
+ __u32 quantum;
+ __u32 level; /* out only */
+ __u32 prio;
+};
+struct tc_htb_glob {
+ __u32 version; /* to match HTB/TC */
+ __u32 rate2quantum; /* bps->quantum divisor */
+ __u32 defcls; /* default class number */
+ __u32 debug; /* debug flags */
+
+ /* stats */
+ __u32 direct_pkts; /* count of non shaped packets */
+};
+enum {
+ TCA_HTB_UNSPEC,
+ TCA_HTB_PARMS,
+ TCA_HTB_INIT,
+ TCA_HTB_CTAB,
+ TCA_HTB_RTAB,
+ TCA_HTB_DIRECT_QLEN,
+ TCA_HTB_RATE64,
+ TCA_HTB_CEIL64,
+ TCA_HTB_PAD,
+ __TCA_HTB_MAX,
+};
+
+#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
+
+struct tc_htb_xstats {
+ __u32 lends;
+ __u32 borrows;
+ __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */
+ __s32 tokens;
+ __s32 ctokens;
+};
+
+/* HFSC section */
+
+struct tc_hfsc_qopt {
+ __u16 defcls; /* default class */
+};
+
+struct tc_service_curve {
+ __u32 m1; /* slope of the first segment in bps */
+ __u32 d; /* x-projection of the first segment in us */
+ __u32 m2; /* slope of the second segment in bps */
+};
+
+struct tc_hfsc_stats {
+ __u64 work; /* total work done */
+ __u64 rtwork; /* work done by real-time criteria */
+ __u32 period; /* current period */
+ __u32 level; /* class level in hierarchy */
+};
+
+enum {
+ TCA_HFSC_UNSPEC,
+ TCA_HFSC_RSC,
+ TCA_HFSC_FSC,
+ TCA_HFSC_USC,
+ __TCA_HFSC_MAX,
+};
+
+#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
+
+
+/* CBQ section */
+
+#define TC_CBQ_MAXPRIO 8
+#define TC_CBQ_MAXLEVEL 8
+#define TC_CBQ_DEF_EWMA 5
+
+struct tc_cbq_lssopt {
+ unsigned char change;
+ unsigned char flags;
+#define TCF_CBQ_LSS_BOUNDED 1
+#define TCF_CBQ_LSS_ISOLATED 2
+ unsigned char ewma_log;
+ unsigned char level;
+#define TCF_CBQ_LSS_FLAGS 1
+#define TCF_CBQ_LSS_EWMA 2
+#define TCF_CBQ_LSS_MAXIDLE 4
+#define TCF_CBQ_LSS_MINIDLE 8
+#define TCF_CBQ_LSS_OFFTIME 0x10
+#define TCF_CBQ_LSS_AVPKT 0x20
+ __u32 maxidle;
+ __u32 minidle;
+ __u32 offtime;
+ __u32 avpkt;
+};
+
+struct tc_cbq_wrropt {
+ unsigned char flags;
+ unsigned char priority;
+ unsigned char cpriority;
+ unsigned char __reserved;
+ __u32 allot;
+ __u32 weight;
+};
+
+struct tc_cbq_ovl {
+ unsigned char strategy;
+#define TC_CBQ_OVL_CLASSIC 0
+#define TC_CBQ_OVL_DELAY 1
+#define TC_CBQ_OVL_LOWPRIO 2
+#define TC_CBQ_OVL_DROP 3
+#define TC_CBQ_OVL_RCLASSIC 4
+ unsigned char priority2;
+ __u16 pad;
+ __u32 penalty;
+};
+
+struct tc_cbq_police {
+ unsigned char police;
+ unsigned char __res1;
+ unsigned short __res2;
+};
+
+struct tc_cbq_fopt {
+ __u32 split;
+ __u32 defmap;
+ __u32 defchange;
+};
+
+struct tc_cbq_xstats {
+ __u32 borrows;
+ __u32 overactions;
+ __s32 avgidle;
+ __s32 undertime;
+};
+
+enum {
+ TCA_CBQ_UNSPEC,
+ TCA_CBQ_LSSOPT,
+ TCA_CBQ_WRROPT,
+ TCA_CBQ_FOPT,
+ TCA_CBQ_OVL_STRATEGY,
+ TCA_CBQ_RATE,
+ TCA_CBQ_RTAB,
+ TCA_CBQ_POLICE,
+ __TCA_CBQ_MAX,
+};
+
+#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1)
+
+/* dsmark section */
+
+enum {
+ TCA_DSMARK_UNSPEC,
+ TCA_DSMARK_INDICES,
+ TCA_DSMARK_DEFAULT_INDEX,
+ TCA_DSMARK_SET_TC_INDEX,
+ TCA_DSMARK_MASK,
+ TCA_DSMARK_VALUE,
+ __TCA_DSMARK_MAX,
+};
+
+#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
+
+/* ATM section */
+
+enum {
+ TCA_ATM_UNSPEC,
+ TCA_ATM_FD, /* file/socket descriptor */
+ TCA_ATM_PTR, /* pointer to descriptor - later */
+ TCA_ATM_HDR, /* LL header */
+ TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */
+ TCA_ATM_ADDR, /* PVC address (for output only) */
+ TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */
+ __TCA_ATM_MAX,
+};
+
+#define TCA_ATM_MAX (__TCA_ATM_MAX - 1)
+
+/* Network emulator */
+
+enum {
+ TCA_NETEM_UNSPEC,
+ TCA_NETEM_CORR,
+ TCA_NETEM_DELAY_DIST,
+ TCA_NETEM_REORDER,
+ TCA_NETEM_CORRUPT,
+ TCA_NETEM_LOSS,
+ TCA_NETEM_RATE,
+ TCA_NETEM_ECN,
+ TCA_NETEM_RATE64,
+ TCA_NETEM_PAD,
+ TCA_NETEM_LATENCY64,
+ TCA_NETEM_JITTER64,
+ TCA_NETEM_SLOT,
+ TCA_NETEM_SLOT_DIST,
+ __TCA_NETEM_MAX,
+};
+
+#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
+
+struct tc_netem_qopt {
+ __u32 latency; /* added delay (us) */
+ __u32 limit; /* fifo limit (packets) */
+ __u32 loss; /* random packet loss (0=none ~0=100%) */
+ __u32 gap; /* re-ordering gap (0 for none) */
+ __u32 duplicate; /* random packet dup (0=none ~0=100%) */
+ __u32 jitter; /* random jitter in latency (us) */
+};
+
+struct tc_netem_corr {
+ __u32 delay_corr; /* delay correlation */
+ __u32 loss_corr; /* packet loss correlation */
+ __u32 dup_corr; /* duplicate correlation */
+};
+
+struct tc_netem_reorder {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_corrupt {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_rate {
+ __u32 rate; /* byte/s */
+ __s32 packet_overhead;
+ __u32 cell_size;
+ __s32 cell_overhead;
+};
+
+struct tc_netem_slot {
+ __s64 min_delay; /* nsec */
+ __s64 max_delay;
+ __s32 max_packets;
+ __s32 max_bytes;
+ __s64 dist_delay; /* nsec */
+ __s64 dist_jitter; /* nsec */
+};
+
+enum {
+ NETEM_LOSS_UNSPEC,
+ NETEM_LOSS_GI, /* General Intuitive - 4 state model */
+ NETEM_LOSS_GE, /* Gilbert Elliot models */
+ __NETEM_LOSS_MAX
+};
+#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
+
+/* State transition probabilities for 4 state model */
+struct tc_netem_gimodel {
+ __u32 p13;
+ __u32 p31;
+ __u32 p32;
+ __u32 p14;
+ __u32 p23;
+};
+
+/* Gilbert-Elliot models */
+struct tc_netem_gemodel {
+ __u32 p;
+ __u32 r;
+ __u32 h;
+ __u32 k1;
+};
+
+#define NETEM_DIST_SCALE 8192
+#define NETEM_DIST_MAX 16384
+
+/* DRR */
+
+enum {
+ TCA_DRR_UNSPEC,
+ TCA_DRR_QUANTUM,
+ __TCA_DRR_MAX
+};
+
+#define TCA_DRR_MAX (__TCA_DRR_MAX - 1)
+
+struct tc_drr_stats {
+ __u32 deficit;
+};
+
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+enum {
+ TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */
+ TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */
+ __TC_MQPRIO_HW_OFFLOAD_MAX
+};
+
+#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+
+enum {
+ TC_MQPRIO_MODE_DCB,
+ TC_MQPRIO_MODE_CHANNEL,
+ __TC_MQPRIO_MODE_MAX
+};
+
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+ TC_MQPRIO_SHAPER_DCB,
+ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
+ __TC_MQPRIO_SHAPER_MAX
+};
+
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
+struct tc_mqprio_qopt {
+ __u8 num_tc;
+ __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
+ __u8 hw;
+ __u16 count[TC_QOPT_MAX_QUEUE];
+ __u16 offset[TC_QOPT_MAX_QUEUE];
+};
+
+#define TC_MQPRIO_F_MODE 0x1
+#define TC_MQPRIO_F_SHAPER 0x2
+#define TC_MQPRIO_F_MIN_RATE 0x4
+#define TC_MQPRIO_F_MAX_RATE 0x8
+
+enum {
+ TCA_MQPRIO_UNSPEC,
+ TCA_MQPRIO_MODE,
+ TCA_MQPRIO_SHAPER,
+ TCA_MQPRIO_MIN_RATE64,