From b7fb0916544de44ce099d9f3b6129c86b484de25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 13 May 2021 15:20:52 +0200 Subject: net: bridge: mcast: add ip4+ip6 mcast router timers to mdb netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have split the multicast router state into two, one for IPv4 and one for IPv6, also add individual timers to the mdb netlink router port dump. Leaving the old timer attribute for backwards compatibility. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 13d59c51ef5b..6b56a7549531 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -627,6 +627,8 @@ enum { MDBA_ROUTER_PATTR_UNSPEC, MDBA_ROUTER_PATTR_TIMER, MDBA_ROUTER_PATTR_TYPE, + MDBA_ROUTER_PATTR_INET_TIMER, + MDBA_ROUTER_PATTR_INET6_TIMER, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) -- cgit v1.2.3 From 79a7f8bdb159d9914b58740f3d31d602a6e4aca8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:03 -0700 Subject: bpf: Introduce bpf_sys_bpf() helper and program type. Add placeholders for bpf_sys_bpf() helper and new program type. Make sure to check that expected_attach_type is zero for future extensibility. Allow tracing helper functions to be used in this program type, since they will only execute from user context via bpf_prog_test_run. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-2-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ec6d85a81744..c92648f38144 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -937,6 +937,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ }; enum bpf_attach_type { @@ -4735,6 +4736,12 @@ union bpf_attr { * be zero-terminated except when **str_size** is 0. * * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4903,6 +4910,7 @@ union bpf_attr { FN(check_mtu), \ FN(for_each_map_elem), \ FN(snprintf), \ + FN(sys_bpf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 387544bfa291a22383d60b40f887360e2b931ec6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:10 -0700 Subject: bpf: Introduce fd_idx Typical program loading sequence involves creating bpf maps and applying map FDs into bpf instructions in various places in the bpf program. This job is done by libbpf that is using compiler generated ELF relocations to patch certain instruction after maps are created and BTFs are loaded. The goal of fd_idx is to allow bpf instructions to stay immutable after compilation. At load time the libbpf would still create maps as usual, but it wouldn't need to patch instructions. It would store map_fds into __u32 fd_array[] and would pass that pointer to sys_bpf(BPF_PROG_LOAD). Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-9-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c92648f38144..de58a714ed36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1098,8 +1098,8 @@ enum bpf_link_type { /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD - * insn[0].imm: map fd + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx * insn[1].imm: 0 * insn[0].off: 0 * insn[1].off: 0 @@ -1107,15 +1107,19 @@ enum bpf_link_type { * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx * insn[1].imm: offset into value * insn[0].off: 0 * insn[1].off: 0 * ldimm64 rewrite: address of map[0]+offset * verifier type: PTR_TO_MAP_VALUE */ -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 @@ -1315,6 +1319,8 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; + __u32 :32; /* pad */ + __aligned_u64 fd_array; /* array of FDs */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 3d78417b60fba249cc555468cb72d96f5cde2964 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:11 -0700 Subject: bpf: Add bpf_btf_find_by_name_kind() helper. Add new helper: long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) Description Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. Return Returns btf_id and btf_obj_fd in lower and upper 32 bits. It will be used by loader program to find btf_id to attach the program to and to find btf_ids of ksyms. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-10-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index de58a714ed36..3cc07351c1cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4748,6 +4748,12 @@ union bpf_attr { * Execute bpf syscall with given arguments. * Return * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4917,6 +4923,7 @@ union bpf_attr { FN(for_each_map_elem), \ FN(snprintf), \ FN(sys_bpf), \ + FN(btf_find_by_name_kind), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3abea089246f76c1517b054ddb5946f3f1dbd2c0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:12 -0700 Subject: bpf: Add bpf_sys_close() helper. Add bpf_sys_close() helper to be used by the syscall/loader program to close intermediate FDs and other cleanup. Note this helper must never be allowed inside fdget/fdput bracketing. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-11-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3cc07351c1cf..4cd9a0181f27 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4754,6 +4754,12 @@ union bpf_attr { * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. * Return * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4924,6 +4930,7 @@ union bpf_attr { FN(snprintf), \ FN(sys_bpf), \ FN(btf_find_by_name_kind), \ + FN(sys_close), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 5d67f349590ddc94b6d4e25f19085728db9de697 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 May 2021 18:40:32 -0700 Subject: bpf: Add cmd alias BPF_PROG_RUN Add BPF_PROG_RUN command as an alias to BPF_RPOG_TEST_RUN to better indicate the full range of use cases done by the command. Suggested-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210519014032.20908-1-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4cd9a0181f27..418b9b813d65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -837,6 +837,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, -- cgit v1.2.3 From 3e87f192b405960c0fe83e0925bd0dadf4f8cf43 Mon Sep 17 00:00:00 2001 From: Denis Salopek Date: Tue, 11 May 2021 23:00:04 +0200 Subject: bpf: Add lookup_and_delete_elem support to hashtab Extend the existing bpf_map_lookup_and_delete_elem() functionality to hashtab map types, in addition to stacks and queues. Create a new hashtab bpf_map_ops function that does lookup and deletion of the element under the same bucket lock and add the created map_ops to bpf.h. Signed-off-by: Denis Salopek Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/4d18480a3e990ffbf14751ddef0325eed3be2966.1620763117.git.denis.salopek@sartura.hr --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 418b9b813d65..562adeac1d67 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -527,6 +527,15 @@ union bpf_iter_link_info { * Look up an element with the given *key* in the map referred to * by the file descriptor *fd*, and if found, delete the element. * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types * implement this command as a "pop" operation, deleting the top * element rather than one corresponding to *key*. @@ -536,6 +545,10 @@ union bpf_iter_link_info { * This command is only valid for the following map types: * * **BPF_MAP_TYPE_QUEUE** * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** * * Return * Returns zero on success. On error, -1 is returned and *errno* -- cgit v1.2.3 From e624d4ed4aa8cc3c69d1359b0aaea539203ed266 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 19 May 2021 17:07:45 +0800 Subject: xdp: Extend xdp_redirect_map with broadcast support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two flags BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS to extend xdp_redirect_map for broadcast support. With BPF_F_BROADCAST the packet will be broadcasted to all the interfaces in the map. with BPF_F_EXCLUDE_INGRESS the ingress interface will be excluded when do broadcasting. When getting the devices in dev hash map via dev_map_hash_get_next_key(), there is a possibility that we fall back to the first key when a device was removed. This will duplicate packets on some interfaces. So just walk the whole buckets to avoid this issue. For dev array map, we also walk the whole map to find valid interfaces. Function bpf_clear_redirect_map() was removed in commit ee75aef23afe ("bpf, xdp: Restructure redirect actions"). Add it back as we need to use ri->map again. With test topology: +-------------------+ +-------------------+ | Host A (i40e 10G) | ---------- | eno1(i40e 10G) | +-------------------+ | | | Host B | +-------------------+ | | | Host C (i40e 10G) | ---------- | eno2(i40e 10G) | +-------------------+ | | | +------+ | | veth0 -- | Peer | | | veth1 -- | | | | veth2 -- | NS | | | +------+ | +-------------------+ On Host A: # pktgen/pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -s 64 On Host B(Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 128G Memory): Use xdp_redirect_map and xdp_redirect_map_multi in samples/bpf for testing. All the veth peers in the NS have a XDP_DROP program loaded. The forward_map max_entries in xdp_redirect_map_multi is modify to 4. Testing the performance impact on the regular xdp_redirect path with and without patch (to check impact of additional check for broadcast mode): 5.12 rc4 | redirect_map i40e->i40e | 2.0M | 9.7M 5.12 rc4 | redirect_map i40e->veth | 1.7M | 11.8M 5.12 rc4 + patch | redirect_map i40e->i40e | 2.0M | 9.6M 5.12 rc4 + patch | redirect_map i40e->veth | 1.7M | 11.7M Testing the performance when cloning packets with the redirect_map_multi test, using a redirect map size of 4, filled with 1-3 devices: 5.12 rc4 + patch | redirect_map multi i40e->veth (x1) | 1.7M | 11.4M 5.12 rc4 + patch | redirect_map multi i40e->veth (x2) | 1.1M | 4.3M 5.12 rc4 + patch | redirect_map multi i40e->veth (x3) | 0.8M | 2.6M Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Acked-by: Martin KaFai Lau Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20210519090747.1655268-3-liuhangbin@gmail.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 562adeac1d67..2c1ba70abbf1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2555,8 +2555,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcasted to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when do broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. @@ -5122,6 +5126,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ -- cgit v1.2.3 From 7e97d274db920df479e222fed10e7b242f90ffb0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 17 May 2021 13:24:25 +0200 Subject: can: uapi: update CAN-FD frame description Since an early version of the CAN-FD specification the bit that defines a CAN-FD frame on the wire, has been renamed from Extended Data Length (EDL) to FD Frame (FDF). To avoid confusion, update the struct canfd_frame description in the UAPI headers accordingly. Link: https://lore.kernel.org/r/20210517113727.77597-1-mkl@pengutronix.de Suggested-by: Ayoub Kaanich Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index c7535352fef6..ac5d7a31671f 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -123,8 +123,8 @@ struct can_frame { /* * defined bits for canfd_frame.flags * - * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to - * be set in the CAN frame bitstream on the wire. The EDL bit switch turns + * The use of struct canfd_frame implies the FD Frame (FDF) bit to + * be set in the CAN frame bitstream on the wire. The FDF bit switch turns * the CAN controllers bitstream processor into the CAN FD mode which creates * two new options within the CAN FD frame specification: * -- cgit v1.2.3 From 02546884221279da2725e87e35348290470363d7 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 11 Apr 2017 15:43:43 +0200 Subject: can: uapi: introduce CANFD_FDF flag for mixed content in struct canfd_frame The struct can_frame and struct canfd_frame intentionally share the same layout to be able to write CAN frame content into a CAN FD frame structure. When this is done the former differentiation via CAN_MTU / CANFD_MTU is lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of using struct canfd_frame for mixed CAN/CAN FD content (dual use). N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. Link: https://lore.kernel.org/r/20170411134343.3089-1-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index ac5d7a31671f..90801ada2bbe 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -135,9 +135,18 @@ struct can_frame { * controller only the CANFD_BRS bit is relevant for real CAN controllers when * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make * sense for virtual CAN interfaces to test applications with echoed frames. + * + * The struct can_frame and struct canfd_frame intentionally share the same + * layout to be able to write CAN frame content into a CAN FD frame structure. + * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets + * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of + * using struct canfd_frame for mixed CAN / CAN FD content (dual use). + * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of + * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. */ #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ +#define CANFD_FDF 0x04 /* mark CAN FD for dual use of struct canfd_frame */ /** * struct canfd_frame - CAN flexible data rate frame structure -- cgit v1.2.3 From 133dc203d77dff617d9c4673973ef3859be2c476 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 4 May 2021 17:54:06 +0200 Subject: netfilter: nft_exthdr: Support SCTP chunks Chunks are SCTP header extensions similar in implementation to IPv6 extension headers or TCP options. Reusing exthdr expression to find and extract field values from them is therefore pretty straightforward. For now, this supports extracting data from chunks at a fixed offset (and length) only - chunks themselves are an extensible data structure; in order to make all fields available, a nested extension search is needed. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1fb4ca18ffbb..19715e2679d1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -813,11 +813,13 @@ enum nft_exthdr_flags { * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers * @NFT_EXTHDR_OP_TCP: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options + * @NFT_EXTHDR_OP_SCTP: match against sctp chunks */ enum nft_exthdr_op { NFT_EXTHDR_OP_IPV6, NFT_EXTHDR_OP_TCPOPT, NFT_EXTHDR_OP_IPV4, + NFT_EXTHDR_OP_SCTP, __NFT_EXTHDR_OP_MAX }; #define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) -- cgit v1.2.3 From e1d9a90a9bfdb0735062d3adb16b07314b4b7b01 Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:35 +0530 Subject: net: ethernet: rmnet: Support for ingress MAPv5 checksum offload Adding support for processing of MAPv5 downlink packets. It involves parsing the Mapv5 packet and checking the csum header to know whether the hardware has validated the checksum and is valid or not. Based on the checksum valid bit the corresponding stats are incremented and skb->ip_summed is marked either CHECKSUM_UNNECESSARY or left as CHEKSUM_NONE to let network stack revalidate the checksum and update the respective snmp stats. Current MAPV1 header has been modified, the reserved field in the Mapv1 header is now used for next header indication. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cd5b382a4138..1f753dcd85e1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1236,6 +1236,7 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) enum { IFLA_RMNET_UNSPEC, -- cgit v1.2.3 From b6e5d27e32ef6089d316ce7e1ecaf595584d4b84 Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:36 +0530 Subject: net: ethernet: rmnet: Add support for MAPv5 egress packets Adding support for MAPv5 egress packets. This involves adding the MAPv5 header and setting the csum_valid_required in the checksum header to request HW compute the checksum. Corresponding stats are incremented based on whether the checksum is computed in software or HW. New stat has been added which represents the count of packets whose checksum is calculated by the HW. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1f753dcd85e1..a5a7f0e64865 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1237,6 +1237,7 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, -- cgit v1.2.3 From 4677efc486e1872f62d4632c50f7183f82296fa6 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:19 +0300 Subject: devlink: Introduce rate object Allow registering rate object for devlink ports with dedicated devlink_rate_leaf_{create|destroy}() API. Implement new netlink DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info. Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for notifications when creating/deleting leaf rate object. Rate API is intended to be used for rate limiting of individual devlink ports (leafs) and their aggregates (nodes). Example: $ devlink port show pci/0000:03:00.0/0 pci/0000:03:00.0/1 $ devlink port function rate show pci/0000:03:00.0/0: type leaf pci/0000:03:00.0/1: type leaf Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f6008b2fa60f..0c27b45c47db 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -126,6 +126,11 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_TEST, + DEVLINK_CMD_RATE_GET, /* can dump */ + DEVLINK_CMD_RATE_SET, + DEVLINK_CMD_RATE_NEW, + DEVLINK_CMD_RATE_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -206,6 +211,10 @@ enum devlink_port_flavour { */ }; +enum devlink_rate_type { + DEVLINK_RATE_TYPE_LEAF, +}; + enum devlink_param_cmode { DEVLINK_PARAM_CMODE_RUNTIME, DEVLINK_PARAM_CMODE_DRIVERINIT, @@ -534,6 +543,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ + + DEVLINK_ATTR_RATE_TYPE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 1897db2ec3109eb1dd07b357c95c5e03d54e41b9 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:22 +0300 Subject: devlink: Allow setting tx rate for devlink rate leaf objects Implement support for DEVLINK_CMD_RATE_SET command with new attributes DEVLINK_ATTR_RATE_TX_{SHARE|MAX} that are used to set devlink rate shared/max tx rate values. Extend devlink ops with new callbacks rate_leaf_tx_{share|max}_set() to allow supporting drivers to implement rate control through devlink. New attributes are optional. Driver implementations are allowed to support either or both of them. Shared rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_share 10mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_share 10mbit Max rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_max 100mbit Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0c27b45c47db..ae94cd2a1078 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -545,6 +545,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ DEVLINK_ATTR_RATE_TYPE, /* u16 */ + DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ + DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a8ecb93ef03de4c59fb6289f99bc9616a852c917 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:25 +0300 Subject: devlink: Introduce rate nodes Implement support for DEVLINK_CMD_RATE_{NEW|DEL} commands that are used to create and delete devlink rate nodes. Add new attribute DEVLINK_ATTR_RATE_NODE_NAME that specify node name string. The node name is an alphanumeric identifier. No valid node name can be a devlink port index, eg. decimal number. Extend devlink ops with new callbacks rate_node_{new|del}() and rate_node_tx_{share|max}_set() to allow supporting drivers to implement ports rate grouping and setting tx rate of rate nodes through devlink. Expose devlink_rate_nodes_destroy() function to allow vendor driver do proper cleanup of internally allocated resources for the nodes if the driver goes down or due to any other reasons which requires nodes to be destroyed. Disallow moving device from switchdev to legacy mode if any node exists on that device. User must explicitly delete nodes before switching mode. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate set netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit Add + set command can be combined: $ devlink port function rate add netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node tx_share 10mbit tx_max 100mbit $ devlink port function rate del netdevsim/netdevsim10/group1 Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae94cd2a1078..7e15853b77fe 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -213,6 +213,7 @@ enum devlink_port_flavour { enum devlink_rate_type { DEVLINK_RATE_TYPE_LEAF, + DEVLINK_RATE_TYPE_NODE, }; enum devlink_param_cmode { @@ -547,6 +548,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TYPE, /* u16 */ DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ + DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From d7555984507822458b32a6405881038241d140be Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:28 +0300 Subject: devlink: Allow setting parent node of rate objects Refactor DEVLINK_CMD_RATE_{GET|SET} command handlers to support setting a node as a parent for another rate object (leaf or node) by means of new attribute DEVLINK_ATTR_RATE_PARENT_NODE_NAME. Extend devlink ops with new callbacks rate_{leaf|node}_parent_set() to set node as a parent for rate object to allow supporting drivers to implement rate grouping through devlink. Driver implementations are allowed to support leafs or node children only. Invoking callback with NULL as parent should be threated by the driver as unset parent action. Extend rate object struct with reference counter to disallow deleting a node with any child pointing to it. User should unset parent for the child explicitly. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate add netdevsim/netdevsim10/group2 $ devlink port function rate set netdevsim/netdevsim10/group1 parent group2 $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node parent group2 $ devlink port function rate set netdevsim/netdevsim10/group1 noparent Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7e15853b77fe..32f53a0069d6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -549,6 +549,7 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From e32ea44c7ae476f4c90e35ab0a29dc8ff082bc11 Mon Sep 17 00:00:00 2001 From: Andreas Roeseler Date: Thu, 3 Jun 2021 16:22:11 -0500 Subject: icmp: fix lib conflict with trinity Including and in the dependencies breaks compilation of trinity due to multiple definitions. is only used in to provide the definition of the struct in_addr, but this can be substituted out by using the datatype __be32. Signed-off-by: Andreas Roeseler Signed-off-by: David S. Miller --- include/uapi/linux/icmp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index c1da8244c5e1..163c0998aec9 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -154,7 +153,7 @@ struct icmp_ext_echo_iio { struct { struct icmp_ext_echo_ctype3_hdr ctype3_hdr; union { - struct in_addr ipv4_addr; + __be32 ipv4_addr; struct in6_addr ipv6_addr; } ip_addr; } addr; -- cgit v1.2.3 From e2cf17d3774c323ef6dab6e9f7c0cfc5e742afd9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Jun 2021 12:27:07 +0200 Subject: netfilter: add new hook nfnl subsystem This nfnl subsystem allows to dump the list of all active netfiler hooks, e.g. defrag, conntrack, nf/ip/arp/ip6tables and so on. This helps to see what kind of features are currently enabled in the network stack. Sample output from nft tool using this infra: $ nft list hook ip input family ip hook input { +0000000010 nft_do_chain_inet [nf_tables] # nft table firewalld INPUT +0000000100 nf_nat_ipv4_local_in [nf_nat] +2147483647 ipv4_confirm [nf_conntrack] } Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink.h | 3 +- include/uapi/linux/netfilter/nfnetlink_hook.h | 55 +++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/netfilter/nfnetlink_hook.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 5bc960f220b3..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -60,7 +60,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTHELPER 9 #define NFNL_SUBSYS_NFTABLES 10 #define NFNL_SUBSYS_NFT_COMPAT 11 -#define NFNL_SUBSYS_COUNT 12 +#define NFNL_SUBSYS_HOOK 12 +#define NFNL_SUBSYS_COUNT 13 /* Reserved control nfnetlink messages */ #define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h new file mode 100644 index 000000000000..912ec60b26b0 --- /dev/null +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _NFNL_HOOK_H_ +#define _NFNL_HOOK_H_ + +enum nfnl_hook_msg_types { + NFNL_MSG_HOOK_GET, + NFNL_MSG_HOOK_MAX, +}; + +/** + * enum nfnl_hook_attributes - netfilter hook netlink attributes + * + * @NFNLA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFNLA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFNLA_HOOK_DEV: netdevice name (NLA_STRING) + * @NFNLA_HOOK_FUNCTION_NAME: hook function name (NLA_STRING) + * @NFNLA_HOOK_MODULE_NAME: kernel module that registered this hook (NLA_STRING) + * @NFNLA_HOOK_CHAIN_INFO: basechain hook metadata (NLA_NESTED) + */ +enum nfnl_hook_attributes { + NFNLA_HOOK_UNSPEC, + NFNLA_HOOK_HOOKNUM, + NFNLA_HOOK_PRIORITY, + NFNLA_HOOK_DEV, + NFNLA_HOOK_FUNCTION_NAME, + NFNLA_HOOK_MODULE_NAME, + NFNLA_HOOK_CHAIN_INFO, + __NFNLA_HOOK_MAX +}; +#define NFNLA_HOOK_MAX (__NFNLA_HOOK_MAX - 1) + +/** + * enum nfnl_hook_chain_info_attributes - chain description + * + * NFNLA_HOOK_INFO_DESC: nft chain and table name (enum nft_table_attributes) (NLA_NESTED) + * NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + */ +enum nfnl_hook_chain_info_attributes { + NFNLA_HOOK_INFO_UNSPEC, + NFNLA_HOOK_INFO_DESC, + NFNLA_HOOK_INFO_TYPE, + __NFNLA_HOOK_INFO_MAX, +}; +#define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) + +/** + * enum nfnl_hook_chaintype - chain type + * + * @NFNL_HOOK_TYPE_NFTABLES nf_tables base chain + */ +enum nfnl_hook_chaintype { + NFNL_HOOK_TYPE_NFTABLES = 0x1, +}; + +#endif /* _NFNL_HOOK_H */ -- cgit v1.2.3 From d409989b59ad0b8d108706db25e17c320a9664eb Mon Sep 17 00:00:00 2001 From: Chen Li Date: Mon, 7 Jun 2021 09:44:35 +0800 Subject: netlink: simplify NLMSG_DATA with NLMSG_HDRLEN The NLMSG_LENGTH(0) may confuse the API users, NLMSG_HDRLEN is much more clear. Besides, some code style problems are also fixed. Signed-off-by: Chen Li Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 3d94269bbfa8..4c0cde075c27 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -91,9 +91,10 @@ struct nlmsghdr { #define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) #define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) -#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_DATA(nlh) ((void *)(((char *)nlh) + NLMSG_HDRLEN)) #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ - (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) + (struct nlmsghdr *)(((char *)(nlh)) + \ + NLMSG_ALIGN((nlh)->nlmsg_len))) #define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ (nlh)->nlmsg_len <= (len)) -- cgit v1.2.3 From f07b2a5b04d4a50d931a0afe4e3e114ce09a2e4b Mon Sep 17 00:00:00 2001 From: Arseny Krasnov Date: Fri, 11 Jun 2021 14:12:22 +0300 Subject: virtio/vsock: defines and constants for SEQPACKET Add set of defines and constants for SOCK_SEQPACKET support in vsock. Signed-off-by: Arseny Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/uapi/linux/virtio_vsock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 1d57ed3d84d2..3dd3555b2740 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include #include +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { __le64 guest_cid; } __attribute__((packed)); @@ -65,6 +68,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -91,4 +95,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOR = 1, +}; + #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From 00e77ed8e64d5f271c1f015c7153545980d48a76 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 12 Jun 2021 10:20:55 +0200 Subject: rtnetlink: add IFLA_PARENT_[DEV|DEV_BUS]_NAME In some cases, for example in the upcoming WWAN framework changes, there's no natural "parent netdev", so sometimes dummy netdevs are created or similar. IFLA_PARENT_DEV_NAME is a new attribute intended to contain a device (sysfs, struct device) name that can be used instead when creating a new netdev, if the rtnetlink family implements it. As suggested by Parav Pandit, we also introduce IFLA_PARENT_DEV_BUS_NAME attribute in order to uniquely identify a device on the system (with bus/name pair). ip-link(8) support for the generic parent device attributes will help us avoid code duplication, so no other link type will require a custom code to handle the parent name attribute. E.g. the WWAN interface creation command will looks like this: $ ip link add wwan0-1 parent-dev wwan0 type wwan channel-id 1 So, some future subsystem (or driver) FOO will have an interface creation command that looks like this: $ ip link add foo1-3 parent-dev foo1 type foo bar-id 3 baz-type Y Below is an example of dumping link info of a random device with these new attributes: $ ip --details link show wlp0s20f3 4: wlp0s20f3: mtu 1500 qdisc noqueue state UP mode DORMANT group default qlen 1000 ... parent_bus pci parent_dev 0000:00:14.3 Co-developed-by: Sergey Ryazanov Signed-off-by: Sergey Ryazanov Co-developed-by: Loic Poulain Signed-off-by: Loic Poulain Suggested-by: Sergey Ryazanov Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a5a7f0e64865..4882e81514b6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -341,6 +341,13 @@ enum { IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + __IFLA_MAX }; -- cgit v1.2.3 From 88b710532e53de2466d1033fb1d5125aabf3215a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 12 Jun 2021 10:20:56 +0200 Subject: wwan: add interface creation support Add support to create (and destroy) interfaces via a new rtnetlink kind "wwan". The responsible driver has to use the new wwan_register_ops() to make this possible. Signed-off-by: Johannes Berg Signed-off-by: Sergey Ryazanov Signed-off-by: Loic Poulain Signed-off-by: David S. Miller --- include/uapi/linux/wwan.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/uapi/linux/wwan.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wwan.h b/include/uapi/linux/wwan.h new file mode 100644 index 000000000000..32a2720b4d11 --- /dev/null +++ b/include/uapi/linux/wwan.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2021 Intel Corporation. + */ +#ifndef _UAPI_WWAN_H_ +#define _UAPI_WWAN_H_ + +enum { + IFLA_WWAN_UNSPEC, + IFLA_WWAN_LINK_ID, /* u32 */ + + __IFLA_WWAN_MAX +}; +#define IFLA_WWAN_MAX (__IFLA_WWAN_MAX - 1) + +#endif /* _UAPI_WWAN_H_ */ -- cgit v1.2.3 From e061047684af63f2d4f1338ec73140f6e29eb59f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:21 +0900 Subject: bpf: Support BPF_FUNC_get_socket_cookie() for BPF_PROG_TYPE_SK_REUSEPORT. We will call sock_reuseport.prog for socket migration in the next commit, so the eBPF program has to know which listener is closing to select a new listener. We can currently get a unique ID of each listener in the userspace by calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map. This patch makes the pointer of sk available in sk_reuseport_md so that we can get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program. Suggested-by: Martin KaFai Lau Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20210612123224.12525-9-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2c1ba70abbf1..f3b72588442b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5416,6 +5416,7 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + __bpf_md_ptr(struct bpf_sock *, sk); }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From d5e4ddaeb6ab2c3c7fbb7b247a6d34bb0b18d87e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:22 +0900 Subject: bpf: Support socket migration by eBPF. This patch introduces a new bpf_attach_type for BPF_PROG_TYPE_SK_REUSEPORT to check if the attached eBPF program is capable of migrating sockets. When the eBPF program is attached, we run it for socket migration if the expected_attach_type is BPF_SK_REUSEPORT_SELECT_OR_MIGRATE or net.ipv4.tcp_migrate_req is enabled. Currently, the expected_attach_type is not enforced for the BPF_PROG_TYPE_SK_REUSEPORT type of program. Thus, this commit follows the earlier idea in the commit aac3fc320d94 ("bpf: Post-hooks for sys_bind") to fix up the zero expected_attach_type in bpf_prog_load_fixup_attach_type(). Moreover, this patch adds a new field (migrating_sk) to sk_reuseport_md to select a new listener based on the child socket. migrating_sk varies depending on if it is migrating a request in the accept queue or during 3WHS. - accept_queue : sock (ESTABLISHED/SYN_RECV) - 3WHS : request_sock (NEW_SYN_RECV) In the eBPF program, we can select a new listener by BPF_FUNC_sk_select_reuseport(). Also, we can cancel migration by returning SK_DROP. This feature is useful when listeners have different settings at the socket API level or when we want to free resources as soon as possible. - SK_PASS with selected_sk, select it as a new listener - SK_PASS with selected_sk NULL, fallbacks to the random selection - SK_DROP, cancel the migration. There is a noteworthy point. We select a listening socket in three places, but we do not have struct skb at closing a listener or retransmitting a SYN+ACK. On the other hand, some helper functions do not expect skb is NULL (e.g. skb_header_pointer() in BPF_FUNC_skb_load_bytes(), skb_tail_pointer() in BPF_FUNC_skb_load_bytes_relative()). So we allocate an empty skb temporarily before running the eBPF program. Suggested-by: Martin KaFai Lau Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/netdev/20201123003828.xjpjdtk4ygl6tg6h@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/netdev/20201203042402.6cskdlit5f3mw4ru@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/netdev/20201209030903.hhow5r53l6fmozjn@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20210612123224.12525-10-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3b72588442b..bf9252c7381e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -994,6 +994,8 @@ enum bpf_attach_type { BPF_SK_LOOKUP, BPF_XDP, BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, __MAX_BPF_ATTACH_TYPE }; @@ -5416,7 +5418,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. + * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. + */ __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From 8c40602b4be17571dfd75102f4f1e690311c5210 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 16 Jun 2021 16:52:56 +0200 Subject: net/smc: Add netlink support for SMC statistics Add the netlink function which collects the statistics information and delivers it to the userspace. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 3e68da07fba2..f32f11b30963 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -47,6 +47,7 @@ enum { SMC_NETLINK_GET_LGR_SMCD, SMC_NETLINK_GET_DEV_SMCD, SMC_NETLINK_GET_DEV_SMCR, + SMC_NETLINK_GET_STATS, }; /* SMC_GENL_FAMILY top level attributes */ @@ -58,6 +59,7 @@ enum { SMC_GEN_LGR_SMCD, /* nest */ SMC_GEN_DEV_SMCD, /* nest */ SMC_GEN_DEV_SMCR, /* nest */ + SMC_GEN_STATS, /* nest */ __SMC_GEN_MAX, SMC_GEN_MAX = __SMC_GEN_MAX - 1 }; @@ -159,4 +161,71 @@ enum { SMC_NLA_DEV_MAX = __SMC_NLA_DEV_MAX - 1 }; +/* SMC_NLA_STATS_T_TX(RX)_RMB_SIZE nested attributes */ +/* SMC_NLA_STATS_TX(RX)PLOAD_SIZE nested attributes */ +enum { + SMC_NLA_STATS_PLOAD_PAD, + SMC_NLA_STATS_PLOAD_8K, /* u64 */ + SMC_NLA_STATS_PLOAD_16K, /* u64 */ + SMC_NLA_STATS_PLOAD_32K, /* u64 */ + SMC_NLA_STATS_PLOAD_64K, /* u64 */ + SMC_NLA_STATS_PLOAD_128K, /* u64 */ + SMC_NLA_STATS_PLOAD_256K, /* u64 */ + SMC_NLA_STATS_PLOAD_512K, /* u64 */ + SMC_NLA_STATS_PLOAD_1024K, /* u64 */ + SMC_NLA_STATS_PLOAD_G_1024K, /* u64 */ + __SMC_NLA_STATS_PLOAD_MAX, + SMC_NLA_STATS_PLOAD_MAX = __SMC_NLA_STATS_PLOAD_MAX - 1 +}; + +/* SMC_NLA_STATS_T_TX(RX)_RMB_STATS nested attributes */ +enum { + SMC_NLA_STATS_RMB_PAD, + SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT, /* u64 */ + SMC_NLA_STATS_RMB_SIZE_SM_CNT, /* u64 */ + SMC_NLA_STATS_RMB_FULL_PEER_CNT, /* u64 */ + SMC_NLA_STATS_RMB_FULL_CNT, /* u64 */ + SMC_NLA_STATS_RMB_REUSE_CNT, /* u64 */ + SMC_NLA_STATS_RMB_ALLOC_CNT, /* u64 */ + SMC_NLA_STATS_RMB_DGRADE_CNT, /* u64 */ + __SMC_NLA_STATS_RMB_MAX, + SMC_NLA_STATS_RMB_MAX = __SMC_NLA_STATS_RMB_MAX - 1 +}; + +/* SMC_NLA_STATS_SMCD_TECH and _SMCR_TECH nested attributes */ +enum { + SMC_NLA_STATS_T_PAD, + SMC_NLA_STATS_T_TX_RMB_SIZE, /* nest */ + SMC_NLA_STATS_T_RX_RMB_SIZE, /* nest */ + SMC_NLA_STATS_T_TXPLOAD_SIZE, /* nest */ + SMC_NLA_STATS_T_RXPLOAD_SIZE, /* nest */ + SMC_NLA_STATS_T_TX_RMB_STATS, /* nest */ + SMC_NLA_STATS_T_RX_RMB_STATS, /* nest */ + SMC_NLA_STATS_T_CLNT_V1_SUCC, /* u64 */ + SMC_NLA_STATS_T_CLNT_V2_SUCC, /* u64 */ + SMC_NLA_STATS_T_SRV_V1_SUCC, /* u64 */ + SMC_NLA_STATS_T_SRV_V2_SUCC, /* u64 */ + SMC_NLA_STATS_T_SENDPAGE_CNT, /* u64 */ + SMC_NLA_STATS_T_SPLICE_CNT, /* u64 */ + SMC_NLA_STATS_T_CORK_CNT, /* u64 */ + SMC_NLA_STATS_T_NDLY_CNT, /* u64 */ + SMC_NLA_STATS_T_URG_DATA_CNT, /* u64 */ + SMC_NLA_STATS_T_RX_BYTES, /* u64 */ + SMC_NLA_STATS_T_TX_BYTES, /* u64 */ + SMC_NLA_STATS_T_RX_CNT, /* u64 */ + SMC_NLA_STATS_T_TX_CNT, /* u64 */ + __SMC_NLA_STATS_T_MAX, + SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1 +}; + +/* SMC_GEN_STATS attributes */ +enum { + SMC_NLA_STATS_PAD, + SMC_NLA_STATS_SMCD_TECH, /* nest */ + SMC_NLA_STATS_SMCR_TECH, /* nest */ + SMC_NLA_STATS_CLNT_HS_ERR_CNT, /* u64 */ + SMC_NLA_STATS_SRV_HS_ERR_CNT, /* u64 */ + __SMC_NLA_STATS_MAX, + SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From f0dd7bf5e33066e554442c509ef6351728b95b51 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 16 Jun 2021 16:52:57 +0200 Subject: net/smc: Add netlink support for SMC fallback statistics Add support to collect more detailed SMC fallback reason statistics and provide these statistics to user space on the netlink interface. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index f32f11b30963..0f7f87c70baf 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -48,6 +48,7 @@ enum { SMC_NETLINK_GET_DEV_SMCD, SMC_NETLINK_GET_DEV_SMCR, SMC_NETLINK_GET_STATS, + SMC_NETLINK_GET_FBACK_STATS, }; /* SMC_GENL_FAMILY top level attributes */ @@ -60,6 +61,7 @@ enum { SMC_GEN_DEV_SMCD, /* nest */ SMC_GEN_DEV_SMCR, /* nest */ SMC_GEN_STATS, /* nest */ + SMC_GEN_FBACK_STATS, /* nest */ __SMC_GEN_MAX, SMC_GEN_MAX = __SMC_GEN_MAX - 1 }; @@ -228,4 +230,16 @@ enum { __SMC_NLA_STATS_MAX, SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1 }; + +/* SMC_GEN_FBACK_STATS attributes */ +enum { + SMC_NLA_FBACK_STATS_PAD, + SMC_NLA_FBACK_STATS_TYPE, /* u8 */ + SMC_NLA_FBACK_STATS_SRV_CNT, /* u64 */ + SMC_NLA_FBACK_STATS_CLNT_CNT, /* u64 */ + SMC_NLA_FBACK_STATS_RSN_CODE, /* u32 */ + SMC_NLA_FBACK_STATS_RSN_CNT, /* u16 */ + __SMC_NLA_FBACK_STATS_MAX, + SMC_NLA_FBACK_STATS_MAX = __SMC_NLA_FBACK_STATS_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From 836382dc24717af203ce06703530528827086955 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 16 Jun 2021 22:25:05 +0200 Subject: netfilter: nf_tables: add last expression Add a new optional expression that tells you when last matching on a given rule / set element element has happened. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 19715e2679d1..e94d1fa554cb 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1195,6 +1195,21 @@ enum nft_counter_attributes { }; #define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) +/** + * enum nft_last_attributes - nf_tables last expression netlink attributes + * + * @NFTA_LAST_SET: last update has been set, zero means never updated (NLA_U32) + * @NFTA_LAST_MSECS: milliseconds since last update (NLA_U64) + */ +enum nft_last_attributes { + NFTA_LAST_UNSPEC, + NFTA_LAST_SET, + NFTA_LAST_MSECS, + NFTA_LAST_PAD, + __NFTA_LAST_MAX +}; +#define NFTA_LAST_MAX (__NFTA_LAST_MAX - 1) + /** * enum nft_log_attributes - nf_tables log expression netlink attributes * -- cgit v1.2.3 From 2d8ea148e553e1dd4e80a87741abdfb229e2b323 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 17 Jun 2021 11:37:11 +0800 Subject: net: fix mistake path for netdev_features_strings Th_strings arrays netdev_features_strings, tunable_strings, and phy_tunable_strings has been moved to file net/ethtool/common.c. So fixes the comment. Signed-off-by: Jian Shen Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cfef6b08169a..67aa7134b301 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -233,7 +233,7 @@ enum tunable_id { ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tunable attribute above and remember to update - * tunable_strings[] in net/core/ethtool.c + * tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_TUNABLE_COUNT, }; @@ -297,7 +297,7 @@ enum phy_tunable_id { ETHTOOL_PHY_EDPD, /* * Add your fresh new phy tunable attribute above and remember to update - * phy_tunable_strings[] in net/core/ethtool.c + * phy_tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_PHY_TUNABLE_COUNT, }; -- cgit v1.2.3 From 8b532109bf885b7b59b93487bc4672eb6d071b78 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Thu, 17 Jun 2021 19:16:44 +0200 Subject: seg6: add support for SRv6 End.DT46 Behavior IETF RFC 8986 [1] includes the definition of SRv6 End.DT4, End.DT6, and End.DT46 Behaviors. The current SRv6 code in the Linux kernel only implements End.DT4 and End.DT6 which can be used respectively to support IPv4-in-IPv6 and IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. The proposed End.DT46 implementation is meant to support the decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. The implementation of the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies the setup and operations of SRv6 VPNs. The SRv6 End.DT46 Behavior leverages the infrastructure of SRv6 End.DT{4,6} Behaviors implemented so far, because it makes use of a VRF device in order to force the routing lookup into the associated routing table. To make the End.DT46 work properly, it must be guaranteed that the routing table used for routing lookup operations is bound to one and only one VRF during the tunnel creation. Such constraint has to be enforced by enabling the VRF strict_mode sysctl parameter, i.e.: $ sysctl -wq net.vrf.strict_mode=1 Note that the same approach is used for the SRv6 End.DT4 Behavior and for the End.DT6 Behavior in VRF mode. The command used to instantiate an SRv6 End.DT46 Behavior is straightforward, i.e.: $ ip -6 route add 2001:db8::1 encap seg6local action End.DT46 vrftable 100 dev vrf100. [1] https://www.rfc-editor.org/rfc/rfc8986.html#name-enddt46-decapsulation-and-s ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Performance and impact of SRv6 End.DT46 Behavior on the SRv6 Networking ======================================================================= This patch aims to add the SRv6 End.DT46 Behavior with minimal impact on the performance of SRv6 End.DT4 and End.DT6 Behaviors. In order to verify this, we tested the performance of the newly introduced SRv6 End.DT46 Behavior and compared it with the performance of SRv6 End.DT{4,6} Behaviors, considering both the patched kernel and the kernel before applying the End.DT46 patch (referred to as vanilla kernel). In details, the following decapsulation scenarios were considered: 1.a) IPv6 traffic in SRv6 End.DT46 Behavior on patched kernel; 1.b) IPv4 traffic in SRv6 End.DT46 Behavior on patched kernel; 2.a) SRv6 End.DT6 Behavior (VRF mode) on patched kernel; 2.b) SRv6 End.DT4 Behavior on patched kernel; 3.a) SRv6 End.DT6 Behavior (VRF mode) on vanilla kernel (without the End.DT46 patch); 3.b) SRv6 End.DT4 Behavior on vanilla kernel (without the End.DT46 patch). All tests were performed on a testbed deployed on the CloudLab [2] facilities. We considered IPv{4,6} traffic handled by a single core (at 2.4 GHz on a Xeon(R) CPU E5-2630 v3) on kernel 5.13-rc1 using packets of size ~ 100 bytes. Scenario (1.a): average 684.70 kpps; std. dev. 0.7 kpps; Scenario (1.b): average 711.69 kpps; std. dev. 1.2 kpps; Scenario (2.a): average 690.70 kpps; std. dev. 1.2 kpps; Scenario (2.b): average 722.22 kpps; std. dev. 1.7 kpps; Scenario (3.a): average 690.02 kpps; std. dev. 2.6 kpps; Scenario (3.b): average 721.91 kpps; std. dev. 1.2 kpps; Considering the results for the patched kernel (1.a, 1.b, 2.a, 2.b) we observe that the performance degradation incurred in using End.DT46 rather than End.DT6 and End.DT4 respectively for IPv6 and IPv4 traffic is minimal, around 0.9% and 1.5%. Such very minimal performance degradation is the price to be paid if one prefers to use a single tunnel capable of handling both types of traffic (IPv4 and IPv6). Comparing the results for End.DT4 and End.DT6 under the patched and the vanilla kernel (2.a, 2.b, 3.a, 3.b) we observe that the introduction of the End.DT46 patch has no impact on the performance of End.DT4 and End.DT6. [2] https://www.cloudlab.us Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/seg6_local.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 5ae3ace84de0..332b18f318f8 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -64,6 +64,8 @@ enum { SEG6_LOCAL_ACTION_END_AM = 14, /* custom BPF action */ SEG6_LOCAL_ACTION_END_BPF = 15, + /* decap and lookup of DA in v4 or v6 table */ + SEG6_LOCAL_ACTION_END_DT46 = 16, __SEG6_LOCAL_ACTION_MAX, }; -- cgit v1.2.3 From 752e906732c69412087f716e93baa0330cb7cce3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:07 -0700 Subject: mptcp: add csum_enabled in mptcp_sock This patch added a new member named csum_enabled in struct mptcp_sock, used a dummy mptcp_is_checksum_enabled() helper to initialize it. Also added a new member named mptcpi_csum_enabled in struct mptcp_info to expose the csum_enabled flag. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 8eb3c0844bff..7b05f710