From ca34ce29fc4b0e929cc6aada40829d17ab50fee4 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Mon, 8 Aug 2022 09:47:23 -0700
Subject: bpf: Improve docstring for BPF_F_USER_BUILD_ID flag

Most tools which use bpf_get_stack or bpf_get_stackid symbolicate the
stack - meaning the stack of addresses in the target process' address
space is transformed into meaningful symbol names. The
BPF_F_USER_BUILD_ID flag eases this process by finding the build_id of
the file-backed vma which the address falls in and translating the
address to an offset within the backing file.

To be more specific, the offset is a "file offset" from the beginning of
the backing file. The symbols in ET_DYN ELF objects have a st_value
which is also described as an "offset" - but an offset in the process
address space, relative to the base address of the object.

It's necessary to translate between the "file offset" and "virtual
address offset" during symbolication before they can be directly
compared. Failure to do so can lead to confusing bugs, so this patch
clarifies language in the documentation in an attempt to keep this from
happening.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220808164723.3107500-1-davemarchevsky@fb.com
---
 include/uapi/linux/bpf.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7bf9ba1329be..534e33fb1029 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3008,8 +3008,18 @@ union bpf_attr {
  * 		**BPF_F_USER_STACK**
  * 			Collect a user space stack instead of a kernel stack.
  * 		**BPF_F_USER_BUILD_ID**
- * 			Collect buildid+offset instead of ips for user stack,
- * 			only valid if **BPF_F_USER_STACK** is also specified.
+ * 			Collect (build_id, file_offset) instead of ips for user
+ * 			stack, only valid if **BPF_F_USER_STACK** is also
+ * 			specified.
+ *
+ * 			*file_offset* is an offset relative to the beginning
+ * 			of the executable or shared object file backing the vma
+ * 			which the *ip* falls in. It is *not* an offset relative
+ * 			to that object's base address. Accordingly, it must be
+ * 			adjusted by adding (sh_addr - sh_offset), where
+ * 			sh_{addr,offset} correspond to the executable section
+ * 			containing *file_offset* in the object, for comparisons
+ * 			to symbols' st_value to be valid.
  *
  * 		**bpf_get_stack**\ () can collect up to
  * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
-- 
cgit v1.2.3


From c8996c98f703b09afe77a1d247dae691c9849dc1 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 9 Aug 2022 08:08:02 +0200
Subject: bpf: Add BPF-helper for accessing CLOCK_TAI

Commit 3dc6ffae2da2 ("timekeeping: Introduce fast accessor to clock tai")
introduced a fast and NMI-safe accessor for CLOCK_TAI. Especially in time
sensitive networks (TSN), where all nodes are synchronized by Precision Time
Protocol (PTP), it's helpful to have the possibility to generate timestamps
based on CLOCK_TAI instead of CLOCK_MONOTONIC. With a BPF helper for TAI in
place, it becomes very convenient to correlate activity across different
machines in the network.

Use cases for such a BPF helper include functionalities such as Tx launch
time (e.g. ETF and TAPRIO Qdiscs) and timestamping.

Note: CLOCK_TAI is nothing new per se, only the NMI-safe variant of it is.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
[Kurt: Wrote changelog and renamed helper]
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://lore.kernel.org/r/20220809060803.5773-2-kurt@linutronix.de
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 534e33fb1029..7d1e2794d83e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5341,6 +5341,18 @@ union bpf_attr {
  *		**-EACCES** if the SYN cookie is not valid.
  *
  *		**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * u64 bpf_ktime_get_tai_ns(void)
+ *	Description
+ *		A nonsettable system-wide clock derived from wall-clock time but
+ *		ignoring leap seconds.  This clock does not experience
+ *		discontinuities and backwards jumps caused by NTP inserting leap
+ *		seconds as CLOCK_REALTIME does.
+ *
+ *		See: **clock_gettime**\ (**CLOCK_TAI**)
+ *	Return
+ *		Current *ktime*.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5551,6 +5563,7 @@ union bpf_attr {
 	FN(tcp_raw_gen_syncookie_ipv6),	\
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
+	FN(ktime_get_tai_ns),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 4961d0772578e8737afe61370743f3bc22867111 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 12 Aug 2022 16:37:27 +0100
Subject: bpf: Clear up confusion in bpf_skb_adjust_room()'s documentation

Adding or removing room space _below_ layers 2 or 3, as the description
mentions, is ambiguous. This was written with a mental image of the
packet with layer 2 at the top, layer 3 under it, and so on. But it has
led users to believe that it was on lower layers (before the beginning
of the L2 and L3 headers respectively).

Let's make it more explicit, and specify between which layers the room
space is adjusted.

Reported-by: Rumen Telbizov <rumen.telbizov@menlosecurity.com>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220812153727.224500-3-quentin@isovalent.com
---
 include/uapi/linux/bpf.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7d1e2794d83e..934a2a8beb87 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2573,10 +2573,12 @@ union bpf_attr {
  *		There are two supported modes at this time:
  *
  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- *		  (room space is added or removed below the layer 2 header).
+ * 		  (room space is added or removed between the layer 2 and
+ * 		  layer 3 headers).
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * 		  (room space is added or removed below the layer 3 header).
+ * 		  (room space is added or removed between the layer 3 and
+ * 		  layer 4 headers).
  *
  *		The following flags are supported at this time:
  *
-- 
cgit v1.2.3


From 5d81757835859760a38a54a87eff856d4e578836 Mon Sep 17 00:00:00 2001
From: Emeel Hakim <ehakim@nvidia.com>
Date: Thu, 18 Aug 2022 18:32:30 +0300
Subject: net: macsec: Expose MACSEC_SALT_LEN definition to user space

Expose MACSEC_SALT_LEN definition to user space to be
used in various user space applications such as iproute.
Iproute will use this as part of adding macsec extended
packet number support.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Emeel Hakim <ehakim@nvidia.com>
Link: https://lore.kernel.org/r/20220818153229.4721-1-ehakim@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_macsec.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
index 3af2aa069a36..d5b6d1f37353 100644
--- a/include/uapi/linux/if_macsec.h
+++ b/include/uapi/linux/if_macsec.h
@@ -22,6 +22,8 @@
 
 #define MACSEC_KEYID_LEN 16
 
+#define MACSEC_SALT_LEN 12
+
 /* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */
 #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL
 #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL
-- 
cgit v1.2.3


From 1202cdd665315c525b5237e96e0bedc76d7e754f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Wed, 17 Aug 2022 17:43:21 -0700
Subject: Remove DECnet support from kernel

DECnet is an obsolete network protocol that receives more attention
from kernel janitors than users. It belongs in computer protocol
history museum not in Linux kernel.

It has been "Orphaned" in kernel since 2010. The iproute2 support
for DECnet was dropped in 5.0 release. The documentation link on
Sourceforge says it is abandoned there as well.

Leave the UAPI alone to keep userspace programs compiling.
This means that there is still an empty neighbour table
for AF_DECNET.

The table of /proc/sys/net entries was updated to match
current directories and reformatted to be alphabetical.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: David Ahern <dsahern@kernel.org>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/dn.h               | 149 ----------------------------------
 include/uapi/linux/netfilter_decnet.h |  72 ----------------
 include/uapi/linux/netlink.h          |   2 +-
 3 files changed, 1 insertion(+), 222 deletions(-)
 delete mode 100644 include/uapi/linux/dn.h
 delete mode 100644 include/uapi/linux/netfilter_decnet.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h
deleted file mode 100644
index 36ca71bd8bbe..000000000000
--- a/include/uapi/linux/dn.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_DN_H
-#define _LINUX_DN_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-/*
-
-	DECnet Data Structures and Constants
-
-*/
-
-/* 
- * DNPROTO_NSP can't be the same as SOL_SOCKET, 
- * so increment each by one (compared to ULTRIX)
- */
-#define DNPROTO_NSP     2                       /* NSP protocol number       */
-#define DNPROTO_ROU     3                       /* Routing protocol number   */
-#define DNPROTO_NML     4                       /* Net mgt protocol number   */
-#define DNPROTO_EVL     5                       /* Evl protocol number (usr) */
-#define DNPROTO_EVR     6                       /* Evl protocol number (evl) */
-#define DNPROTO_NSPT    7                       /* NSP trace protocol number */
-
-
-#define DN_ADDL		2
-#define DN_MAXADDL	2 /* ULTRIX headers have 20 here, but pathworks has 2 */
-#define DN_MAXOPTL	16
-#define DN_MAXOBJL	16
-#define DN_MAXACCL	40
-#define DN_MAXALIASL	128
-#define DN_MAXNODEL	256
-#define DNBUFSIZE	65023
-
-/* 
- * SET/GET Socket options  - must match the DSO_ numbers below
- */
-#define SO_CONDATA      1
-#define SO_CONACCESS    2
-#define SO_PROXYUSR     3
-#define SO_LINKINFO     7
-
-#define DSO_CONDATA     1        /* Set/Get connect data                */
-#define DSO_DISDATA     10       /* Set/Get disconnect data             */
-#define DSO_CONACCESS   2        /* Set/Get connect access data         */
-#define DSO_ACCEPTMODE  4        /* Set/Get accept mode                 */
-#define DSO_CONACCEPT   5        /* Accept deferred connection          */
-#define DSO_CONREJECT   6        /* Reject deferred connection          */
-#define DSO_LINKINFO    7        /* Set/Get link information            */
-#define DSO_STREAM      8        /* Set socket type to stream           */
-#define DSO_SEQPACKET   9        /* Set socket type to sequenced packet */
-#define DSO_MAXWINDOW   11       /* Maximum window size allowed         */
-#define DSO_NODELAY	12       /* Turn off nagle                      */
-#define DSO_CORK        13       /* Wait for more data!                 */
-#define DSO_SERVICES	14       /* NSP Services field                  */
-#define DSO_INFO	15       /* NSP Info field                      */
-#define DSO_MAX         15       /* Maximum option number               */
-
-
-/* LINK States */
-#define LL_INACTIVE	0
-#define LL_CONNECTING	1
-#define LL_RUNNING	2
-#define LL_DISCONNECTING 3
-
-#define ACC_IMMED 0
-#define ACC_DEFER 1
-
-#define SDF_WILD        1                  /* Wild card object          */
-#define SDF_PROXY       2                  /* Addr eligible for proxy   */
-#define SDF_UICPROXY    4                  /* Use uic-based proxy       */
-
-/* Structures */
-
-
-struct dn_naddr {
-	__le16		a_len;
-	__u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */
-};
-
-struct sockaddr_dn {
-	__u16		sdn_family;
-	__u8		sdn_flags;
-	__u8		sdn_objnum;
-	__le16		sdn_objnamel;
-	__u8		sdn_objname[DN_MAXOBJL];
-	struct   dn_naddr	sdn_add;
-};
-#define sdn_nodeaddrl   sdn_add.a_len   /* Node address length  */
-#define sdn_nodeaddr    sdn_add.a_addr  /* Node address         */
-
-
-
-/*
- * DECnet set/get DSO_CONDATA, DSO_DISDATA (optional data) structure
- */
-struct optdata_dn {
-        __le16  opt_status;     /* Extended status return */
-#define opt_sts opt_status
-        __le16  opt_optl;       /* Length of user data    */
-        __u8   opt_data[16];   /* User data              */
-};
-
-struct accessdata_dn {
-	__u8		acc_accl;
-	__u8		acc_acc[DN_MAXACCL];
-	__u8 		acc_passl;
-	__u8		acc_pass[DN_MAXACCL];
-	__u8 		acc_userl;
-	__u8		acc_user[DN_MAXACCL];
-};
-
-/*
- * DECnet logical link information structure
- */
-struct linkinfo_dn {
-        __u16  idn_segsize;    /* Segment size for link */
-        __u8   idn_linkstate;  /* Logical link state    */
-};
-
-/*
- * Ethernet address format (for DECnet)
- */
-union etheraddress {
-        __u8 dne_addr[ETH_ALEN];      /* Full ethernet address */
-  struct {
-                __u8 dne_hiord[4];    /* DECnet HIORD prefix   */
-                __u8 dne_nodeaddr[2]; /* DECnet node address   */
-  } dne_remote;
-};
-
-
-/*
- * DECnet physical socket address format
- */
-struct dn_addr {
-        __le16 dna_family;      /* AF_DECnet               */
-        union etheraddress dna_netaddr; /* DECnet ethernet address */
-};
-
-#define DECNET_IOCTL_BASE 0x89 /* PROTOPRIVATE range */
-
-#define SIOCSNETADDR  _IOW(DECNET_IOCTL_BASE, 0xe0, struct dn_naddr)
-#define SIOCGNETADDR  _IOR(DECNET_IOCTL_BASE, 0xe1, struct dn_naddr)
-#define OSIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, int)
-#define OSIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, int)
-
-#endif /* _LINUX_DN_H */
diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h
deleted file mode 100644
index 3c77f54560f2..000000000000
--- a/include/uapi/linux/netfilter_decnet.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef __LINUX_DECNET_NETFILTER_H
-#define __LINUX_DECNET_NETFILTER_H
-
-/* DECnet-specific defines for netfilter. 
- * This file (C) Steve Whitehouse 1999 derived from the
- * ipv4 netfilter header file which is
- * (C)1998 Rusty Russell -- This code is GPL.
- */
-
-#include <linux/netfilter.h>
-
-/* only for userspace compatibility */
-#ifndef __KERNEL__
-
-#include <limits.h> /* for INT_MIN, INT_MAX */
-
-/* kernel define is in netfilter_defs.h */
-#define NF_DN_NUMHOOKS		7
-#endif /* ! __KERNEL__ */
-
-/* DECnet Hooks */
-/* After promisc drops, checksum checks. */
-#define NF_DN_PRE_ROUTING	0
-/* If the packet is destined for this box. */
-#define NF_DN_LOCAL_IN		1
-/* If the packet is destined for another interface. */
-#define NF_DN_FORWARD		2
-/* Packets coming from a local process. */
-#define NF_DN_LOCAL_OUT		3
-/* Packets about to hit the wire. */
-#define NF_DN_POST_ROUTING	4
-/* Input Hello Packets */
-#define NF_DN_HELLO		5
-/* Input Routing Packets */
-#define NF_DN_ROUTE		6
-
-enum nf_dn_hook_priorities {
-	NF_DN_PRI_FIRST = INT_MIN,
-	NF_DN_PRI_CONNTRACK = -200,
-	NF_DN_PRI_MANGLE = -150,
-	NF_DN_PRI_NAT_DST = -100,
-	NF_DN_PRI_FILTER = 0,
-	NF_DN_PRI_NAT_SRC = 100,
-	NF_DN_PRI_DNRTMSG = 200,
-	NF_DN_PRI_LAST = INT_MAX,
-};
-
-struct nf_dn_rtmsg {
-	int nfdn_ifindex;
-};
-
-#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)))
-
-#ifndef __KERNEL__
-/* backwards compatibility for userspace */
-#define DNRMG_L1_GROUP 0x01
-#define DNRMG_L2_GROUP 0x02
-#endif
-
-enum {
-	DNRNG_NLGRP_NONE,
-#define DNRNG_NLGRP_NONE	DNRNG_NLGRP_NONE
-	DNRNG_NLGRP_L1,
-#define DNRNG_NLGRP_L1		DNRNG_NLGRP_L1
-	DNRNG_NLGRP_L2,
-#define DNRNG_NLGRP_L2		DNRNG_NLGRP_L2
-	__DNRNG_NLGRP_MAX
-};
-#define DNRNG_NLGRP_MAX	(__DNRNG_NLGRP_MAX - 1)
-
-#endif /*__LINUX_DECNET_NETFILTER_H*/
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 855dffb4c1c3..1e543cf0568c 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -20,7 +20,7 @@
 #define NETLINK_CONNECTOR	11
 #define NETLINK_NETFILTER	12	/* netfilter subsystem */
 #define NETLINK_IP6_FW		13
-#define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_DNRTMSG		14	/* DECnet routing messages (obsolete) */
 #define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
 #define NETLINK_GENERIC		16
 /* leave room for NETLINK_DM (DM Events) */
-- 
cgit v1.2.3


From 91350fe152930c0d61a362af68272526490efea5 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Sun, 21 Aug 2022 14:35:17 +0300
Subject: bpf, flow_dissector: Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode
 for bpf progs

Currently, attaching BPF_PROG_TYPE_FLOW_DISSECTOR programs completely
replaces the flow-dissector logic with custom dissection logic. This
forces implementors to write programs that handle dissection for any
flows expected in the namespace.

It makes sense for flow-dissector BPF programs to just augment the
dissector with custom logic (e.g. dissecting certain flows or custom
protocols), while enjoying the broad capabilities of the standard
dissector for any other traffic.

Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode. Flow-dissector BPF
programs may return this to indicate no dissection was made, and
fallback to the standard dissector is requested.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220821113519.116765-3-shmulik.ladkani@gmail.com
---
 include/uapi/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 934a2a8beb87..7f87012b012e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5861,6 +5861,11 @@ enum bpf_ret_code {
 	 *    represented by BPF_REDIRECT above).
 	 */
 	BPF_LWT_REROUTE = 128,
+	/* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR
+	 *   to indicate that no custom dissection was performed, and
+	 *   fallback to standard dissector is requested.
+	 */
+	BPF_FLOW_DISSECTOR_CONTINUE = 129,
 };
 
 struct bpf_sock {
-- 
cgit v1.2.3


From 2172fb8007eaafbef18563afb6c1ae5a976bf787 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 23 Aug 2022 15:25:54 -0700
Subject: bpf: update bpf_{g,s}et_retval documentation

* replace 'syscall' with 'upper layers', still mention that it's being
  exported via syscall errno
* describe what happens in set_retval(-EPERM) + return 1
* describe what happens with bind's 'return 3'

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20220823222555.523590-5-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7f87012b012e..644600dbb114 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5085,17 +5085,29 @@ union bpf_attr {
  *
  * int bpf_get_retval(void)
  *	Description
- *		Get the syscall's return value that will be returned to userspace.
+ *		Get the BPF program's return value that will be returned to the upper layers.
  *
- *		This helper is currently supported by cgroup programs only.
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
  *	Return
- *		The syscall's return value.
+ *		The BPF program's return value.
  *
  * int bpf_set_retval(int retval)
  *	Description
- *		Set the syscall's return value that will be returned to userspace.
+ *		Set the BPF program's return value that will be returned to the upper layers.
+ *
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
+ *
+ *		Note that there is the following corner case where the program exports an error
+ *		via bpf_set_retval but signals success via 'return 1':
+ *
+ *			bpf_set_retval(-EPERM);
+ *			return 1;
+ *
+ *		In this case, the BPF program's return value will use helper's -EPERM. This
+ *		still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
  *
- *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
-- 
cgit v1.2.3


From 30b6055428a90cc52d4add164df12b94ab07c3fd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 19 Aug 2022 13:02:20 -0700
Subject: net: improve and fix netlink kdoc

Subsequent patch will render the kdoc from
include/uapi/linux/netlink.h into Documentation.
We need to fix the warnings. While at it move
the comments on struct nlmsghdr to a proper
kdoc comment.

Link: https://lore.kernel.org/r/20220819200221.422801-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netlink.h | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 1e543cf0568c..e0ab261ceca2 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -41,12 +41,20 @@ struct sockaddr_nl {
        	__u32		nl_groups;	/* multicast groups mask */
 };
 
+/**
+ * struct nlmsghdr - fixed format metadata header of Netlink messages
+ * @nlmsg_len:   Length of message including header
+ * @nlmsg_type:  Message content type
+ * @nlmsg_flags: Additional flags
+ * @nlmsg_seq:   Sequence number
+ * @nlmsg_pid:   Sending process port ID
+ */
 struct nlmsghdr {
-	__u32		nlmsg_len;	/* Length of message including header */
-	__u16		nlmsg_type;	/* Message content */
-	__u16		nlmsg_flags;	/* Additional flags */
-	__u32		nlmsg_seq;	/* Sequence number */
-	__u32		nlmsg_pid;	/* Sending process port ID */
+	__u32		nlmsg_len;
+	__u16		nlmsg_type;
+	__u16		nlmsg_flags;
+	__u32		nlmsg_seq;
+	__u32		nlmsg_pid;
 };
 
 /* Flags values */
@@ -337,6 +345,9 @@ enum netlink_attribute_type {
  *	bitfield32 type (U32)
  * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
  * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
+ *
+ * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes
+ * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number
  */
 enum netlink_policy_type_attr {
 	NL_POLICY_TYPE_ATTR_UNSPEC,
-- 
cgit v1.2.3


From e7a7b84e33178db4a839c5e1773247be17597c1f Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Sat, 30 Jul 2022 10:56:43 +0530
Subject: wifi: cfg80211: Add link_id parameter to various key operations for
 MLO

Add support for various key operations on MLD by adding new parameter
link_id. Pass the link_id received from userspace to driver for add_key,
get_key, del_key, set_default_key, set_default_mgmt_key and
set_default_beacon_key to support configuring keys specific to each MLO
link. Userspace must not specify link ID for MLO pairwise key since it
is common for all the MLO links.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/20220730052643.1959111-4-quic_vjakkam@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ffb7c573e299..573db20403dc 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -377,14 +377,22 @@
  *	the non-transmitting interfaces are deleted as well.
  *
  * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified
- *	by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC.
+ *	by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC
+ *	represents peer's MLD address for MLO pairwise key. For MLO group key,
+ *	the link is identified by %NL80211_ATTR_MLO_LINK_ID.
  * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT,
  *	%NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD.
+ *	For MLO connection, the link to set default key is identified by
+ *	%NL80211_ATTR_MLO_LINK_ID.
  * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA,
  *	%NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER,
- *	and %NL80211_ATTR_KEY_SEQ attributes.
+ *	and %NL80211_ATTR_KEY_SEQ attributes. %NL80211_ATTR_MAC represents
+ *	peer's MLD address for MLO pairwise key. The link to add MLO
+ *	group key is identified by %NL80211_ATTR_MLO_LINK_ID.
  * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX
- *	or %NL80211_ATTR_MAC.
+ *	or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC represents peer's MLD address
+ *	for MLO pairwise key. The link to delete group key is identified by
+ *	%NL80211_ATTR_MLO_LINK_ID.
  *
  * @NL80211_CMD_GET_BEACON: (not used)
  * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface
-- 
cgit v1.2.3


From d4ccaf58a8472123ac97e6db03932c375b5c45ba Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Wed, 24 Aug 2022 16:31:13 -0700
Subject: bpf: Introduce cgroup iter

Cgroup_iter is a type of bpf_iter. It walks over cgroups in four modes:

 - walking a cgroup's descendants in pre-order.
 - walking a cgroup's descendants in post-order.
 - walking a cgroup's ancestors.
 - process only the given cgroup.

When attaching cgroup_iter, one can set a cgroup to the iter_link
created from attaching. This cgroup is passed as a file descriptor
or cgroup id and serves as the starting point of the walk. If no
cgroup is specified, the starting point will be the root cgroup v2.

For walking descendants, one can specify the order: either pre-order or
post-order. For walking ancestors, the walk starts at the specified
cgroup and ends at the root.

One can also terminate the walk early by returning 1 from the iter
program.

Note that because walking cgroup hierarchy holds cgroup_mutex, the iter
program is called with cgroup_mutex held.

Currently only one session is supported, which means, depending on the
volume of data bpf program intends to send to user space, the number
of cgroups that can be walked is limited. For example, given the current
buffer size is 8 * PAGE_SIZE, if the program sends 64B data for each
cgroup, assuming PAGE_SIZE is 4kb, the total number of cgroups that can
be walked is 512. This is a limitation of cgroup_iter. If the output
data is larger than the kernel buffer size, after all data in the
kernel buffer is consumed by user space, the subsequent read() syscall
will signal EOPNOTSUPP. In order to work around, the user may have to
update their program to reduce the volume of data sent to output. For
example, skip some uninteresting cgroups. In future, we may extend
bpf_iter flags to allow customizing buffer size.

Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220824233117.1312810-2-haoluo@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 644600dbb114..0f61f09f467a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
 };
 
+enum bpf_cgroup_iter_order {
+	BPF_ITER_ORDER_UNSPEC = 0,
+	BPF_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+};
+
 union bpf_iter_link_info {
 	struct {
 		__u32	map_fd;
 	} map;
+	struct {
+		enum bpf_cgroup_iter_order order;
+
+		/* At most one of cgroup_fd and cgroup_id can be non-zero. If
+		 * both are zero, the walk starts from the default cgroup v2
+		 * root. For walking v1 hierarchy, one should always explicitly
+		 * specify cgroup_fd.
+		 */
+		__u32	cgroup_fd;
+		__u64	cgroup_id;
+	} cgroup;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -6176,11 +6195,22 @@ struct bpf_link_info {
 		struct {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
+
+			/* If the iter specific field is 32 bits, it can be put
+			 * in the first or second union. Otherwise it should be
+			 * put in the second union.
+			 */
 			union {
 				struct {
 					__u32 map_id;
 				} map;
 			};
+			union {
+				struct {
+					__u64 cgroup_id;
+					__u32 order;
+				} cgroup;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
-- 
cgit v1.2.3


From d4ffb6f39f1a1b260966b43a4ffdb64779c650dd Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 25 Aug 2022 15:39:36 -0700
Subject: bpf: Add CGROUP prefix to cgroup_iter_order

bpf_cgroup_iter_order is globally visible but the entries do not have
CGROUP prefix. As requested by Andrii, put a CGROUP in the names
in bpf_cgroup_iter_order.

This patch fixes two previous commits: one introduced the API and
the other uses the API in bpf selftest (that is, the selftest
cgroup_hierarchical_stats).

I tested this patch via the following command:

  test_progs -t cgroup,iter,btf_dump

Fixes: d4ccaf58a847 ("bpf: Introduce cgroup iter")
Fixes: 88886309d2e8 ("selftests/bpf: add a selftest for cgroup hierarchical stats collection")
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220825223936.1865810-1-haoluo@google.com
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/uapi/linux/bpf.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0f61f09f467a..bdf4bc6d8d6b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -88,11 +88,11 @@ struct bpf_cgroup_storage_key {
 };
 
 enum bpf_cgroup_iter_order {
-	BPF_ITER_ORDER_UNSPEC = 0,
-	BPF_ITER_SELF_ONLY,		/* process only a single object. */
-	BPF_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
-	BPF_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
-	BPF_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+	BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
+	BPF_CGROUP_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
 };
 
 union bpf_iter_link_info {
-- 
cgit v1.2.3


From 54c4ef34c4b6f9720fded620e2893894f9f2c554 Mon Sep 17 00:00:00 2001
From: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
Date: Thu, 25 Aug 2022 05:04:49 +0300
Subject: openvswitch: allow specifying ifindex of new interfaces

CRIU is preserving ifindexes of net devices after restoration. However,
current Open vSwitch API does not allow to target ifindex, so we cannot
correctly restore OVS configuration.

Add new OVS_DP_ATTR_IFINDEX for OVS_DP_CMD_NEW and use it as desired
ifindex.
Use OVS_VPORT_ATTR_IFINDEX during OVS_VPORT_CMD_NEW to specify new netdev
ifindex.

Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/openvswitch.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index ce3e1738d427..94066f87e9ee 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -76,6 +76,8 @@ enum ovs_datapath_cmd {
  * datapath.  Always present in notifications.
  * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
  * datapath. Always present in notifications.
+ * @OVS_DP_ATTR_IFINDEX: Interface index for a new datapath netdev. Only
+ * valid for %OVS_DP_CMD_NEW requests.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_DP_* commands.
@@ -92,6 +94,7 @@ enum ovs_datapath_attr {
 	OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
 				     * per-cpu dispatch mode
 				     */
+	OVS_DP_ATTR_IFINDEX,
 	__OVS_DP_ATTR_MAX
 };
 
-- 
cgit v1.2.3


From aa75622c3be4d5819ce69c714acbcbd67bba5d65 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Thu, 25 Aug 2022 23:08:06 +0100
Subject: bpf: Fix a few typos in BPF helpers documentation

Address a few typos in the documentation for the BPF helper functions.
They were reported by Jakub [0], who ran spell checkers on the generated
man page [1].

[0] https://lore.kernel.org/linux-man/d22dcd47-023c-8f52-d369-7b5308e6c842@gmail.com/T/#mb02e7d4b7fb61d98fa914c77b581184e9a9537af
[1] https://lore.kernel.org/linux-man/eb6a1e41-c48e-ac45-5154-ac57a2c76108@gmail.com/T/#m4a8d1b003616928013ffcd1450437309ab652f9f

v3: Do not copy unrelated (and breaking) elements to tools/ header
v2: Turn a ',' into a ';'

Reported-by: Jakub Wilk <jwilk@jwilk.net>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220825220806.107143-1-quentin@isovalent.com
---
 include/uapi/linux/bpf.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bdf4bc6d8d6b..962960a98835 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4456,7 +4456,7 @@ union bpf_attr {
  *
  *		**-EEXIST** if the option already exists.
  *
- *		**-EFAULT** on failrue to parse the existing header options.
+ *		**-EFAULT** on failure to parse the existing header options.
  *
  *		**-EPERM** if the helper cannot be used under the current
  *		*skops*\ **->op**.
@@ -4665,7 +4665,7 @@ union bpf_attr {
  *		a *map* with *task* as the **key**.  From this
  *		perspective,  the usage is not much different from
  *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
- *		helper enforces the key must be an task_struct and the map must also
+ *		helper enforces the key must be a task_struct and the map must also
  *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
  *
  *		Underneath, the value is stored locally at *task* instead of
@@ -4723,7 +4723,7 @@ union bpf_attr {
  *
  * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
  *	Description
- *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		Returns the stored IMA hash of the *inode* (if it's available).
  *		If the hash is larger than *size*, then only *size*
  *		bytes will be copied to *dst*
  *	Return
@@ -4747,12 +4747,12 @@ union bpf_attr {
  *
  *		The argument *len_diff* can be used for querying with a planned
  *		size change. This allows to check MTU prior to changing packet
- *		ctx. Providing an *len_diff* adjustment that is larger than the
+ *		ctx. Providing a *len_diff* adjustment that is larger than the
  *		actual packet size (resulting in negative packet size) will in
- *		principle not exceed the MTU, why it is not considered a
- *		failure.  Other BPF-helpers are needed for performing the
- *		planned size change, why the responsability for catch a negative
- *		packet size belong in those helpers.
+ *		principle not exceed the MTU, which is why it is not considered
+ *		a failure.  Other BPF helpers are needed for performing the
+ *		planned size change; therefore the responsibility for catching
+ *		a negative packet size belongs in those helpers.
  *
  *		Specifying *ifindex* zero means the MTU check is performed
  *		against the current net device.  This is practical if this isn't
-- 
cgit v1.2.3


From abc340b38ba25cd6c7aa2c0bd9150d30738c82d0 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Fri, 26 Aug 2022 14:46:59 +0300
Subject: xfrm: interface: support collect metadata mode

This commit adds support for 'collect_md' mode on xfrm interfaces.

Each net can have one collect_md device, created by providing the
IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be
altered and has no if_id or link device attributes.

On transmit to this device, the if_id is fetched from the attached dst
metadata on the skb. If exists, the link property is also fetched from
the metadata. The dst metadata type used is METADATA_XFRM which holds
these properties.

On the receive side, xfrmi_rcv_cb() populates a dst metadata for each
packet received and attaches it to the skb. The if_id used in this case is
fetched from the xfrm state, and the link is fetched from the incoming
device. This information can later be used by upper layers such as tc,
ebpf, and ip rules.

Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst
metadata is postponed until after scrubing. Similarly, xfrm_input() is
adapted to avoid dropping metadata dsts by only dropping 'valid'
(skb_valid_dst(skb) == true) dsts.

Policy matching on packets arriving from collect_md xfrmi devices is
done by using the xfrm state existing in the skb's sec_path.
The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session()
is changed to keep the details of the if_id extraction tucked away
in xfrm_interface.c.

Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e36d9d2c65a7..d96f13a42589 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -694,6 +694,7 @@ enum {
 	IFLA_XFRM_UNSPEC,
 	IFLA_XFRM_LINK,
 	IFLA_XFRM_IF_ID,
+	IFLA_XFRM_COLLECT_METADATA,
 	__IFLA_XFRM_MAX
 };
 
-- 
cgit v1.2.3


From 2c2493b9da9166478fe072e3054f8a5741dadb02 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Fri, 26 Aug 2022 14:47:00 +0300
Subject: xfrm: lwtunnel: add lwtunnel support for xfrm interfaces in
 collect_md mode

Allow specifying the xfrm interface if_id and link as part of a route
metadata using the lwtunnel infrastructure.

This allows for example using a single xfrm interface in collect_md
mode as the target of multiple routes each specifying a different if_id.

With the appropriate changes to iproute2, considering an xfrm device
ipsec1 in collect_md mode one can for example add a route specifying
an if_id like so:

ip route add <SUBNET> dev ipsec1 encap xfrm if_id 1

In which case traffic routed to the device via this route would use
if_id in the xfrm interface policy lookup.

Or in the context of vrf, one can also specify the "link" property:

ip route add <SUBNET> dev ipsec1 encap xfrm if_id 1 link_dev eth15

Note: LWT_XFRM_LINK uses NLA_U32 similar to IFLA_XFRM_LINK even though
internally "link" is signed. This is consistent with other _LINK
attributes in other devices as well as in bpf and should not have an
effect as device indexes can't be negative.

Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/lwtunnel.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 2e206919125c..229655ef792f 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -15,6 +15,7 @@ enum lwtunnel_encap_types {
 	LWTUNNEL_ENCAP_SEG6_LOCAL,
 	LWTUNNEL_ENCAP_RPL,
 	LWTUNNEL_ENCAP_IOAM6,
+	LWTUNNEL_ENCAP_XFRM,
 	__LWTUNNEL_ENCAP_MAX,
 };
 
@@ -111,4 +112,13 @@ enum {
 
 #define LWT_BPF_MAX_HEADROOM 256
 
+enum {
+	LWT_XFRM_UNSPEC,
+	LWT_XFRM_IF_ID,
+	LWT_XFRM_LINK,
+	__LWT_XFRM_MAX,
+};
+
+#define LWT_XFRM_MAX (__LWT_XFRM_MAX - 1)
+
 #endif /* _UAPI_LWTUNNEL_H_ */
-- 
cgit v1.2.3


From 690252f19f0e486abb8590b3a7a03d4e065d93d4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 25 Aug 2022 20:09:31 -0700
Subject: netlink: add support for ext_ack missing attributes

There is currently no way to report via extack in a structured way
that an attribute is missing. This leads to families resorting to
string messages.

Add a pair of attributes - @offset and @type for machine-readable
way of reporting missing attributes. The @offset points to the
nest which should have contained the attribute, @type is the
expected nla_type. The offset will be skipped if the attribute
is missing at the message level rather than inside a nest.

User space should be able to figure out which attribute enum
(AKA attribute space AKA attribute set) the nest pointed to by
@offset is using.

Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/netlink.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index e0ab261ceca2..e0689dbd2cde 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -140,6 +140,10 @@ struct nlmsgerr {
  *	be used - in the success case - to identify a created
  *	object or operation or similar (binary)
  * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute
+ * @NLMSGERR_ATTR_MISS_TYPE: type of a missing required attribute,
+ *	%NLMSGERR_ATTR_MISS_NEST will not be present if the attribute was
+ *	missing at the message level
+ * @NLMSGERR_ATTR_MISS_NEST: offset of the nest where attribute was missing
  * @__NLMSGERR_ATTR_MAX: number of attributes
  * @NLMSGERR_ATTR_MAX: highest attribute number
  */
@@ -149,6 +153,8 @@ enum nlmsgerr_attrs {
 	NLMSGERR_ATTR_OFFS,
 	NLMSGERR_ATTR_COOKIE,
 	NLMSGERR_ATTR_POLICY,
+	NLMSGERR_ATTR_MISS_TYPE,
+	NLMSGERR_ATTR_MISS_NEST,
 
 	__NLMSGERR_ATTR_MAX,
 	NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
-- 
cgit v1.2.3


From 44c51472bef83bb70b43e2f4b7a592096f32a855 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik@metanetworks.com>
Date: Wed, 31 Aug 2022 17:40:09 +0300
Subject: bpf: Support getting tunnel flags

Existing 'bpf_skb_get_tunnel_key' extracts various tunnel parameters
(id, ttl, tos, local and remote) but does not expose ip_tunnel_info's
tun_flags to the BPF program.

It makes sense to expose tun_flags to the BPF program.

Assume for example multiple GRE tunnels maintained on a single GRE
interface in collect_md mode. The program expects origins to initiate
over GRE, however different origins use different GRE characteristics
(e.g. some prefer to use GRE checksum, some do not; some pass a GRE key,
some do not, etc..).

A BPF program getting tun_flags can therefore remember the relevant
flags (e.g. TUNNEL_CSUM, TUNNEL_SEQ...) for each initiating remote. In
the reply path, the program can use 'bpf_skb_set_tunnel_key' in order
to correctly reply to the remote, using similar characteristics, based
on the stored tunnel flags.

Introduce BPF_F_TUNINFO_FLAGS flag for bpf_skb_get_tunnel_key. If
specified, 'bpf_tunnel_key->tunnel_flags' is set with the tun_flags.

Decided to use the existing unused 'tunnel_ext' as the storage for the
'tunnel_flags' in order to avoid changing bpf_tunnel_key's layout.

Also, the following has been considered during the design:

  1. Convert the "interesting" internal TUNNEL_xxx flags back to BPF_F_yyy
     and place into the new 'tunnel_flags' field. This has 2 drawbacks:

     - The BPF_F_yyy flags are from *set_tunnel_key* enumeration space,
       e.g. BPF_F_ZERO_CSUM_TX. It is awkward that it is "returned" into
       tunnel_flags from a *get_tunnel_key* call.
     - Not all "interesting" TUNNEL_xxx flags can be mapped to existing
       BPF_F_yyy flags, and it doesn't make sense to create new BPF_F_yyy
       flags just for purposes of the returned tunnel_flags.

  2. Place key.tun_flags into 'tunnel_flags' but mask them, keeping only
     "interesting" flags. That's ok, but the drawback is that what's
     "interesting" for my usecase might be limiting for other usecases.

Therefore I decided to expose what's in key.tun_flags *as is*, which seems
most flexible. The BPF user can just choose to ignore bits he's not
interested in. The TUNNEL_xxx are also UAPI, so no harm exposing them
back in the get_tunnel_key call.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220831144010.174110-1-shmulik.ladkani@gmail.com
---
 include/uapi/linux/bpf.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 962960a98835..837c0f9b7fdd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5659,6 +5659,11 @@ enum {
 	BPF_F_SEQ_NUMBER		= (1ULL << 3),
 };
 
+/* BPF_FUNC_skb_get_tunnel_key flags. */
+enum {
+	BPF_F_TUNINFO_FLAGS		= (1ULL << 4),
+};
+
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
@@ -5848,7 +5853,10 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;	/* Padding, future use. */
+	union {
+		__u16 tunnel_ext;	/* compat */
+		__be16 tunnel_flags;
+	};
 	__u32 tunnel_label;
 	union {
 		__u32 local_ipv4;
-- 
cgit v1.2.3


From 5854a09b49574da5a77a0f36ad7b021a2661321d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 31 Aug 2022 14:12:42 -0500
Subject: net/ipv4: Use __DECLARE_FLEX_ARRAY() helper

We now have a cleaner way to keep compatibility with user-space
(a.k.a. not breaking it) when we need to keep in place a one-element
array (for its use in user-space) together with a flexible-array
member (for its use in kernel-space) without making it hard to read
at the source level. This is through the use of the new
__DECLARE_FLEX_ARRAY() helper macro.

The size and memory layout of the structure is preserved after the
changes. See below.

Before changes:

$ pahole -C ip_msfilter net/ipv4/igmp.o
struct ip_msfilter {
	union {
		struct {
			__be32     imsf_multiaddr_aux;   /*     0     4 */
			__be32     imsf_interface_aux;   /*     4     4 */
			__u32      imsf_fmode_aux;       /*     8     4 */
			__u32      imsf_numsrc_aux;      /*    12     4 */
			__be32     imsf_slist[1];        /*    16     4 */
		};                                       /*     0    20 */
		struct {
			__be32     imsf_multiaddr;       /*     0     4 */
			__be32     imsf_interface;       /*     4     4 */
			__u32      imsf_fmode;           /*     8     4 */
			__u32      imsf_numsrc;          /*    12     4 */
			__be32     imsf_slist_flex[0];   /*    16     0 */
		};                                       /*     0    16 */
	};                                               /*     0    20 */

	/* size: 20, cachelines: 1, members: 1 */
	/* last cacheline: 20 bytes */
};

After changes:

$ pahole -C ip_msfilter net/ipv4/igmp.o
struct ip_msfilter {
	__be32                     imsf_multiaddr;       /*     0     4 */
	__be32                     imsf_interface;       /*     4     4 */
	__u32                      imsf_fmode;           /*     8     4 */
	__u32                      imsf_numsrc;          /*    12     4 */
	union {
		__be32             imsf_slist[1];        /*    16     4 */
		struct {
			struct {
			} __empty_imsf_slist_flex;       /*    16     0 */
			__be32     imsf_slist_flex[0];   /*    16     0 */
		};                                       /*    16     0 */
	};                                               /*    16     4 */

	/* size: 20, cachelines: 1, members: 5 */
	/* last cacheline: 20 bytes */
};

In the past, we had to duplicate the whole original structure within
a union, and update the names of all the members. Now, we just need to
declare the flexible-array member to be used in kernel-space through
the __DECLARE_FLEX_ARRAY() helper together with the one-element array,
within a union. This makes the source code more clean and easier to read.

Link: https://github.com/KSPP/linux/issues/193
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in.h | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 14168225cecd..578daa6f816b 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -188,21 +188,13 @@ struct ip_mreq_source {
 };
 
 struct ip_msfilter {
+	__be32		imsf_multiaddr;
+	__be32		imsf_interface;
+	__u32		imsf_fmode;
+	__u32		imsf_numsrc;
 	union {
-		struct {
-			__be32		imsf_multiaddr_aux;
-			__be32		imsf_interface_aux;
-			__u32		imsf_fmode_aux;
-			__u32		imsf_numsrc_aux;
-			__be32		imsf_slist[1];
-		};
-		struct {
-			__be32		imsf_multiaddr;
-			__be32		imsf_interface;
-			__u32		imsf_fmode;
-			__u32		imsf_numsrc;
-			__be32		imsf_slist_flex[];
-		};
+		__be32		imsf_slist[1];
+		__DECLARE_FLEX_ARRAY(__be32, imsf_slist_flex);
 	};
 };
 
-- 
cgit v1.2.3


From a36c421690b3e5dee38fc12abfcabda742f00064 Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Fri, 26 Aug 2022 10:00:31 -0700
Subject: wifi: nl80211: Add POWERED_ADDR_CHANGE feature

Add a new extended feature bit signifying that the wireless hardware
supports changing the MAC address while the underlying net_device is
powered. Note that this has a different meaning from
IFF_LIVE_ADDR_CHANGE as additional restrictions might be imposed by
the hardware, such as:

 - No connection is active on this interface, carrier is off
 - No scan is in progress
 - No offchannel operations are in progress

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 573db20403dc..a00a23840c57 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6281,6 +6281,14 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC
  *	detection.
  *
+ * @NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE: Device can perform a MAC address
+ *	change without having to bring the underlying network device down
+ *	first. For example, in station mode this can be used to vary the
+ *	origin MAC address prior to a connection to a new AP for privacy
+ *	or other reasons. Note that certain driver specific restrictions
+ *	might apply, e.g. no scans in progress, no offchannel operations
+ *	in progress, and no active connections.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -6348,6 +6356,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_BSS_COLOR,
 	NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD,
 	NL80211_EXT_FEATURE_RADAR_BACKGROUND,
+	NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From 6522047c65764c9aaec8009e73daa8c0b138c701 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 2 Sep 2022 16:12:50 +0200
Subject: wifi: nl80211: add MLD address to assoc BSS entries

Add an MLD address attribute to BSS entries that the interface
is currently associated with to help userspace figure out what's
going on.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index a00a23840c57..c32e7616a366 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4959,6 +4959,7 @@ enum nl80211_bss_scan_width {
  *	using the nesting index as the antenna number.
  * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz
  * @NL80211_BSS_MLO_LINK_ID: MLO link ID of the BSS (u8).
+ * @NL80211_BSS_MLD_ADDR: MLD address of this BSS if connected to it.
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -4985,6 +4986,7 @@ enum nl80211_bss {
 	NL80211_BSS_CHAIN_SIGNAL,
 	NL80211_BSS_FREQUENCY_OFFSET,
 	NL80211_BSS_MLO_LINK_ID,
+	NL80211_BSS_MLD_ADDR,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
-- 
cgit v1.2.3


From 27ed9353aec9de4277b3389c9f2b04beb6ab7622 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 31 Aug 2022 08:26:57 -0700
Subject: bpf: Update descriptions for helpers bpf_get_func_arg[_cnt]()

Now instead of the number of arguments, the number of registers
holding argument values are stored in trampoline. Update
the description of bpf_get_func_arg[_cnt]() helpers. Previous
programs without struct arguments should continue to work
as usual.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220831152657.2078805-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 793103b10eab..3df78c56c1bf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5079,12 +5079,12 @@ union bpf_attr {
  *
  * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
  *	Description
- *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		Get **n**-th argument register (zero based) of the traced function (for tracing programs)
  *		returned in **value**.
  *
  *	Return
  *		0 on success.
- *		**-EINVAL** if n >= arguments count of traced function.
+ *		**-EINVAL** if n >= argument register count of traced function.
  *
  * long bpf_get_func_ret(void *ctx, u64 *value)
  *	Description
@@ -5097,10 +5097,11 @@ union bpf_attr {
  *
  * long bpf_get_func_arg_cnt(void *ctx)
  *	Description
- *		Get number of arguments of the traced function (for tracing programs).
+ *		Get number of registers of the traced function (for tracing programs) where
+ *		function arguments are stored in these registers.
  *
  *	Return
- *		The number of arguments of the traced function.
+ *		The number of argument registers of the traced function.
  *
  * int bpf_get_retval(void)
  *	Description
-- 
cgit v1.2.3


From a0a4de4d897f5ce672e086cb6b9f91a306af6953 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 22 Aug 2022 16:41:21 +0200
Subject: netfilter: remove NFPROTO_DECNET

Decnet has been removed. so no need to reserve space in arrays for it.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/uapi/linux/netfilter.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index 53411ccc69db..5a79ccb76701 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -63,7 +63,9 @@ enum {
 	NFPROTO_NETDEV =  5,
 	NFPROTO_BRIDGE =  7,
 	NFPROTO_IPV6   = 10,
+#ifndef __KERNEL__ /* no longer supported by kernel */
 	NFPROTO_DECNET = 12,
+#endif
 	NFPROTO_NUMPROTO,
 };
 
-- 
cgit v1.2.3


From 061834624c87282c6d9d8c5395aaff4380e5e1fc Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Sep 2022 19:07:21 +0200
Subject: can: set CANFD_FDF flag in all CAN FD frame structures

To simplify the testing in user space all struct canfd_frame's provided by
the CAN subsystem of the Linux kernel now have the CANFD_FDF flag set in
canfd_frame::flags.

NB: Handcrafted ETH_P_CANFD frames introduced via PF_PACKET socket might
not set this bit correctly. During the check for sufficient headroom in
PF_PACKET sk_buffs the uninitialized CAN sk_buff data structures are filled.
In the case of a CAN FD frame the CANFD_FDF flag is set accordingly.

As the CAN frame content is already zero initialized in alloc_canfd_skb()
the obsolete initialization of cf->flags in the CTU CAN FD driver has been
removed as it would overwrite the already set CANFD_FDF flag.

Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/all/20220912170725.120748-4-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 90801ada2bbe..7b23eeeb3273 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -141,8 +141,8 @@ struct can_frame {
  * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets
  * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of
  * using struct canfd_frame for mixed CAN / CAN FD content (dual use).
- * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of
- * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux.
+ * Since the introduction of CAN XL the CANFD_FDF flag is set in all CAN FD
+ * frame structures provided by the CAN subsystem of the Linux kernel.
  */
 #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */
 #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */
-- 
cgit v1.2.3


From 1a3e3034c049503ec6992a4a7d573e7fff31fac4 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Sep 2022 19:07:22 +0200
Subject: can: canxl: introduce CAN XL data structure

This patch adds defines for data structures and length information for
CAN XL (CAN with eXtended data Length) which can transfer up to 2048
byte inside a single frame.

Notable changes from CAN FD:

- the 11 bit arbitration field is now named 'priority' instead of 'can_id'
  (there are no 29 bit identifiers nor RTR frames anymore)
- the data length needs a uint16 value to cover up to 2048 byte
  (the length element position is different to struct can[fd]_frame)
- new fields (SDT, AF) and a SEC bit have been introduced
- the virtual CAN interface identifier is not part if the CAN XL frame
  struct as this VCID value is stored in struct skbuff (analog to vlan id)

Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/all/20220912170725.120748-5-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 7b23eeeb3273..dd645ea72306 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -48,6 +48,7 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/stddef.h> /* for offsetof */
 
 /* controller area network (CAN) kernel definitions */
 
@@ -60,6 +61,7 @@
 #define CAN_SFF_MASK 0x000007FFU /* standard frame format (SFF) */
 #define CAN_EFF_MASK 0x1FFFFFFFU /* extended frame format (EFF) */
 #define CAN_ERR_MASK 0x1FFFFFFFU /* omit EFF, RTR, ERR flags */
+#define CANXL_PRIO_MASK CAN_SFF_MASK /* 11 bit priority mask */
 
 /*
  * Controller Area Network Identifier structure
@@ -73,6 +75,7 @@ typedef __u32 canid_t;
 
 #define CAN_SFF_ID_BITS		11
 #define CAN_EFF_ID_BITS		29
+#define CANXL_PRIO_BITS		CAN_SFF_ID_BITS
 
 /*
  * Controller Area Network Error Message Frame Mask structure
@@ -91,6 +94,16 @@ typedef __u32 can_err_mask_t;
 #define CANFD_MAX_DLC 15
 #define CANFD_MAX_DLEN 64
 
+/*
+ * CAN XL payload length and DLC definitions according to ISO 11898-1
+ * CAN XL DLC ranges from 0 .. 2047 => data length from 1 .. 2048 byte
+ */
+#define CANXL_MIN_DLC 0
+#define CANXL_MAX_DLC 2047
+#define CANXL_MAX_DLC_MASK 0x07FF
+#define CANXL_MIN_DLEN 1
+#define CANXL_MAX_DLEN 2048
+
 /**
  * struct can_frame - Classical CAN frame structure (aka CAN 2.0B)
  * @can_id:   CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
@@ -166,8 +179,46 @@ struct canfd_frame {
 	__u8    data[CANFD_MAX_DLEN] __attribute__((aligned(8)));
 };
 
+/*
+ * defined bits for canxl_frame.flags
+ *
+ * The canxl_frame.flags element contains two bits CANXL_XLF and CANXL_SEC
+ * and shares the relative position of the struct can[fd]_frame.len element.
+ * The CANXL_XLF bit ALWAYS needs to be set to indicate a valid CAN XL frame.
+ * As a side effect setting this bit intentionally breaks the length checks
+ * for Classical CAN and CAN FD frames.
+ *
+ * Undefined bits in canxl_frame.flags are reserved and shall be set to zero.
+ */
+#define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */
+#define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */
+
+/**
+ * struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure
+ * @prio:  11 bit arbitration priority with zero'ed CAN_*_FLAG flags
+ * @flags: additional flags for CAN XL
+ * @sdt:   SDU (service data unit) type
+ * @len:   frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN)
+ * @af:    acceptance field
+ * @data:  CAN XL frame payload (CANXL_MIN_DLEN .. CANXL_MAX_DLEN byte)
+ *
+ * @prio shares the same position as @can_id from struct can[fd]_frame.
+ */
+struct canxl_frame {
+	canid_t prio;  /* 11 bit priority for arbitration (canid_t) */
+	__u8    flags; /* additional flags for CAN XL */
+	__u8    sdt;   /* SDU (service data unit) type */
+	__u16   len;   /* frame payload length in byte */
+	__u32   af;    /* acceptance field */
+	__u8    data[CANXL_MAX_DLEN];
+};
+
 #define CAN_MTU		(sizeof(struct can_frame))
 #define CANFD_MTU	(sizeof(struct canfd_frame))
+#define CANXL_MTU	(sizeof(struct canxl_frame))
+#define CANXL_HDR_SIZE	(offsetof(struct canxl_frame, data))
+#define CANXL_MIN_MTU	(CANXL_HDR_SIZE + 64)
+#define CANXL_MAX_MTU	CANXL_MTU
 
 /* particular protocols of the protocol family PF_CAN */
 #define CAN_RAW		1 /* RAW sockets */
-- 
cgit v1.2.3


From fb08cba12b52cba4366e858932307649dc5304e2 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Sep 2022 19:07:23 +0200
Subject: can: canxl: update CAN infrastructure for CAN XL frames

- add new ETH_P_CANXL ethernet protocol type
- update skb checks for CAN XL
- add alloc_canxl_skb() which now needs a data length parameter
- introduce init_can_skb_reserve() to reduce code duplication

Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/all/20220912170725.120748-6-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index d370165bc621..69e0457eb200 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -138,6 +138,7 @@
 #define ETH_P_LOCALTALK 0x0009		/* Localtalk pseudo type 	*/
 #define ETH_P_CAN	0x000C		/* CAN: Controller Area Network */
 #define ETH_P_CANFD	0x000D		/* CANFD: CAN flexible data rate*/
+#define ETH_P_CANXL	0x000E		/* CANXL: eXtended frame Length */
 #define ETH_P_PPPTALK	0x0010		/* Dummy type for Atalk over PPP*/
 #define ETH_P_TR_802_2	0x0011		/* 802.2 frames 		*/
 #define ETH_P_MOBITEX	0x0015		/* Mobitex (kaz@cafe.net)	*/
-- 
cgit v1.2.3


From 626332696d7506e8f844a564277bdba2dc78fcb5 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Sep 2022 19:07:25 +0200
Subject: can: raw: add CAN XL support

Enable CAN_RAW sockets to read and write CAN XL frames analogue to the
CAN FD extension (new CAN_RAW_XL_FRAMES sockopt).

A CAN XL network interface is capable to handle Classical CAN, CAN FD and
CAN XL frames. When CAN_RAW_XL_FRAMES is enabled, the CAN_RAW socket checks
whether the addressed CAN network interface is capable to handle the
provided CAN frame.

In opposite to the fixed number of bytes for
- CAN frames (CAN_MTU = sizeof(struct can_frame))
- CAN FD frames (CANFD_MTU = sizeof(struct can_frame))
the number of bytes when reading/writing CAN XL frames depends on the
number of data bytes. For efficiency reasons the length of the struct
canxl_frame is truncated to the needed size for read/write operations.
This leads to a calculated size of CANXL_HDR_SIZE + canxl_frame::len which
is enforced on write() operations and guaranteed on read() operations.

NB: Valid length values are 1 .. 2048 (CANXL_MIN_DLEN .. CANXL_MAX_DLEN).

Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/all/20220912170725.120748-8-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/raw.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index 3386aa81fdf2..ff12f525c37c 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -62,6 +62,7 @@ enum {
 	CAN_RAW_RECV_OWN_MSGS,	/* receive my own msgs (default:off) */
 	CAN_RAW_FD_FRAMES,	/* allow CAN FD frames (default:off) */
 	CAN_RAW_JOIN_FILTERS,	/* all filters must match to trigger */
+	CAN_RAW_XL_FRAMES,	/* allow CAN XL frames (default:off) */
 };
 
 #endif /* !_UAPI_CAN_RAW_H */
-- 
cgit v1.2.3


From 7e6e1b57162ed6a2d32d2f0929c27d79482ff706 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 6 Sep 2022 11:55:58 +0200
Subject: rtnetlink: advertise allmulti counter

Like what was done with IFLA_PROMISCUITY, add IFLA_ALLMULTI to advertise
the allmulti counter.
The flag IFF_ALLMULTI is advertised only if it was directly set by a
userland app.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e36d9d2c65a7..0bfa9a99ebb6 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -370,6 +370,7 @@ enum {
 	IFLA_GRO_MAX_SIZE,
 	IFLA_TSO_MAX_SIZE,
 	IFLA_TSO_MAX_SEGS,
+	IFLA_ALLMULTI,		/* Allmulti count: > 0 means acts ALLMULTI */
 
 	__IFLA_MAX
 };
-- 
cgit v1.2.3


From 65b32f801bfbc54dc98144a6ec26082b59d131ee Mon Sep 17 00:00:00 2001
From: Wojciech Drewek <wojciech.drewek@intel.com>
Date: Thu, 8 Sep 2022 10:16:40 -0700
Subject: uapi: move IPPROTO_L2TP to in.h

IPPROTO_L2TP is currently defined in l2tp.h, but most of
ip protocols are defined in in.h file. Move it there in order
to keep code clean.

Acked-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/in.h   | 2 ++
 include/uapi/linux/l2tp.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 578daa6f816b..f243ce665f74 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -68,6 +68,8 @@ enum {
 #define IPPROTO_PIM		IPPROTO_PIM
   IPPROTO_COMP = 108,		/* Compression Header Protocol		*/
 #define IPPROTO_COMP		IPPROTO_COMP
+  IPPROTO_L2TP = 115,		/* Layer 2 Tunnelling Protocol		*/
+#define IPPROTO_L2TP		IPPROTO_L2TP
   IPPROTO_SCTP = 132,		/* Stream Control Transport Protocol	*/
 #define IPPROTO_SCTP		IPPROTO_SCTP
   IPPROTO_UDPLITE = 136,	/* UDP-Lite (RFC 3828)			*/
diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index bab8c9708611..7d81c3e1ec29 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -13,8 +13,6 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 
-#define IPPROTO_L2TP		115
-
 /**
  * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets
  * @l2tp_family:  address family number AF_L2TPIP.
-- 
cgit v1.2.3


From 8b189ea08c334f25dbb3d076f8adb8b80491d01d Mon Sep 17 00:00:00 2001
From: Wojciech Drewek <wojciech.drewek@intel.com>
Date: Thu, 8 Sep 2022 10:16:42 -0700
Subject: net/sched: flower: Add L2TPv3 filter

Add support for matching on L2TPv3 session ID.
Session ID can be specified only when ip proto was
set to IPPROTO_L2TP.

Example filter:
  # tc filter add dev $PF1 ingress prio 1 protocol ip \
      flower \
        ip_proto l2tp \
        l2tpv3_sid 1234 \
        skip_sw \
      action mirred egress redirect dev $VF1_PR

Acked-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/pkt_cls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 877309d6ca3c..648a82f32666 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -592,6 +592,8 @@ enum {
 	TCA_FLOWER_KEY_PPPOE_SID,	/* be16 */
 	TCA_FLOWER_KEY_PPP_PROTO,	/* be16 */
 
+	TCA_FLOWER_KEY_L2TPV3_SID,	/* be32 */
+
 	__TCA_FLOWER_MAX,
 };
 
-- 
cgit v1.2.3


From 95f510d0b792f308d3d748242fe960c35bdc2c62 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 11 Sep 2022 04:06:59 +0300
Subject: net: dsa: allow the DSA master to be seen and changed through
 rtnetlink

Some DSA switches have multiple CPU ports, which can be used to improve
CPU termination throughput, but DSA, through dsa_tree_setup_cpu_ports(),
sets up only the first one, leading to suboptimal use of hardware.

The desire is to not change the default configuration but to permit the
user to create a dynamic mapping between individual user ports and the
CPU port that they are served by, configurable through rtnetlink. It is
also intended to permit load balancing between CPU ports, and in that
case, the foreseen model is for the DSA master to be a bonding interface
whose lowers are the physical DSA masters.

To that end, we create a struct rtnl_link_ops for DSA user ports with
the "dsa" kind. We expose the IFLA_DSA_MASTER link attribute that
contains the ifindex of the newly desired DSA master.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/if_link.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 0bfa9a99ebb6..3d39fb398d65 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1375,4 +1375,14 @@ enum {
 
 #define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
 
+/* DSA section */
+
+enum {
+	IFLA_DSA_UNSPEC,
+	IFLA_DSA_MASTER,
+	__IFLA_DSA_MAX,
+};
+
+#define IFLA_DSA_MAX	(__IFLA_DSA_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From 848f3c0d47694924536e2894cb349613201321c6 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Mon, 12 Sep 2022 19:16:18 +0200
Subject: seg6: add NEXT-C-SID support for SRv6 End behavior

The NEXT-C-SID mechanism described in [1] offers the possibility of
encoding several SRv6 segments within a single 128 bit SID address. Such
a SID address is called a Compressed SID (C-SID) container. In this way,
the length of the SID List can be drastically reduced.

A SID instantiated with the NEXT-C-SID flavor considers an IPv6 address
logically structured in three main blocks: i) Locator-Block; ii)
Locator-Node Function; iii) Argument.

                        C-SID container
+------------------------------------------------------------------+
|     Locator-Block      |Loc-Node|            Argument            |
|                        |Function|                                |
+------------------------------------------------------------------+
<--------- B -----------> <- NF -> <------------- A --------------->

   (i) The Locator-Block can be any IPv6 prefix available to the provider;

  (ii) The Locator-Node Function represents the node and the function to
       be triggered when a packet is received on the node;

 (iii) The Argument carries the remaining C-SIDs in the current C-SID
       container.

The NEXT-C-SID mechanism relies on the "flavors" framework defined in
[2]. The flavors represent additional operations that can modify or
extend a subset of the existing behaviors.

This patch introduces the support for flavors in SRv6 End behavior
implementing the NEXT-C-SID one. An SRv6 End behavior with NEXT-C-SID
flavor works as an End behavior but it is capable of processing the
compressed SID List encoded in C-SID containers.

An SRv6 End behavior with NEXT-C-SID flavor can be configured to support
user-provided Locator-Block and Locator-Node Function lengths. In this
implementation, such lengths must be evenly divisible by 8 (i.e. must be
byte-aligned), otherwise the kernel informs the user about invalid
values with a meaningful error code and message through netlink_ext_ack.

If Locator-Block and/or Locator-Node Function lengths are not provided
by the user during configuration of an SRv6 End behavior instance with
NEXT-C-SID flavor, the kernel will choose their default values i.e.,
32-bit Locator-Block and 16-bit Locator-Node Function.

[1] - https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression
[2] - https://datatracker.ietf.org/doc/html/rfc8986

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/seg6_local.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index 332b18f318f8..4fdc424c9cb3 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -28,6 +28,7 @@ enum {
 	SEG6_LOCAL_BPF,
 	SEG6_LOCAL_VRFTABLE,
 	SEG6_LOCAL_COUNTERS,
+	SEG6_LOCAL_FLAVORS,
 	__SEG6_LOCAL_MAX,
 };
 #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
@@ -110,4 +111,27 @@ enum {
 
 #define SEG6_LOCAL_CNT_MAX (__SEG6_LOCAL_CNT_MAX - 1)
 
+/* SRv6 End* Flavor attributes */
+enum {
+	SEG6_LOCAL_FLV_UNSPEC,
+	SEG6_LOCAL_FLV_OPERATION,
+	SEG6_LOCAL_FLV_LCBLOCK_BITS,
+	SEG6_LOCAL_FLV_LCNODE_FN_BITS,
+	__SEG6_LOCAL_FLV_MAX,
+};
+
+#define SEG6_LOCAL_FLV_MAX (__SEG6_LOCAL_FLV_MAX - 1)
+
+/* Designed flavor operations for SRv6 End* Behavior */
+enum {
+	SEG6_LOCAL_FLV_OP_UNSPEC,
+	SEG6_LOCAL_FLV_OP_PSP,
+	SEG6_LOCAL_FLV_OP_USP,
+	SEG6_LOCAL_FLV_OP_USD,
+	SEG6_LOCAL_FLV_OP_NEXT_CSID,
+	__SEG6_LOCAL_FLV_OP_MAX
+};
+
+#define SEG6_LOCAL_FLV_OP_MAX (__SEG6_LOCAL_FLV_OP_MAX - 1)
+
 #endif
-- 
cgit v1.2.3


From 7b5541a932c21f7e07f068a785afcc25986e4893 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 11 Sep 2022 14:03:09 +0200
Subject: headers: Remove some left-over license text in
 include/uapi/linux/netfilter/

When the SPDX-License-Identifier tag has been added, the corresponding
license text has not been removed.

Remove it now.

Also, in xt_connmark.h, move the copyright text at the top of the file
which is a much more common pattern.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/uapi/linux/netfilter/ipset/ip_set.h |  4 ----
 include/uapi/linux/netfilter/xt_AUDIT.h     |  4 ----
 include/uapi/linux/netfilter/xt_connmark.h  | 13 ++++---------
 include/uapi/linux/netfilter/xt_osf.h       | 14 --------------
 4 files changed, 4 insertions(+), 31 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 6397d75899bc..79e5d68b87af 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -3,10 +3,6 @@
  *                         Patrick Schaaf <bof@bof.de>
  *                         Martin Josefsson <gandalf@wlug.westbo.se>
  * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _UAPI_IP_SET_H
 #define _UAPI_IP_SET_H
diff --git a/include/uapi/linux/netfilter/xt_AUDIT.h b/include/uapi/linux/netfilter/xt_AUDIT.h
index 1b314e2f84ac..56a3f6092e0c 100644
--- a/include/uapi/linux/netfilter/xt_AUDIT.h
+++ b/include/uapi/linux/netfilter/xt_AUDIT.h
@@ -4,10 +4,6 @@
  *
  * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
  * (C) 2010-2011 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _XT_AUDIT_TARGET_H
diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h
index f01c19b83a2b..41b578ccd03b 100644
--- a/include/uapi/linux/netfilter/xt_connmark.h
+++ b/include/uapi/linux/netfilter/xt_connmark.h
@@ -1,18 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ */
+
 #ifndef _XT_CONNMARK_H
 #define _XT_CONNMARK_H
 
 #include <linux/types.h>
 
-/* Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
 enum {
 	XT_CONNMARK_SET = 0,
 	XT_CONNMARK_SAVE,
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index 6e466236ca4b..f1f097896bdf 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -1,20 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2003+ Evgeniy Polyakov <johnpol@2ka.mxt.ru>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _XT_OSF_H
-- 
cgit v1.2.3


From 0e426a3ae030a9e891899370229e117158b35de6 Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Wed, 21 Sep 2022 10:46:02 +0000
Subject: bpf, cgroup: Reject prog_attach_flags array when effective query

Attach flags is only valid for attached progs of this layer cgroup,
but not for effective progs. For querying with EFFECTIVE flags,
exporting attach flags does not make sense. So when effective query,
we reject prog_attach_flags array and don't need to populate it.
Also we limit attach_flags to output 0 during effective query.

Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Link: https://lore.kernel.org/r/20220921104604.2340580-2-pulehui@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/uapi/linux/bpf.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59a217ca2dfd..4eff7fc7ae58 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1233,7 +1233,7 @@ enum {
 
 /* Query effective (directly attached + inherited from ancestor cgroups)
  * programs that will be executed for events within a cgroup.
- * attach_flags with this flag are returned only for directly attached programs.
+ * attach_flags with this flag are always returned 0.
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
@@ -1432,7 +1432,10 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
-		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
-- 
cgit v1.2.3


From 583c1f420173f7d84413a1a1fbf5109d798b4faa Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 19 Sep 2022 19:00:57 -0500
Subject: bpf: Define new BPF_MAP_TYPE_USER_RINGBUF map type

We want to support a ringbuf map type where samples are published from
user-space, to be consumed by BPF programs. BPF currently supports a
kernel -> user-space circular ring buffer via the BPF_MAP_TYPE_RINGBUF
map type.  We'll need to define a new map type for user-space -> kernel,
as none of the helpers exported for BPF_MAP_TYPE_RINGBUF will apply
to a user-space producer ring buffer, and we'll want to add one or
more helper functions that would not apply for a kernel-producer
ring buffer.

This patch therefore adds a new BPF_MAP_TYPE_USER_RINGBUF map type
definition. The map type is useless in its current form, as there is no
way to access or use it for anything until we one or more BPF helpers. A
follow-on patch will therefore add a new helper function that allows BPF
programs to run callbacks on samples that are published to the ring
buffer.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-2-void@manifault.com
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3df78c56c1bf..e18c85324db6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -928,6 +928,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
 	BPF_MAP_TYPE_BLOOM_FILTER,
+	BPF_MAP_TYPE_USER_RINGBUF,
 };
 
 /* Note that tracing related programs such as
-- 
cgit v1.2.3


From 20571567384428dfc9fe5cf9f2e942e1df13c2dd Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 19 Sep 2022 19:00:58 -0500
Subject: bpf: Add bpf_user_ringbuf_drain() helper

In a prior change, we added a new BPF_MAP_TYPE_USER_RINGBUF map type which
will allow user-space applications to publish messages to a ring buffer
that is consumed by a BPF program in kernel-space. In order for this
map-type to be useful, it will require a BPF helper function that BPF
programs can invoke to drain samples from the ring buffer, and invoke
callbacks on those samples. This change adds that capability via a new BPF
helper function:

bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx,
                       u64 flags)

BPF programs may invoke this function to run callback_fn() on a series of
samples in the ring buffer. callback_fn() has the following signature:

long callback_fn(struct bpf_dynptr *dynptr, void *context);

Samples are provided to the callback in the form of struct bpf_dynptr *'s,
which the program can read using BPF helper functions for querying
struct bpf_dynptr's.

In order to support bpf_ringbuf_drain(), a new PTR_TO_DYNPTR register
type is added to the verifier to reflect a dynptr that was allocated by
a helper function and passed to a BPF program. Unlike PTR_TO_STACK
dynptrs which are allocated on the stack by a BPF program, PTR_TO_DYNPTR
dynptrs need not use reference tracking, as the BPF helper is trusted to
properly free the dynptr before returning. The verifier currently only
supports PTR_TO_DYNPTR registers that are also DYNPTR_TYPE_LOCAL.

Note that while the corresponding user-space libbpf logic will be added
in a subsequent patch, this patch does contain an implementation of the
.map_poll() callback for BPF_MAP_TYPE_USER_RINGBUF maps. This
.map_poll() callback guarantees that an epoll-waiting user-space
producer will receive at least one event notification whenever at least
one sample is drained in an invocation of bpf_user_ringbuf_drain(),
provided that the function is not invoked with the BPF_RB_NO_WAKEUP
flag. If the BPF_RB_FORCE_WAKEUP flag is provided, a wakeup
notification is sent even if no sample was drained.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-3-void@manifault.com
---
 include/uapi/linux/bpf.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e18c85324db6..ead35f39f185 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5388,6 +5388,43 @@ union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
+ *	Description
+ *		Drain samples from the specified user ring buffer, and invoke
+ *		the provided callback for each such sample:
+ *
+ *		long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
+ *
+ *		If **callback_fn** returns 0, the helper will continue to try
+ *		and drain the next sample, up to a maximum of
+ *		BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+ *		the helper will skip the rest of the samples and return. Other
+ *		return values are not used now, and will be rejected by the
+ *		verifier.
+ *	Return
+ *		The number of drained samples if no error was encountered while
+ *		draining samples, or 0 if no samples were present in the ring
+ *		buffer. If a user-space producer was epoll-waiting on this map,
+ *		and at least one sample was drained, they will receive an event
+ *		notification notifying them of available space in the ring
+ *		buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+ *		function, no wakeup notification will be sent. If the
+ *		BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+ *		be sent even if no sample was drained.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EBUSY** if the ring buffer is contended, and another calling
+ *		context was concurrently draining the ring buffer.
+ *
+ *		**-EINVAL** if user-space is not properly tracking the ring
+ *		buffer due to the producer position not being aligned to 8
+ *		bytes, a sample not being aligned to 8 bytes, or the producer
+ *		position not matching the advertised length of a sample.
+ *
+ *		**-E2BIG** if user-space has tried to publish a sample which is
+ *		larger than the size of the ring buffer, or which cannot fit
+ *		within a struct bpf_dynptr.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5599,6 +5636,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(user_ringbuf_drain),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 0c3e10cb44232833a50cb8e3e784c432906a60c1 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@seco.com>
Date: Tue, 20 Sep 2022 18:12:31 -0400
Subject: net: phy: Add support for rate matching

This adds support for rate matching (also known as rate adaptation) to
the phy subsystem. The general idea is that the phy interface runs at
one speed, and the MAC throttles the rate at which it sends packets to
the link speed. There's a good overview of several techniques for
achieving this at [1]. This patch adds support for three: pause-frame
based (such as in Aquantia phys), CRS-based (such as in 10PASS-TS and
2BASE-TL), and open-loop-based (such as in 10GBASE-W).

This patch makes a few assumptions and a few non assumptions about the
types of rate matching available. First, it assumes that different phys
may use different forms of rate matching. Second, it assumes that phys
can use rate matching for any of their supported link speeds (e.g. if a
phy supports 10BASE-T and XGMII, then it can adapt XGMII to 10BASE-T).
Third, it does not assume that all interface modes will use the same
form of rate matching. Fourth, it does not assume that all phy devices
will support rate matching (even if some do). Relaxing or strengthening
these (non-)assumptions could result in a different API. For example, if
all interface modes were assumed to use the same form of rate matching,
then a bitmask of interface modes supportting rate matching would
suffice.

For some better visibility into the process, the current rate matching
mode is exposed as part of the ethtool ksettings. For the moment, only
read access is supported. I'm not sure what userspace might want to
configure yet (disable it altogether, disable just one mode, specify the
mode to use, etc.). For the moment, since only pause-based rate
adaptation support is added in the next few commits, rate matching can
be disabled altogether by adjusting the advertisement.

802.3 calls this feature "rate adaptation" in clause 49 (10GBASE-R) and
"rate matching" in clause 61 (10PASS-TL and 2BASE-TS). Aquantia also calls
this feature "rate adaptation". I chose "rate matching" because it is
shorter, and because Russell doesn't think "adaptation" is correct in this
context.

Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h         | 18 ++++++++++++++++--
 include/uapi/linux/ethtool_netlink.h |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 2d5741fd44bb..fe9893d1485d 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1840,6 +1840,20 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 #define MASTER_SLAVE_STATE_SLAVE		3
 #define MASTER_SLAVE_STATE_ERR			4
 
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE		0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE	1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS		2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP	3
+
 /* Which connector port. */
 #define PORT_TP			0x00
 #define PORT_AUI		0x01
@@ -2033,8 +2047,8 @@ enum ethtool_reset_flags {
  *	reported consistently by PHYLIB.  Read-only.
  * @master_slave_cfg: Master/slave port mode.
  * @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
  * @reserved: Reserved for future use; see the note on reserved space.
- * @reserved1: Reserved for future use; see the note on reserved space.
  * @link_mode_masks: Variable length bitmaps.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
@@ -2085,7 +2099,7 @@ struct ethtool_link_settings {
 	__u8	transceiver;
 	__u8	master_slave_cfg;
 	__u8	master_slave_state;
-	__u8	reserved1[1];
+	__u8	rate_matching;
 	__u32	reserved[7];
 	__u32	link_mode_masks[];
 	/* layout of link_mode_masks fields:
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index d2fb4f7be61b..408a664fad59 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -242,6 +242,7 @@ enum {
 	ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,	/* u8 */
 	ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,	/* u8 */
 	ETHTOOL_A_LINKMODES_LANES,		/* u32 */
+	ETHTOOL_A_LINKMODES_RATE_MATCHING,	/* u8 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_LINKMODES_CNT,
-- 
cgit v1.2.3


From 0e253f7e558a3e250902ba2034091e0185448836 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 26 Sep 2022 17:33:39 +0200
Subject: bpf: Return value in kprobe get_func_ip only for entry address

Changing return value of kprobe's version of bpf_get_func_ip
to return zero if the attach address is not on the function's
entry point.

For kprobes attached in the middle of the function we can't easily
get to the function address especially now with the CONFIG_X86_KERNEL_IBT
support.

If user cares about current IP for kprobes attached within the
function body, they can get it with PT_REGS_IP(ctx).

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martynas Pumputis <m@lambda.lt>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220926153340.1621984-6-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ead35f39f185..d6bd10759eaf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4951,6 +4951,7 @@ union bpf_attr {
  * 		Get address of the traced function (for tracing and kprobe programs).
  * 	Return
  * 		Address of the traced function.
+ * 		0 for kprobes placed within the function (not at the entry).
  *
  * u64 bpf_get_attach_cookie(void *ctx)
  * 	Description
-- 
cgit v1.2.3


From 73dfe93ea1b319482e6d82a54fe06f953ceeeccb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 22 Sep 2022 20:41:40 +0200
Subject: headers: Remove some left-over license text

Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX
license identifier to uapi header files with a license")

When the SPDX-License-Identifier tag has been added, the corresponding
license text has not been removed.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Alexander Duyck <alexanderduyck@fb.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/88410cddd31197ea26840d7dd71612bece8c6acf.1663871981.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/tc_act/tc_bpf.h        |  5 -----
 include/uapi/linux/tc_act/tc_skbedit.h    | 13 -------------
 include/uapi/linux/tc_act/tc_skbmod.h     |  7 +------
 include/uapi/linux/tc_act/tc_tunnel_key.h |  5 -----
 include/uapi/linux/tc_act/tc_vlan.h       |  5 -----
 5 files changed, 1 insertion(+), 34 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 653c4f94f76e..fe6c8f8f3e8c 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_BPF_H
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 6cb6101208d0..64032513cc4c 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -2,19 +2,6 @@
 /*
  * Copyright (c) 2008, Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
  * Author: Alexander Duyck <alexander.h.duyck@intel.com>
  */
 
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
index af6ef2cfbf3d..ac62c9a993ea 100644
--- a/include/uapi/linux/tc_act/tc_skbmod.h
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -1,12 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2016, Jamal Hadi Salim
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
-*/
+ */
 
 #ifndef __LINUX_TC_SKBMOD_H
 #define __LINUX_TC_SKBMOD_H
diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
index 3f10dc4e7a4b..49ad4033951b 100644
--- a/include/uapi/linux/tc_act/tc_tunnel_key.h
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -2,11 +2,6 @@
 /*
  * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_TUNNEL_KEY_H
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 5b306fe815cc..3e1f8e57cdd2 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_VLAN_H
-- 
cgit v1.2.3


From 62e56ef57c04c0cacb33433d7984a4d71b690b3f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 25 Sep 2022 15:00:33 +0000
Subject: net: tls: Add ARIA-GCM algorithm

RFC 6209 describes ARIA for TLS 1.2.
ARIA-128-GCM and ARIA-256-GCM are defined in RFC 6209.

This patch would offer performance increment and an opportunity for
hardware offload.

Benchmark results:
iperf-ssl are used.
CPU: intel i3-12100.

  TLS(openssl-3.0-dev)
[  3]  0.0- 1.0 sec   185 MBytes  1.55 Gbits/sec
[  3]  1.0- 2.0 sec   186 MBytes  1.56 Gbits/sec
[  3]  2.0- 3.0 sec   186 MBytes  1.56 Gbits/sec
[  3]  3.0- 4.0 sec   186 MBytes  1.56 Gbits/sec
[  3]  4.0- 5.0 sec   186 MBytes  1.56 Gbits/sec
[  3]  0.0- 5.0 sec   927 MBytes  1.56 Gbits/sec
  kTLS(aria-generic)
[  3]  0.0- 1.0 sec   198 MBytes  1.66 Gbits/sec
[  3]  1.0- 2.0 sec   194 MBytes  1.62 Gbits/sec
[  3]  2.0- 3.0 sec   194 MBytes  1.63 Gbits/sec
[  3]  3.0- 4.0 sec   194 MBytes  1.63 Gbits/sec
[  3]  4.0- 5.0 sec   194 MBytes  1.62 Gbits/sec
[  3]  0.0- 5.0 sec   974 MBytes  1.63 Gbits/sec
  kTLS(aria-avx wirh GFNI)
[  3]  0.0- 1.0 sec   632 MBytes  5.30 Gbits/sec
[  3]  1.0- 2.0 sec   657 MBytes  5.51 Gbits/sec
[  3]  2.0- 3.0 sec   657 MBytes  5.51 Gbits/sec
[  3]  3.0- 4.0 sec   656 MBytes  5.50 Gbits/sec
[  3]  4.0- 5.0 sec   656 MBytes  5.50 Gbits/sec
[  3]  0.0- 5.0 sec  3.18 GBytes  5.47 Gbits/sec

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Reviewed-by: Vadim Fedorenko <vfedorenko@novek.ru>
Link: https://lore.kernel.org/r/20220925150033.24615-1-ap420073@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/tls.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index f1157d8f4acd..b66a800389cc 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -100,6 +100,20 @@
 #define TLS_CIPHER_SM4_CCM_TAG_SIZE		16
 #define TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE		8
 
+#define TLS_CIPHER_ARIA_GCM_128				57
+#define TLS_CIPHER_ARIA_GCM_128_IV_SIZE			8
+#define TLS_CIPHER_ARIA_GCM_128_KEY_SIZE		16
+#define TLS_CIPHER_ARIA_GCM_128_SALT_SIZE		4
+#define TLS_CIPHER_ARIA_GCM_128_TAG_SIZE		16
+#define TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE		8
+
+#define TLS_CIPHER_ARIA_GCM_256				58
+#define TLS_CIPHER_ARIA_GCM_256_IV_SIZE			8
+#define TLS_CIPHER_ARIA_GCM_256_KEY_SIZE		32
+#define TLS_CIPHER_ARIA_GCM_256_SALT_SIZE		4
+#define TLS_CIPHER_ARIA_GCM_256_TAG_SIZE		16
+#define TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE		8
+
 #define TLS_SET_RECORD_TYPE	1
 #define TLS_GET_RECORD_TYPE	2
 
@@ -156,6 +170,22 @@ struct tls12_crypto_info_sm4_ccm {
 	unsigned char rec_seq[TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE];
 };
 
+struct tls12_crypto_info_aria_gcm_128 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_ARIA_GCM_128_IV_SIZE];
+	unsigned char key[TLS_CIPHER_ARIA_GCM_128_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_ARIA_GCM_128_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE];
+};
+
+struct tls12_crypto_info_aria_gcm_256 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_ARIA_GCM_256_IV_SIZE];
+	unsigned char key[TLS_CIPHER_ARIA_GCM_256_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_ARIA_GCM_256_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE];
+};
+
 enum {
 	TLS_INFO_UNSPEC,
 	TLS_INFO_VERSION,
-- 
cgit v1.2.3


From f0d74c4da1f060d2a66976193712a5e6abd361f5 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Mon, 26 Sep 2022 11:49:53 -0700
Subject: bpf: Parameterize task iterators.

Allow creating an iterator that loops through resources of one
thread/process.

People could only create iterators to loop through all resources of
files, vma, and tasks in the system, even though they were interested
in only the resources of a specific task or process.  Passing the
additional parameters, people can now create an iterator to go
through all resources or only the resources of a task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
---
 include/uapi/linux/bpf.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d6bd10759eaf..455b21a53aac 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
 		__u32	cgroup_fd;
 		__u64	cgroup_id;
 	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
-- 
cgit v1.2.3


From 21fb6f2aa3890b0d0abf88b7756d0098e9367a7c Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Mon, 26 Sep 2022 11:49:54 -0700
Subject: bpf: Handle bpf_link_info for the parameterized task BPF iterators.

Add new fields to bpf_link_info that users can query it through
bpf_obj_get_info_by_fd().

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-3-kuifeng@fb.com
---
 include/uapi/linux/bpf.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 455b21a53aac..3075018a4ef8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6265,6 +6265,10 @@ struct bpf_link_info {
 					__u64 cgroup_id;
 					__u32 order;
 				} cgroup;
+				struct {
+					__u32 tid;
+					__u32 pid;
+				} task;
 			};
 		} iter;
 		struct  {
-- 
cgit v1.2.3


From 5493a2ad0d20944b16aba7ed7a951a43ad1f5fba Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 27 Sep 2022 14:23:06 -0700
Subject: docs: netlink: clarify the historical baggage of Netlink flags

nlmsg_flags are full of historical baggage, inconsistencies and
strangeness. Try to document it more thoroughly. Explain the meaning
of the ECHO flag (and while at it clarify the comment in the uAPI).
Handwave a little about the NEW request flags and how they make
sense on the surface but cater to really old paradigm before commands
were a thing.

I will add more notes on how to make use of ECHO and discouragement
for reuse of flags to the kernel-side documentation.

Link: https://lore.kernel.org/r/20220927212306.823862-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index e0689dbd2cde..e2ae82e3f9f7 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -62,7 +62,7 @@ struct nlmsghdr {
 #define NLM_F_REQUEST		0x01	/* It is request message. 	*/
 #define NLM_F_MULTI		0x02	/* Multipart message, terminated by NLMSG_DONE */
 #define NLM_F_ACK		0x04	/* Reply with ack, with zero or error code */
-#define NLM_F_ECHO		0x08	/* Echo this request 		*/
+#define NLM_F_ECHO		0x08	/* Receive resulting notifications */
 #define NLM_F_DUMP_INTR		0x10	/* Dump was inconsistent due to sequence change */
 #define NLM_F_DUMP_FILTERED	0x20	/* Dump was filtered as requested */
 
-- 
cgit v1.2.3


From a54fc09e4cba3004443aa05979f8c678196c8226 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 28 Sep 2022 12:51:58 +0300
Subject: net/sched: taprio: allow user input of per-tc max SDU

IEEE 802.1Q clause 12.29.1.1 "The queueMaxSDUTable structure and data
types" and 8.6.8.4 "Enhancements for scheduled traffic" talk about the
existence of a per traffic class limitation of maximum frame sizes, with
a fallback on the port-based MTU.

As far as I am able to understand, the 802.1Q Service Data Unit (SDU)
represents the MAC Service Data Unit (MSDU, i.e. L2 payload), excluding
any number of prepended VLAN headers which may be otherwise present in
the MSDU. Therefore, the queueMaxSDU is directly comparable to the
device MTU (1500 means L2 payload sizes are accepted, or frame sizes of
1518 octets, or 1522 plus one VLAN header). Drivers which offload this
are directly responsible of translating into other units of measurement.

To keep the fast path checks optimized, we keep 2 arrays in the qdisc,
one for max_sdu translated into frame length (so that it's comparable to
skb->len), and another for offloading and for dumping back to the user.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/pkt_sched.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index f292b467b27f..000eec106856 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1232,6 +1232,16 @@ enum {
 #define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST	_BITUL(0)
 #define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD	_BITUL(1)
 
+enum {
+	TCA_TAPRIO_TC_ENTRY_UNSPEC,
+	TCA_TAPRIO_TC_ENTRY_INDEX,		/* u32 */
+	TCA_TAPRIO_TC_ENTRY_MAX_SDU,		/* u32 */
+
+	/* add new constants above here */
+	__TCA_TAPRIO_TC_ENTRY_CNT,
+	TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1)
+};
+
 enum {
 	TCA_TAPRIO_ATTR_UNSPEC,
 	TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
@@ -1245,6 +1255,7 @@ enum {
 	TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
 	TCA_TAPRIO_ATTR_FLAGS, /* u32 */
 	TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */
+	TCA_TAPRIO_ATTR_TC_ENTRY, /* nest */
 	__TCA_TAPRIO_ATTR_MAX,
 };
 
-- 
cgit v1.2.3


From 18ff0bcda6d1dd3d53b4ce3f03e61bf1a648f960 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 3 Oct 2022 08:52:00 +0200
Subject: ethtool: add interface to interact with Ethernet Power Equipment

Add interface to support Power Sourcing Equipment. At current step it
provides generic way to address all variants of PSE devices as defined
in IEEE 802.3-2018 but support only objects specified for IEEE 802.3-2018 104.4
PoDL Power Sourcing Equipment (PSE).

Currently supported and mandatory objects are:
IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus
IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl

This is minimal interface needed to control PSE on each separate
ethernet port but it provides not all mandatory objects specified in
IEEE 802.3-2018.

Since "PoDL PSE" and "PSE" have similar names, but some different values
I decide to not merge them and keep separate naming schema. This should
allow as to be as close to IEEE 802.3 spec as possible and avoid name
conflicts in the future.

This implementation is connected to PHYs instead of MACs because PSE
auto classification can potentially interfere with PHY auto negotiation.
So, may be some extra PHY related initialization will be needed.

With WIP version of ethtools interaction with PSE capable link looks
as following:

$ ip l
...
5: t1l1@eth0: <BROADCAST,MULTICAST> ..
...

$ ethtool --show-pse t1l1
PSE attributs for t1l1:
PoDL PSE Admin State: disabled
PoDL PSE Power Detection Status: disabled

$ ethtool --set-pse t1l1 podl-pse-admin-control enable
$ ethtool --show-pse t1l1
PSE attributs for t1l1:
PoDL PSE Admin State: enabled
PoDL PSE Power Detection Status: delivering power

Signed-off-by: kernel test robot <lkp@intel.com>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h         | 45 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h | 16 +++++++++++++
 2 files changed, 61 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fe9893d1485d..dc2aa3d75b39 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -736,6 +736,51 @@ enum ethtool_module_power_mode {
 	ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
 
+/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ *	functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ * 	unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+	ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+	ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+	ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ *	IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ *	asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ *	false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ *	asserted true when either of the PSE state diagram variables
+ *	pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ *	is asserted true when the PoDL PSE state diagram variable pi_powered is
+ *	true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ *	true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ *	when the logical combination of the PoDL PSE state diagram variables
+ *	pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ *	true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+	ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 408a664fad59..bb57084ac524 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -49,6 +49,8 @@ enum {
 	ETHTOOL_MSG_PHC_VCLOCKS_GET,
 	ETHTOOL_MSG_MODULE_GET,
 	ETHTOOL_MSG_MODULE_SET,
+	ETHTOOL_MSG_PSE_GET,
+	ETHTOOL_MSG_PSE_SET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -94,6 +96,7 @@ enum {
 	ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
 	ETHTOOL_MSG_MODULE_GET_REPLY,
 	ETHTOOL_MSG_MODULE_NTF,
+	ETHTOOL_MSG_PSE_GET_REPLY,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -863,6 +866,19 @@ enum {
 	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
 };
 
+/* Power Sourcing Equipment */
+enum {
+	ETHTOOL_A_PSE_UNSPEC,
+	ETHTOOL_A_PSE_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PODL_PSE_ADMIN_STATE,		/* u32 */
+	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,	/* u32 */
+	ETHTOOL_A_PODL_PSE_PW_D_STATUS,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PSE_CNT,
+	ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
-- 
cgit v1.2.3