From ca34ce29fc4b0e929cc6aada40829d17ab50fee4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 8 Aug 2022 09:47:23 -0700 Subject: bpf: Improve docstring for BPF_F_USER_BUILD_ID flag Most tools which use bpf_get_stack or bpf_get_stackid symbolicate the stack - meaning the stack of addresses in the target process' address space is transformed into meaningful symbol names. The BPF_F_USER_BUILD_ID flag eases this process by finding the build_id of the file-backed vma which the address falls in and translating the address to an offset within the backing file. To be more specific, the offset is a "file offset" from the beginning of the backing file. The symbols in ET_DYN ELF objects have a st_value which is also described as an "offset" - but an offset in the process address space, relative to the base address of the object. It's necessary to translate between the "file offset" and "virtual address offset" during symbolication before they can be directly compared. Failure to do so can lead to confusing bugs, so this patch clarifies language in the documentation in an attempt to keep this from happening. Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220808164723.3107500-1-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be..534e33fb1029 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3008,8 +3008,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject -- cgit v1.2.3 From c8996c98f703b09afe77a1d247dae691c9849dc1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Aug 2022 08:08:02 +0200 Subject: bpf: Add BPF-helper for accessing CLOCK_TAI Commit 3dc6ffae2da2 ("timekeeping: Introduce fast accessor to clock tai") introduced a fast and NMI-safe accessor for CLOCK_TAI. Especially in time sensitive networks (TSN), where all nodes are synchronized by Precision Time Protocol (PTP), it's helpful to have the possibility to generate timestamps based on CLOCK_TAI instead of CLOCK_MONOTONIC. With a BPF helper for TAI in place, it becomes very convenient to correlate activity across different machines in the network. Use cases for such a BPF helper include functionalities such as Tx launch time (e.g. ETF and TAPRIO Qdiscs) and timestamping. Note: CLOCK_TAI is nothing new per se, only the NMI-safe variant of it is. Signed-off-by: Jesper Dangaard Brouer [Kurt: Wrote changelog and renamed helper] Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20220809060803.5773-2-kurt@linutronix.de Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 534e33fb1029..7d1e2794d83e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5341,6 +5341,18 @@ union bpf_attr { * **-EACCES** if the SYN cookie is not valid. * * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5551,6 +5563,7 @@ union bpf_attr { FN(tcp_raw_gen_syncookie_ipv6), \ FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ + FN(ktime_get_tai_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 4961d0772578e8737afe61370743f3bc22867111 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 12 Aug 2022 16:37:27 +0100 Subject: bpf: Clear up confusion in bpf_skb_adjust_room()'s documentation Adding or removing room space _below_ layers 2 or 3, as the description mentions, is ambiguous. This was written with a mental image of the packet with layer 2 at the top, layer 3 under it, and so on. But it has led users to believe that it was on lower layers (before the beginning of the L2 and L3 headers respectively). Let's make it more explicit, and specify between which layers the room space is adjusted. Reported-by: Rumen Telbizov Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220812153727.224500-3-quentin@isovalent.com --- include/uapi/linux/bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7d1e2794d83e..934a2a8beb87 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2573,10 +2573,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). * * The following flags are supported at this time: * -- cgit v1.2.3 From 5d81757835859760a38a54a87eff856d4e578836 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Thu, 18 Aug 2022 18:32:30 +0300 Subject: net: macsec: Expose MACSEC_SALT_LEN definition to user space Expose MACSEC_SALT_LEN definition to user space to be used in various user space applications such as iproute. Iproute will use this as part of adding macsec extended packet number support. Reviewed-by: Raed Salem Reviewed-by: Sabrina Dubroca Signed-off-by: Emeel Hakim Link: https://lore.kernel.org/r/20220818153229.4721-1-ehakim@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_macsec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 3af2aa069a36..d5b6d1f37353 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -22,6 +22,8 @@ #define MACSEC_KEYID_LEN 16 +#define MACSEC_SALT_LEN 12 + /* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL -- cgit v1.2.3 From 1202cdd665315c525b5237e96e0bedc76d7e754f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Aug 2022 17:43:21 -0700 Subject: Remove DECnet support from kernel DECnet is an obsolete network protocol that receives more attention from kernel janitors than users. It belongs in computer protocol history museum not in Linux kernel. It has been "Orphaned" in kernel since 2010. The iproute2 support for DECnet was dropped in 5.0 release. The documentation link on Sourceforge says it is abandoned there as well. Leave the UAPI alone to keep userspace programs compiling. This means that there is still an empty neighbour table for AF_DECNET. The table of /proc/sys/net entries was updated to match current directories and reformatted to be alphabetical. Signed-off-by: Stephen Hemminger Acked-by: David Ahern Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/dn.h | 149 ---------------------------------- include/uapi/linux/netfilter_decnet.h | 72 ---------------- include/uapi/linux/netlink.h | 2 +- 3 files changed, 1 insertion(+), 222 deletions(-) delete mode 100644 include/uapi/linux/dn.h delete mode 100644 include/uapi/linux/netfilter_decnet.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h deleted file mode 100644 index 36ca71bd8bbe..000000000000 --- a/include/uapi/linux/dn.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_DN_H -#define _LINUX_DN_H - -#include -#include -#include - -/* - - DECnet Data Structures and Constants - -*/ - -/* - * DNPROTO_NSP can't be the same as SOL_SOCKET, - * so increment each by one (compared to ULTRIX) - */ -#define DNPROTO_NSP 2 /* NSP protocol number */ -#define DNPROTO_ROU 3 /* Routing protocol number */ -#define DNPROTO_NML 4 /* Net mgt protocol number */ -#define DNPROTO_EVL 5 /* Evl protocol number (usr) */ -#define DNPROTO_EVR 6 /* Evl protocol number (evl) */ -#define DNPROTO_NSPT 7 /* NSP trace protocol number */ - - -#define DN_ADDL 2 -#define DN_MAXADDL 2 /* ULTRIX headers have 20 here, but pathworks has 2 */ -#define DN_MAXOPTL 16 -#define DN_MAXOBJL 16 -#define DN_MAXACCL 40 -#define DN_MAXALIASL 128 -#define DN_MAXNODEL 256 -#define DNBUFSIZE 65023 - -/* - * SET/GET Socket options - must match the DSO_ numbers below - */ -#define SO_CONDATA 1 -#define SO_CONACCESS 2 -#define SO_PROXYUSR 3 -#define SO_LINKINFO 7 - -#define DSO_CONDATA 1 /* Set/Get connect data */ -#define DSO_DISDATA 10 /* Set/Get disconnect data */ -#define DSO_CONACCESS 2 /* Set/Get connect access data */ -#define DSO_ACCEPTMODE 4 /* Set/Get accept mode */ -#define DSO_CONACCEPT 5 /* Accept deferred connection */ -#define DSO_CONREJECT 6 /* Reject deferred connection */ -#define DSO_LINKINFO 7 /* Set/Get link information */ -#define DSO_STREAM 8 /* Set socket type to stream */ -#define DSO_SEQPACKET 9 /* Set socket type to sequenced packet */ -#define DSO_MAXWINDOW 11 /* Maximum window size allowed */ -#define DSO_NODELAY 12 /* Turn off nagle */ -#define DSO_CORK 13 /* Wait for more data! */ -#define DSO_SERVICES 14 /* NSP Services field */ -#define DSO_INFO 15 /* NSP Info field */ -#define DSO_MAX 15 /* Maximum option number */ - - -/* LINK States */ -#define LL_INACTIVE 0 -#define LL_CONNECTING 1 -#define LL_RUNNING 2 -#define LL_DISCONNECTING 3 - -#define ACC_IMMED 0 -#define ACC_DEFER 1 - -#define SDF_WILD 1 /* Wild card object */ -#define SDF_PROXY 2 /* Addr eligible for proxy */ -#define SDF_UICPROXY 4 /* Use uic-based proxy */ - -/* Structures */ - - -struct dn_naddr { - __le16 a_len; - __u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */ -}; - -struct sockaddr_dn { - __u16 sdn_family; - __u8 sdn_flags; - __u8 sdn_objnum; - __le16 sdn_objnamel; - __u8 sdn_objname[DN_MAXOBJL]; - struct dn_naddr sdn_add; -}; -#define sdn_nodeaddrl sdn_add.a_len /* Node address length */ -#define sdn_nodeaddr sdn_add.a_addr /* Node address */ - - - -/* - * DECnet set/get DSO_CONDATA, DSO_DISDATA (optional data) structure - */ -struct optdata_dn { - __le16 opt_status; /* Extended status return */ -#define opt_sts opt_status - __le16 opt_optl; /* Length of user data */ - __u8 opt_data[16]; /* User data */ -}; - -struct accessdata_dn { - __u8 acc_accl; - __u8 acc_acc[DN_MAXACCL]; - __u8 acc_passl; - __u8 acc_pass[DN_MAXACCL]; - __u8 acc_userl; - __u8 acc_user[DN_MAXACCL]; -}; - -/* - * DECnet logical link information structure - */ -struct linkinfo_dn { - __u16 idn_segsize; /* Segment size for link */ - __u8 idn_linkstate; /* Logical link state */ -}; - -/* - * Ethernet address format (for DECnet) - */ -union etheraddress { - __u8 dne_addr[ETH_ALEN]; /* Full ethernet address */ - struct { - __u8 dne_hiord[4]; /* DECnet HIORD prefix */ - __u8 dne_nodeaddr[2]; /* DECnet node address */ - } dne_remote; -}; - - -/* - * DECnet physical socket address format - */ -struct dn_addr { - __le16 dna_family; /* AF_DECnet */ - union etheraddress dna_netaddr; /* DECnet ethernet address */ -}; - -#define DECNET_IOCTL_BASE 0x89 /* PROTOPRIVATE range */ - -#define SIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, struct dn_naddr) -#define SIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, struct dn_naddr) -#define OSIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, int) -#define OSIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, int) - -#endif /* _LINUX_DN_H */ diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h deleted file mode 100644 index 3c77f54560f2..000000000000 --- a/include/uapi/linux/netfilter_decnet.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_DECNET_NETFILTER_H -#define __LINUX_DECNET_NETFILTER_H - -/* DECnet-specific defines for netfilter. - * This file (C) Steve Whitehouse 1999 derived from the - * ipv4 netfilter header file which is - * (C)1998 Rusty Russell -- This code is GPL. - */ - -#include - -/* only for userspace compatibility */ -#ifndef __KERNEL__ - -#include /* for INT_MIN, INT_MAX */ - -/* kernel define is in netfilter_defs.h */ -#define NF_DN_NUMHOOKS 7 -#endif /* ! __KERNEL__ */ - -/* DECnet Hooks */ -/* After promisc drops, checksum checks. */ -#define NF_DN_PRE_ROUTING 0 -/* If the packet is destined for this box. */ -#define NF_DN_LOCAL_IN 1 -/* If the packet is destined for another interface. */ -#define NF_DN_FORWARD 2 -/* Packets coming from a local process. */ -#define NF_DN_LOCAL_OUT 3 -/* Packets about to hit the wire. */ -#define NF_DN_POST_ROUTING 4 -/* Input Hello Packets */ -#define NF_DN_HELLO 5 -/* Input Routing Packets */ -#define NF_DN_ROUTE 6 - -enum nf_dn_hook_priorities { - NF_DN_PRI_FIRST = INT_MIN, - NF_DN_PRI_CONNTRACK = -200, - NF_DN_PRI_MANGLE = -150, - NF_DN_PRI_NAT_DST = -100, - NF_DN_PRI_FILTER = 0, - NF_DN_PRI_NAT_SRC = 100, - NF_DN_PRI_DNRTMSG = 200, - NF_DN_PRI_LAST = INT_MAX, -}; - -struct nf_dn_rtmsg { - int nfdn_ifindex; -}; - -#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) - -#ifndef __KERNEL__ -/* backwards compatibility for userspace */ -#define DNRMG_L1_GROUP 0x01 -#define DNRMG_L2_GROUP 0x02 -#endif - -enum { - DNRNG_NLGRP_NONE, -#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE - DNRNG_NLGRP_L1, -#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 - DNRNG_NLGRP_L2, -#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 - __DNRNG_NLGRP_MAX -}; -#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) - -#endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 855dffb4c1c3..1e543cf0568c 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -20,7 +20,7 @@ #define NETLINK_CONNECTOR 11 #define NETLINK_NETFILTER 12 /* netfilter subsystem */ #define NETLINK_IP6_FW 13 -#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_DNRTMSG 14 /* DECnet routing messages (obsolete) */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ #define NETLINK_GENERIC 16 /* leave room for NETLINK_DM (DM Events) */ -- cgit v1.2.3 From 91350fe152930c0d61a362af68272526490efea5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Sun, 21 Aug 2022 14:35:17 +0300 Subject: bpf, flow_dissector: Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode for bpf progs Currently, attaching BPF_PROG_TYPE_FLOW_DISSECTOR programs completely replaces the flow-dissector logic with custom dissection logic. This forces implementors to write programs that handle dissection for any flows expected in the namespace. It makes sense for flow-dissector BPF programs to just augment the dissector with custom logic (e.g. dissecting certain flows or custom protocols), while enjoying the broad capabilities of the standard dissector for any other traffic. Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode. Flow-dissector BPF programs may return this to indicate no dissection was made, and fallback to the standard dissector is requested. Signed-off-by: Shmulik Ladkani Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220821113519.116765-3-shmulik.ladkani@gmail.com --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 934a2a8beb87..7f87012b012e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5861,6 +5861,11 @@ enum bpf_ret_code { * represented by BPF_REDIRECT above). */ BPF_LWT_REROUTE = 128, + /* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR + * to indicate that no custom dissection was performed, and + * fallback to standard dissector is requested. + */ + BPF_FLOW_DISSECTOR_CONTINUE = 129, }; struct bpf_sock { -- cgit v1.2.3 From 2172fb8007eaafbef18563afb6c1ae5a976bf787 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 23 Aug 2022 15:25:54 -0700 Subject: bpf: update bpf_{g,s}et_retval documentation * replace 'syscall' with 'upper layers', still mention that it's being exported via syscall errno * describe what happens in set_retval(-EPERM) + return 1 * describe what happens with bind's 'return 3' Acked-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220823222555.523590-5-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7f87012b012e..644600dbb114 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5085,17 +5085,29 @@ union bpf_attr { * * int bpf_get_retval(void) * Description - * Get the syscall's return value that will be returned to userspace. + * Get the BPF program's return value that will be returned to the upper layers. * - * This helper is currently supported by cgroup programs only. + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. * Return - * The syscall's return value. + * The BPF program's return value. * * int bpf_set_retval(int retval) * Description - * Set the syscall's return value that will be returned to userspace. + * Set the BPF program's return value that will be returned to the upper layers. + * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. + * + * Note that there is the following corner case where the program exports an error + * via bpf_set_retval but signals success via 'return 1': + * + * bpf_set_retval(-EPERM); + * return 1; + * + * In this case, the BPF program's return value will use helper's -EPERM. This + * still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case. * - * This helper is currently supported by cgroup programs only. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3 From 30b6055428a90cc52d4add164df12b94ab07c3fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Aug 2022 13:02:20 -0700 Subject: net: improve and fix netlink kdoc Subsequent patch will render the kdoc from include/uapi/linux/netlink.h into Documentation. We need to fix the warnings. While at it move the comments on struct nlmsghdr to a proper kdoc comment. Link: https://lore.kernel.org/r/20220819200221.422801-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netlink.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1e543cf0568c..e0ab261ceca2 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -41,12 +41,20 @@ struct sockaddr_nl { __u32 nl_groups; /* multicast groups mask */ }; +/** + * struct nlmsghdr - fixed format metadata header of Netlink messages + * @nlmsg_len: Length of message including header + * @nlmsg_type: Message content type + * @nlmsg_flags: Additional flags + * @nlmsg_seq: Sequence number + * @nlmsg_pid: Sending process port ID + */ struct nlmsghdr { - __u32 nlmsg_len; /* Length of message including header */ - __u16 nlmsg_type; /* Message content */ - __u16 nlmsg_flags; /* Additional flags */ - __u32 nlmsg_seq; /* Sequence number */ - __u32 nlmsg_pid; /* Sending process port ID */ + __u32 nlmsg_len; + __u16 nlmsg_type; + __u16 nlmsg_flags; + __u32 nlmsg_seq; + __u32 nlmsg_pid; }; /* Flags values */ @@ -337,6 +345,9 @@ enum netlink_attribute_type { * bitfield32 type (U32) * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment + * + * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes + * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number */ enum netlink_policy_type_attr { NL_POLICY_TYPE_ATTR_UNSPEC, -- cgit v1.2.3 From e7a7b84e33178db4a839c5e1773247be17597c1f Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Sat, 30 Jul 2022 10:56:43 +0530 Subject: wifi: cfg80211: Add link_id parameter to various key operations for MLO Add support for various key operations on MLD by adding new parameter link_id. Pass the link_id received from userspace to driver for add_key, get_key, del_key, set_default_key, set_default_mgmt_key and set_default_beacon_key to support configuring keys specific to each MLO link. Userspace must not specify link ID for MLO pairwise key since it is common for all the MLO links. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20220730052643.1959111-4-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ffb7c573e299..573db20403dc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -377,14 +377,22 @@ * the non-transmitting interfaces are deleted as well. * * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified - * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. + * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC + * represents peer's MLD address for MLO pairwise key. For MLO group key, + * the link is identified by %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT, * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD. + * For MLO connection, the link to set default key is identified by + * %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA, * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER, - * and %NL80211_ATTR_KEY_SEQ attributes. + * and %NL80211_ATTR_KEY_SEQ attributes. %NL80211_ATTR_MAC represents + * peer's MLD address for MLO pairwise key. The link to add MLO + * group key is identified by %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX - * or %NL80211_ATTR_MAC. + * or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC represents peer's MLD address + * for MLO pairwise key. The link to delete group key is identified by + * %NL80211_ATTR_MLO_LINK_ID. * * @NL80211_CMD_GET_BEACON: (not used) * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface -- cgit v1.2.3 From d4ccaf58a8472123ac97e6db03932c375b5c45ba Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Wed, 24 Aug 2022 16:31:13 -0700 Subject: bpf: Introduce cgroup iter Cgroup_iter is a type of bpf_iter. It walks over cgroups in four modes: - walking a cgroup's descendants in pre-order. - walking a cgroup's descendants in post-order. - walking a cgroup's ancestors. - process only the given cgroup. When attaching cgroup_iter, one can set a cgroup to the iter_link created from attaching. This cgroup is passed as a file descriptor or cgroup id and serves as the starting point of the walk. If no cgroup is specified, the starting point will be the root cgroup v2. For walking descendants, one can specify the order: either pre-order or post-order. For walking ancestors, the walk starts at the specified cgroup and ends at the root. One can also terminate the walk early by returning 1 from the iter program. Note that because walking cgroup hierarchy holds cgroup_mutex, the iter program is called with cgroup_mutex held. Currently only one session is supported, which means, depending on the volume of data bpf program intends to send to user space, the number of cgroups that can be walked is limited. For example, given the current buffer size is 8 * PAGE_SIZE, if the program sends 64B data for each cgroup, assuming PAGE_SIZE is 4kb, the total number of cgroups that can be walked is 512. This is a limitation of cgroup_iter. If the output data is larger than the kernel buffer size, after all data in the kernel buffer is consumed by user space, the subsequent read() syscall will signal EOPNOTSUPP. In order to work around, the user may have to update their program to reduce the volume of data sent to output. For example, skip some uninteresting cgroups. In future, we may extend bpf_iter flags to allow customizing buffer size. Acked-by: Yonghong Song Acked-by: Tejun Heo Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220824233117.1312810-2-haoluo@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 644600dbb114..0f61f09f467a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; +enum bpf_cgroup_iter_order { + BPF_ITER_ORDER_UNSPEC = 0, + BPF_ITER_SELF_ONLY, /* process only a single object. */ + BPF_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ + BPF_ITER_ANCESTORS_UP, /* walk ancestors upward. */ +}; + union bpf_iter_link_info { struct { __u32 map_fd; } map; + struct { + enum bpf_cgroup_iter_order order; + + /* At most one of cgroup_fd and cgroup_id can be non-zero. If + * both are zero, the walk starts from the default cgroup v2 + * root. For walking v1 hierarchy, one should always explicitly + * specify cgroup_fd. + */ + __u32 cgroup_fd; + __u64 cgroup_id; + } cgroup; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -6176,11 +6195,22 @@ struct bpf_link_info { struct { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ + + /* If the iter specific field is 32 bits, it can be put + * in the first or second union. Otherwise it should be + * put in the second union. + */ union { struct { __u32 map_id; } map; }; + union { + struct { + __u64 cgroup_id; + __u32 order; + } cgroup; + }; } iter; struct { __u32 netns_ino; -- cgit v1.2.3 From d4ffb6f39f1a1b260966b43a4ffdb64779c650dd Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 25 Aug 2022 15:39:36 -0700 Subject: bpf: Add CGROUP prefix to cgroup_iter_order bpf_cgroup_iter_order is globally visible but the entries do not have CGROUP prefix. As requested by Andrii, put a CGROUP in the names in bpf_cgroup_iter_order. This patch fixes two previous commits: one introduced the API and the other uses the API in bpf selftest (that is, the selftest cgroup_hierarchical_stats). I tested this patch via the following command: test_progs -t cgroup,iter,btf_dump Fixes: d4ccaf58a847 ("bpf: Introduce cgroup iter") Fixes: 88886309d2e8 ("selftests/bpf: add a selftest for cgroup hierarchical stats collection") Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220825223936.1865810-1-haoluo@google.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f61f09f467a..bdf4bc6d8d6b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -88,11 +88,11 @@ struct bpf_cgroup_storage_key { }; enum bpf_cgroup_iter_order { - BPF_ITER_ORDER_UNSPEC = 0, - BPF_ITER_SELF_ONLY, /* process only a single object. */ - BPF_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ - BPF_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ - BPF_ITER_ANCESTORS_UP, /* walk ancestors upward. */ + BPF_CGROUP_ITER_ORDER_UNSPEC = 0, + BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */ + BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ + BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ }; union bpf_iter_link_info { -- cgit v1.2.3 From 54c4ef34c4b6f9720fded620e2893894f9f2c554 Mon Sep 17 00:00:00 2001 From: Andrey Zhadchenko Date: Thu, 25 Aug 2022 05:04:49 +0300 Subject: openvswitch: allow specifying ifindex of new interfaces CRIU is preserving ifindexes of net devices after restoration. However, current Open vSwitch API does not allow to target ifindex, so we cannot correctly restore OVS configuration. Add new OVS_DP_ATTR_IFINDEX for OVS_DP_CMD_NEW and use it as desired ifindex. Use OVS_VPORT_ATTR_IFINDEX during OVS_VPORT_CMD_NEW to specify new netdev ifindex. Signed-off-by: Andrey Zhadchenko Acked-by: Christian Brauner (Microsoft) Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ce3e1738d427..94066f87e9ee 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -76,6 +76,8 @@ enum ovs_datapath_cmd { * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the * datapath. Always present in notifications. + * @OVS_DP_ATTR_IFINDEX: Interface index for a new datapath netdev. Only + * valid for %OVS_DP_CMD_NEW requests. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. @@ -92,6 +94,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in * per-cpu dispatch mode */ + OVS_DP_ATTR_IFINDEX, __OVS_DP_ATTR_MAX }; -- cgit v1.2.3 From aa75622c3be4d5819ce69c714acbcbd67bba5d65 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 25 Aug 2022 23:08:06 +0100 Subject: bpf: Fix a few typos in BPF helpers documentation Address a few typos in the documentation for the BPF helper functions. They were reported by Jakub [0], who ran spell checkers on the generated man page [1]. [0] https://lore.kernel.org/linux-man/d22dcd47-023c-8f52-d369-7b5308e6c842@gmail.com/T/#mb02e7d4b7fb61d98fa914c77b581184e9a9537af [1] https://lore.kernel.org/linux-man/eb6a1e41-c48e-ac45-5154-ac57a2c76108@gmail.com/T/#m4a8d1b003616928013ffcd1450437309ab652f9f v3: Do not copy unrelated (and breaking) elements to tools/ header v2: Turn a ',' into a ';' Reported-by: Jakub Wilk Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220825220806.107143-1-quentin@isovalent.com --- include/uapi/linux/bpf.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bdf4bc6d8d6b..962960a98835 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4456,7 +4456,7 @@ union bpf_attr { * * **-EEXIST** if the option already exists. * - * **-EFAULT** on failrue to parse the existing header options. + * **-EFAULT** on failure to parse the existing header options. * * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. @@ -4665,7 +4665,7 @@ union bpf_attr { * a *map* with *task* as the **key**. From this * perspective, the usage is not much different from * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this - * helper enforces the key must be an task_struct and the map must also + * helper enforces the key must be a task_struct and the map must also * be a **BPF_MAP_TYPE_TASK_STORAGE**. * * Underneath, the value is stored locally at *task* instead of @@ -4723,7 +4723,7 @@ union bpf_attr { * * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) * Description - * Returns the stored IMA hash of the *inode* (if it's avaialable). + * Returns the stored IMA hash of the *inode* (if it's available). * If the hash is larger than *size*, then only *size* * bytes will be copied to *dst* * Return @@ -4747,12 +4747,12 @@ union bpf_attr { * * The argument *len_diff* can be used for querying with a planned * size change. This allows to check MTU prior to changing packet - * ctx. Providing an *len_diff* adjustment that is larger than the + * ctx. Providing a *len_diff* adjustment that is larger than the * actual packet size (resulting in negative packet size) will in - * principle not exceed the MTU, why it is not considered a - * failure. Other BPF-helpers are needed for performing the - * planned size change, why the responsability for catch a negative - * packet size belong in those helpers. + * principle not exceed the MTU, which is why it is not considered + * a failure. Other BPF helpers are needed for performing the + * planned size change; therefore the responsibility for catching + * a negative packet size belongs in those helpers. * * Specifying *ifindex* zero means the MTU check is performed * against the current net device. This is practical if this isn't -- cgit v1.2.3 From abc340b38ba25cd6c7aa2c0bd9150d30738c82d0 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:46:59 +0300 Subject: xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e36d9d2c65a7..d96f13a42589 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -694,6 +694,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; -- cgit v1.2.3 From 2c2493b9da9166478fe072e3054f8a5741dadb02 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:47:00 +0300 Subject: xfrm: lwtunnel: add lwtunnel support for xfrm interfaces in collect_md mode Allow specifying the xfrm interface if_id and link as part of a route metadata using the lwtunnel infrastructure. This allows for example using a single xfrm interface in collect_md mode as the target of multiple routes each specifying a different if_id. With the appropriate changes to iproute2, considering an xfrm device ipsec1 in collect_md mode one can for example add a route specifying an if_id like so: ip route add dev ipsec1 encap xfrm if_id 1 In which case traffic routed to the device via this route would use if_id in the xfrm interface policy lookup. Or in the context of vrf, one can also specify the "link" property: ip route add dev ipsec1 encap xfrm if_id 1 link_dev eth15 Note: LWT_XFRM_LINK uses NLA_U32 similar to IFLA_XFRM_LINK even though internally "link" is signed. This is consistent with other _LINK attributes in other devices as well as in bpf and should not have an effect as device indexes can't be negative. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/uapi/linux/lwtunnel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 2e206919125c..229655ef792f 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -15,6 +15,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, LWTUNNEL_ENCAP_IOAM6, + LWTUNNEL_ENCAP_XFRM, __LWTUNNEL_ENCAP_MAX, }; @@ -111,4 +112,13 @@ enum { #define LWT_BPF_MAX_HEADROOM 256 +enum { + LWT_XFRM_UNSPEC, + LWT_XFRM_IF_ID, + LWT_XFRM_LINK, + __LWT_XFRM_MAX, +}; + +#define LWT_XFRM_MAX (__LWT_XFRM_MAX - 1) + #endif /* _UAPI_LWTUNNEL_H_ */ -- cgit v1.2.3 From 690252f19f0e486abb8590b3a7a03d4e065d93d4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Aug 2022 20:09:31 -0700 Subject: netlink: add support for ext_ack missing attributes There is currently no way to report via extack in a structured way that an attribute is missing. This leads to families resorting to string messages. Add a pair of attributes - @offset and @type for machine-readable way of reporting missing attributes. The @offset points to the nest which should have contained the attribute, @type is the expected nla_type. The offset will be skipped if the attribute is missing at the message level rather than inside a nest. User space should be able to figure out which attribute enum (AKA attribute space AKA attribute set) the nest pointed to by @offset is using. Reviewed-by: Johannes Berg Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e0ab261ceca2..e0689dbd2cde 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -140,6 +140,10 @@ struct nlmsgerr { * be used - in the success case - to identify a created * object or operation or similar (binary) * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute + * @NLMSGERR_ATTR_MISS_TYPE: type of a missing required attribute, + * %NLMSGERR_ATTR_MISS_NEST will not be present if the attribute was + * missing at the message level + * @NLMSGERR_ATTR_MISS_NEST: offset of the nest where attribute was missing * @__NLMSGERR_ATTR_MAX: number of attributes * @NLMSGERR_ATTR_MAX: highest attribute number */ @@ -149,6 +153,8 @@ enum nlmsgerr_attrs { NLMSGERR_ATTR_OFFS, NLMSGERR_ATTR_COOKIE, NLMSGERR_ATTR_POLICY, + NLMSGERR_ATTR_MISS_TYPE, + NLMSGERR_ATTR_MISS_NEST, __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 -- cgit v1.2.3 From 44c51472bef83bb70b43e2f4b7a592096f32a855 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 31 Aug 2022 17:40:09 +0300 Subject: bpf: Support getting tunnel flags Existing 'bpf_skb_get_tunnel_key' extracts various tunnel parameters (id, ttl, tos, local and remote) but does not expose ip_tunnel_info's tun_flags to the BPF program. It makes sense to expose tun_flags to the BPF program. Assume for example multiple GRE tunnels maintained on a single GRE interface in collect_md mode. The program expects origins to initiate over GRE, however different origins use different GRE characteristics (e.g. some prefer to use GRE checksum, some do not; some pass a GRE key, some do not, etc..). A BPF program getting tun_flags can therefore remember the relevant flags (e.g. TUNNEL_CSUM, TUNNEL_SEQ...) for each initiating remote. In the reply path, the program can use 'bpf_skb_set_tunnel_key' in order to correctly reply to the remote, using similar characteristics, based on the stored tunnel flags. Introduce BPF_F_TUNINFO_FLAGS flag for bpf_skb_get_tunnel_key. If specified, 'bpf_tunnel_key->tunnel_flags' is set with the tun_flags. Decided to use the existing unused 'tunnel_ext' as the storage for the 'tunnel_flags' in order to avoid changing bpf_tunnel_key's layout. Also, the following has been considered during the design: 1. Convert the "interesting" internal TUNNEL_xxx flags back to BPF_F_yyy and place into the new 'tunnel_flags' field. This has 2 drawbacks: - The BPF_F_yyy flags are from *set_tunnel_key* enumeration space, e.g. BPF_F_ZERO_CSUM_TX. It is awkward that it is "returned" into tunnel_flags from a *get_tunnel_key* call. - Not all "interesting" TUNNEL_xxx flags can be mapped to existing BPF_F_yyy flags, and it doesn't make sense to create new BPF_F_yyy flags just for purposes of the returned tunnel_flags. 2. Place key.tun_flags into 'tunnel_flags' but mask them, keeping only "interesting" flags. That's ok, but the drawback is that what's "interesting" for my usecase might be limiting for other usecases. Therefore I decided to expose what's in key.tun_flags *as is*, which seems most flexible. The BPF user can just choose to ignore bits he's not interested in. The TUNNEL_xxx are also UAPI, so no harm exposing them back in the get_tunnel_key call. Signed-off-by: Shmulik Ladkani Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220831144010.174110-1-shmulik.ladkani@gmail.com --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 962960a98835..837c0f9b7fdd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5659,6 +5659,11 @@ enum { BPF_F_SEQ_NUMBER = (1ULL << 3), }; +/* BPF_FUNC_skb_get_tunnel_key flags. */ +enum { + BPF_F_TUNINFO_FLAGS = (1ULL << 4), +}; + /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ @@ -5848,7 +5853,10 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; - __u16 tunnel_ext; /* Padding, future use. */ + union { + __u16 tunnel_ext; /* compat */ + __be16 tunnel_flags; + }; __u32 tunnel_label; union { __u32 local_ipv4; -- cgit v1.2.3 From 5854a09b49574da5a77a0f36ad7b021a2661321d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 31 Aug 2022 14:12:42 -0500 Subject: net/ipv4: Use __DECLARE_FLEX_ARRAY() helper We now have a cleaner way to keep compatibility with user-space (a.k.a. not breaking it) when we need to keep in place a one-element array (for its use in user-space) together with a flexible-array member (for its use in kernel-space) without making it hard to read at the source level. This is through the use of the new __DECLARE_FLEX_ARRAY() helper macro. The size and memory layout of the structure is preserved after the changes. See below. Before changes: $ pahole -C ip_msfilter net/ipv4/igmp.o struct ip_msfilter { union { struct { __be32 imsf_multiaddr_aux; /* 0 4 */ __be32 imsf_interface_aux; /* 4 4 */ __u32 imsf_fmode_aux; /* 8 4 */ __u32 imsf_numsrc_aux; /* 12 4 */ __be32 imsf_slist[1]; /* 16 4 */ }; /* 0 20 */ struct { __be32 imsf_multiaddr; /* 0 4 */ __be32 imsf_interface; /* 4 4 */ __u32 imsf_fmode; /* 8 4 */ __u32 imsf_numsrc; /* 12 4 */ __be32 imsf_slist_flex[0]; /* 16 0 */ }; /* 0 16 */ }; /* 0 20 */ /* size: 20, cachelines: 1, members: 1 */ /* last cacheline: 20 bytes */ }; After changes: $ pahole -C ip_msfilter net/ipv4/igmp.o struct ip_msfilter { __be32 imsf_multiaddr; /* 0 4 */ __be32 imsf_interface; /* 4 4 */ __u32 imsf_fmode; /* 8 4 */ __u32 imsf_numsrc; /* 12 4 */ union { __be32 imsf_slist[1]; /* 16 4 */ struct { struct { } __empty_imsf_slist_flex; /* 16 0 */ __be32 imsf_slist_flex[0]; /* 16 0 */ }; /* 16 0 */ }; /* 16 4 */ /* size: 20, cachelines: 1, members: 5 */ /* last cacheline: 20 bytes */ }; In the past, we had to duplicate the whole original structure within a union, and update the names of all the members. Now, we just need to declare the flexible-array member to be used in kernel-space through the __DECLARE_FLEX_ARRAY() helper together with the one-element array, within a union. This makes the source code more clean and easier to read. Link: https://github.com/KSPP/linux/issues/193 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 14168225cecd..578daa6f816b 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,21 +188,13 @@ struct ip_mreq_source { }; struct ip_msfilter { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; union { - struct { - __be32 imsf_multiaddr_aux; - __be32 imsf_interface_aux; - __u32 imsf_fmode_aux; - __u32 imsf_numsrc_aux; - __be32 imsf_slist[1]; - }; - struct { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist_flex[]; - }; + __be32 imsf_slist[1]; + __DECLARE_FLEX_ARRAY(__be32, imsf_slist_flex); }; }; -- cgit v1.2.3 From a36c421690b3e5dee38fc12abfcabda742f00064 Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Fri, 26 Aug 2022 10:00:31 -0700 Subject: wifi: nl80211: Add POWERED_ADDR_CHANGE feature Add a new extended feature bit signifying that the wireless hardware supports changing the MAC address while the underlying net_device is powered. Note that this has a different meaning from IFF_LIVE_ADDR_CHANGE as additional restrictions might be imposed by the hardware, such as: - No connection is active on this interface, carrier is off - No scan is in progress - No offchannel operations are in progress Signed-off-by: James Prestwood Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 573db20403dc..a00a23840c57 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6281,6 +6281,14 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC * detection. * + * @NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE: Device can perform a MAC address + * change without having to bring the underlying network device down + * first. For example, in station mode this can be used to vary the + * origin MAC address prior to a connection to a new AP for privacy + * or other reasons. Note that certain driver specific restrictions + * might apply, e.g. no scans in progress, no offchannel operations + * in progress, and no active connections. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6348,6 +6356,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, NL80211_EXT_FEATURE_RADAR_BACKGROUND, + NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 6522047c65764c9aaec8009e73daa8c0b138c701 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:50 +0200 Subject: wifi: nl80211: add MLD address to assoc BSS entries Add an MLD address attribute to BSS entries that the interface is currently associated with to help userspace figure out what's going on. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a00a23840c57..c32e7616a366 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4959,6 +4959,7 @@ enum nl80211_bss_scan_width { * using the nesting index as the antenna number. * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz * @NL80211_BSS_MLO_LINK_ID: MLO link ID of the BSS (u8). + * @NL80211_BSS_MLD_ADDR: MLD address of this BSS if connected to it. * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -4985,6 +4986,7 @@ enum nl80211_bss { NL80211_BSS_CHAIN_SIGNAL, NL80211_BSS_FREQUENCY_OFFSET, NL80211_BSS_MLO_LINK_ID, + NL80211_BSS_MLD_ADDR, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From 27ed9353aec9de4277b3389c9f2b04beb6ab7622 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 31 Aug 2022 08:26:57 -0700 Subject: bpf: Update descriptions for helpers bpf_get_func_arg[_cnt]() Now instead of the number of arguments, the number of registers holding argument values are stored in trampoline. Update the description of bpf_get_func_arg[_cnt]() helpers. Previous programs without struct arguments should continue to work as usual. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220831152657.2078805-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 793103b10eab..3df78c56c1bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5079,12 +5079,12 @@ union bpf_attr { * * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) * Description - * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) * returned in **value**. * * Return * 0 on success. - * **-EINVAL** if n >= arguments count of traced function. + * **-EINVAL** if n >= argument register count of traced function. * * long bpf_get_func_ret(void *ctx, u64 *value) * Description @@ -5097,10 +5097,11 @@ union bpf_attr { * * long bpf_get_func_arg_cnt(void *ctx) * Description - * Get number of arguments of the traced function (for tracing programs). + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. * * Return - * The number of arguments of the traced function. + * The number of argument registers of the traced function. * * int bpf_get_retval(void) * Description -- cgit v1.2.3 From a0a4de4d897f5ce672e086cb6b9f91a306af6953 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 22 Aug 2022 16:41:21 +0200 Subject: netfilter: remove NFPROTO_DECNET Decnet has been removed. so no need to reserve space in arrays for it. Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 53411ccc69db..5a79ccb76701 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -63,7 +63,9 @@ enum { NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ NFPROTO_DECNET = 12, +#endif NFPROTO_NUMPROTO, }; -- cgit v1.2.3 From 061834624c87282c6d9d8c5395aaff4380e5e1fc Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:21 +0200 Subject: can: set CANFD_FDF flag in all CAN FD frame structures To simplify the testing in user space all struct canfd_frame's provided by the CAN subsystem of the Linux kernel now have the CANFD_FDF flag set in canfd_frame::flags. NB: Handcrafted ETH_P_CANFD frames introduced via PF_PACKET socket might not set this bit correctly. During the check for sufficient headroom in PF_PACKET sk_buffs the uninitialized CAN sk_buff data structures are filled. In the case of a CAN FD frame the CANFD_FDF flag is set accordingly. As the CAN frame content is already zero initialized in alloc_canfd_skb() the obsolete initialization of cf->flags in the CTU CAN FD driver has been removed as it would overwrite the already set CANFD_FDF flag. Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-4-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 90801ada2bbe..7b23eeeb3273 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -141,8 +141,8 @@ struct can_frame { * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of * using struct canfd_frame for mixed CAN / CAN FD content (dual use). - * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of - * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. + * Since the introduction of CAN XL the CANFD_FDF flag is set in all CAN FD + * frame structures provided by the CAN subsystem of the Linux kernel. */ #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ -- cgit v1.2.3 From 1a3e3034c049503ec6992a4a7d573e7fff31fac4 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:22 +0200 Subject: can: canxl: introduce CAN XL data structure This patch adds defines for data structures and length information for CAN XL (CAN with eXtended data Length) which can transfer up to 2048 byte inside a single frame. Notable changes from CAN FD: - the 11 bit arbitration field is now named 'priority' instead of 'can_id' (there are no 29 bit identifiers nor RTR frames anymore) - the data length needs a uint16 value to cover up to 2048 byte (the length element position is different to struct can[fd]_frame) - new fields (SDT, AF) and a SEC bit have been introduced - the virtual CAN interface identifier is not part if the CAN XL frame struct as this VCID value is stored in struct skbuff (analog to vlan id) Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-5-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 7b23eeeb3273..dd645ea72306 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -48,6 +48,7 @@ #include #include +#include /* for offsetof */ /* controller area network (CAN) kernel definitions */ @@ -60,6 +61,7 @@ #define CAN_SFF_MASK 0x000007FFU /* standard frame format (SFF) */ #define CAN_EFF_MASK 0x1FFFFFFFU /* extended frame format (EFF) */ #define CAN_ERR_MASK 0x1FFFFFFFU /* omit EFF, RTR, ERR flags */ +#define CANXL_PRIO_MASK CAN_SFF_MASK /* 11 bit priority mask */ /* * Controller Area Network Identifier structure @@ -73,6 +75,7 @@ typedef __u32 canid_t; #define CAN_SFF_ID_BITS 11 #define CAN_EFF_ID_BITS 29 +#define CANXL_PRIO_BITS CAN_SFF_ID_BITS /* * Controller Area Network Error Message Frame Mask structure @@ -91,6 +94,16 @@ typedef __u32 can_err_mask_t; #define CANFD_MAX_DLC 15 #define CANFD_MAX_DLEN 64 +/* + * CAN XL payload length and DLC definitions according to ISO 11898-1 + * CAN XL DLC ranges from 0 .. 2047 => data length from 1 .. 2048 byte + */ +#define CANXL_MIN_DLC 0 +#define CANXL_MAX_DLC 2047 +#define CANXL_MAX_DLC_MASK 0x07FF +#define CANXL_MIN_DLEN 1 +#define CANXL_MAX_DLEN 2048 + /** * struct can_frame - Classical CAN frame structure (aka CAN 2.0B) * @can_id: CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition @@ -166,8 +179,46 @@ struct canfd_frame { __u8 data[CANFD_MAX_DLEN] __attribute__((aligned(8))); }; +/* + * defined bits for canxl_frame.flags + * + * The canxl_frame.flags element contains two bits CANXL_XLF and CANXL_SEC + * and shares the relative position of the struct can[fd]_frame.len element. + * The CANXL_XLF bit ALWAYS needs to be set to indicate a valid CAN XL frame. + * As a side effect setting this bit intentionally breaks the length checks + * for Classical CAN and CAN FD frames. + * + * Undefined bits in canxl_frame.flags are reserved and shall be set to zero. + */ +#define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */ +#define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */ + +/** + * struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure + * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags + * @flags: additional flags for CAN XL + * @sdt: SDU (service data unit) type + * @len: frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN) + * @af: acceptance field + * @data: CAN XL frame payload (CANXL_MIN_DLEN .. CANXL_MAX_DLEN byte) + * + * @prio shares the same position as @can_id from struct can[fd]_frame. + */ +struct canxl_frame { + canid_t prio; /* 11 bit priority for arbitration (canid_t) */ + __u8 flags; /* additional flags for CAN XL */ + __u8 sdt; /* SDU (service data unit) type */ + __u16 len; /* frame payload length in byte */ + __u32 af; /* acceptance field */ + __u8 data[CANXL_MAX_DLEN]; +}; + #define CAN_MTU (sizeof(struct can_frame)) #define CANFD_MTU (sizeof(struct canfd_frame)) +#define CANXL_MTU (sizeof(struct canxl_frame)) +#define CANXL_HDR_SIZE (offsetof(struct canxl_frame, data)) +#define CANXL_MIN_MTU (CANXL_HDR_SIZE + 64) +#define CANXL_MAX_MTU CANXL_MTU /* particular protocols of the protocol family PF_CAN */ #define CAN_RAW 1 /* RAW sockets */ -- cgit v1.2.3 From fb08cba12b52cba4366e858932307649dc5304e2 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:23 +0200 Subject: can: canxl: update CAN infrastructure for CAN XL frames - add new ETH_P_CANXL ethernet protocol type - update skb checks for CAN XL - add alloc_canxl_skb() which now needs a data length parameter - introduce init_can_skb_reserve() to reduce code duplication Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-6-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index d370165bc621..69e0457eb200 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -138,6 +138,7 @@ #define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */ #define ETH_P_CAN 0x000C /* CAN: Controller Area Network */ #define ETH_P_CANFD 0x000D /* CANFD: CAN flexible data rate*/ +#define ETH_P_CANXL 0x000E /* CANXL: eXtended frame Length */ #define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/ #define ETH_P_TR_802_2 0x0011 /* 802.2 frames */ #define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */ -- cgit v1.2.3 From 626332696d7506e8f844a564277bdba2dc78fcb5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:25 +0200 Subject: can: raw: add CAN XL support Enable CAN_RAW sockets to read and write CAN XL frames analogue to the CAN FD extension (new CAN_RAW_XL_FRAMES sockopt). A CAN XL network interface is capable to handle Classical CAN, CAN FD and CAN XL frames. When CAN_RAW_XL_FRAMES is enabled, the CAN_RAW socket checks whether the addressed CAN network interface is capable to handle the provided CAN frame. In opposite to the fixed number of bytes for - CAN frames (CAN_MTU = sizeof(struct can_frame)) - CAN FD frames (CANFD_MTU = sizeof(struct can_frame)) the number of bytes when reading/writing CAN XL frames depends on the number of data bytes. For efficiency reasons the length of the struct canxl_frame is truncated to the needed size for read/write operations. This leads to a calculated size of CANXL_HDR_SIZE + canxl_frame::len which is enforced on write() operations and guaranteed on read() operations. NB: Valid length values are 1 .. 2048 (CANXL_MIN_DLEN .. CANXL_MAX_DLEN). Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-8-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/raw.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index 3386aa81fdf2..ff12f525c37c 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -62,6 +62,7 @@ enum { CAN_RAW_RECV_OWN_MSGS, /* receive my own msgs (default:off) */ CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ CAN_RAW_JOIN_FILTERS, /* all filters must match to trigger */ + CAN_RAW_XL_FRAMES, /* allow CAN XL frames (default:off) */ }; #endif /* !_UAPI_CAN_RAW_H */ -- cgit v1.2.3 From 7e6e1b57162ed6a2d32d2f0929c27d79482ff706 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 6 Sep 2022 11:55:58 +0200 Subject: rtnetlink: advertise allmulti counter Like what was done with IFLA_PROMISCUITY, add IFLA_ALLMULTI to advertise the allmulti counter. The flag IFF_ALLMULTI is advertised only if it was directly set by a userland app. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e36d9d2c65a7..0bfa9a99ebb6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -370,6 +370,7 @@ enum { IFLA_GRO_MAX_SIZE, IFLA_TSO_MAX_SIZE, IFLA_TSO_MAX_SEGS, + IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ __IFLA_MAX }; -- cgit v1.2.3 From 65b32f801bfbc54dc98144a6ec26082b59d131ee Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 8 Sep 2022 10:16:40 -0700 Subject: uapi: move IPPROTO_L2TP to in.h IPPROTO_L2TP is currently defined in l2tp.h, but most of ip protocols are defined in in.h file. Move it there in order to keep code clean. Acked-by: Guillaume Nault Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- include/uapi/linux/in.h | 2 ++ include/uapi/linux/l2tp.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 578daa6f816b..f243ce665f74 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -68,6 +68,8 @@ enum { #define IPPROTO_PIM IPPROTO_PIM IPPROTO_COMP = 108, /* Compression Header Protocol */ #define IPPROTO_COMP IPPROTO_COMP + IPPROTO_L2TP = 115, /* Layer 2 Tunnelling Protocol */ +#define IPPROTO_L2TP IPPROTO_L2TP IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */ #define IPPROTO_SCTP IPPROTO_SCTP IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index bab8c9708611..7d81c3e1ec29 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -13,8 +13,6 @@ #include #include -#define IPPROTO_L2TP 115 - /** * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets * @l2tp_family: address family number AF_L2TPIP. -- cgit v1.2.3 From 8b189ea08c334f25dbb3d076f8adb8b80491d01d Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 8 Sep 2022 10:16:42 -0700 Subject: net/sched: flower: Add L2TPv3 filter Add support for matching on L2TPv3 session ID. Session ID can be specified only when ip proto was set to IPPROTO_L2TP. Example filter: # tc filter add dev $PF1 ingress prio 1 protocol ip \ flower \ ip_proto l2tp \ l2tpv3_sid 1234 \ skip_sw \ action mirred egress redirect dev $VF1_PR Acked-by: Guillaume Nault Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 877309d6ca3c..648a82f32666 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -592,6 +592,8 @@ enum { TCA_FLOWER_KEY_PPPOE_SID, /* be16 */ TCA_FLOWER_KEY_PPP_PROTO, /* be16 */ + TCA_FLOWER_KEY_L2TPV3_SID, /* be32 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 95f510d0b792f308d3d748242fe960c35bdc2c62 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 11 Sep 2022 04:06:59 +0300 Subject: net: dsa: allow the DSA master to be seen and changed through rtnetlink Some DSA switches have multiple CPU ports, which can be used to improve CPU termination throughput, but DSA, through dsa_tree_setup_cpu_ports(), sets up only the first one, leading to suboptimal use of hardware. The desire is to not change the default configuration but to permit the user to create a dynamic mapping between individual user ports and the CPU port that they are served by, configurable through rtnetlink. It is also intended to permit load balancing between CPU ports, and in that case, the foreseen model is for the DSA master to be a bonding interface whose lowers are the physical DSA masters. To that end, we create a struct rtnl_link_ops for DSA user ports with the "dsa" kind. We expose the IFLA_DSA_MASTER link attribute that contains the ifindex of the newly desired DSA master. Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0bfa9a99ebb6..3d39fb398d65 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1375,4 +1375,14 @@ enum { #define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) +/* DSA section */ + +enum { + IFLA_DSA_UNSPEC, + IFLA_DSA_MASTER, + __IFLA_DSA_MAX, +}; + +#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From 848f3c0d47694924536e2894cb349613201321c6 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Mon, 12 Sep 2022 19:16:18 +0200 Subject: seg6: add NEXT-C-SID support for SRv6 End behavior The NEXT-C-SID mechanism described in [1] offers the possibility of encoding several SRv6 segments within a single 128 bit SID address. Such a SID address is called a Compressed SID (C-SID) container. In this way, the length of the SID List can be drastically reduced. A SID instantiated with the NEXT-C-SID flavor considers an IPv6 address logically structured in three main blocks: i) Locator-Block; ii) Locator-Node Function; iii) Argument. C-SID container +------------------------------------------------------------------+ | Locator-Block |Loc-Node| Argument | | |Function| | +------------------------------------------------------------------+ <--------- B -----------> <- NF -> <------------- A ---------------> (i) The Locator-Block can be any IPv6 prefix available to the provider; (ii) The Locator-Node Function represents the node and the function to be triggered when a packet is received on the node; (iii) The Argument carries the remaining C-SIDs in the current C-SID container. The NEXT-C-SID mechanism relies on the "flavors" framework defined in [2]. The flavors represent additional operations that can modify or extend a subset of the existing behaviors. This patch introduces the support for flavors in SRv6 End behavior implementing the NEXT-C-SID one. An SRv6 End behavior with NEXT-C-SID flavor works as an End behavior but it is capable of processing the compressed SID List encoded in C-SID containers. An SRv6 End behavior with NEXT-C-SID flavor can be configured to support user-provided Locator-Block and Locator-Node Function lengths. In this implementation, such lengths must be evenly divisible by 8 (i.e. must be byte-aligned), otherwise the kernel informs the user about invalid values with a meaningful error code and message through netlink_ext_ack. If Locator-Block and/or Locator-Node Function lengths are not provided by the user during configuration of an SRv6 End behavior instance with NEXT-C-SID flavor, the kernel will choose their default values i.e., 32-bit Locator-Block and 16-bit Locator-Node Function. [1] - https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression [2] - https://datatracker.ietf.org/doc/html/rfc8986 Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- include/uapi/linux/seg6_local.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 332b18f318f8..4fdc424c9cb3 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -28,6 +28,7 @@ enum { SEG6_LOCAL_BPF, SEG6_LOCAL_VRFTABLE, SEG6_LOCAL_COUNTERS, + SEG6_LOCAL_FLAVORS, __SEG6_LOCAL_MAX, }; #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) @@ -110,4 +111,27 @@ enum { #define SEG6_LOCAL_CNT_MAX (__SEG6_LOCAL_CNT_MAX - 1) +/* SRv6 End* Flavor attributes */ +enum { + SEG6_LOCAL_FLV_UNSPEC, + SEG6_LOCAL_FLV_OPERATION, + SEG6_LOCAL_FLV_LCBLOCK_BITS, + SEG6_LOCAL_FLV_LCNODE_FN_BITS, + __SEG6_LOCAL_FLV_MAX, +}; + +#define SEG6_LOCAL_FLV_MAX (__SEG6_LOCAL_FLV_MAX - 1) + +/* Designed flavor operations for SRv6 End* Behavior */ +enum { + SEG6_LOCAL_FLV_OP_UNSPEC, + SEG6_LOCAL_FLV_OP_PSP, + SEG6_LOCAL_FLV_OP_USP, + SEG6_LOCAL_FLV_OP_USD, + SEG6_LOCAL_FLV_OP_NEXT_CSID, + __SEG6_LOCAL_FLV_OP_MAX +}; + +#define SEG6_LOCAL_FLV_OP_MAX (__SEG6_LOCAL_FLV_OP_MAX - 1) + #endif -- cgit v1.2.3 From 7b5541a932c21f7e07f068a785afcc25986e4893 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Sep 2022 14:03:09 +0200 Subject: headers: Remove some left-over license text in include/uapi/linux/netfilter/ When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Remove it now. Also, in xt_connmark.h, move the copyright text at the top of the file which is a much more common pattern. Signed-off-by: Christophe JAILLET Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/ipset/ip_set.h | 4 ---- include/uapi/linux/netfilter/xt_AUDIT.h | 4 ---- include/uapi/linux/netfilter/xt_connmark.h | 13 ++++--------- include/uapi/linux/netfilter/xt_osf.h | 14 -------------- 4 files changed, 4 insertions(+), 31 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 6397d75899bc..79e5d68b87af 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -3,10 +3,6 @@ * Patrick Schaaf * Martin Josefsson * Copyright (C) 2003-2011 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _UAPI_IP_SET_H #define _UAPI_IP_SET_H diff --git a/include/uapi/linux/netfilter/xt_AUDIT.h b/include/uapi/linux/netfilter/xt_AUDIT.h index 1b314e2f84ac..56a3f6092e0c 100644 --- a/include/uapi/linux/netfilter/xt_AUDIT.h +++ b/include/uapi/linux/netfilter/xt_AUDIT.h @@ -4,10 +4,6 @@ * * (C) 2010-2011 Thomas Graf * (C) 2010-2011 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _XT_AUDIT_TARGET_H diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index f01c19b83a2b..41b578ccd03b 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -1,18 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright (C) 2002,2004 MARA Systems AB + * by Henrik Nordstrom + */ + #ifndef _XT_CONNMARK_H #define _XT_CONNMARK_H #include -/* Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - enum { XT_CONNMARK_SET = 0, XT_CONNMARK_SAVE, diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h index 6e466236ca4b..f1f097896bdf 100644 --- a/include/uapi/linux/netfilter/xt_osf.h +++ b/include/uapi/linux/netfilter/xt_osf.h @@ -1,20 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2003+ Evgeniy Polyakov - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . */ #ifndef _XT_OSF_H -- cgit v1.2.3 From 0e426a3ae030a9e891899370229e117158b35de6 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 21 Sep 2022 10:46:02 +0000 Subject: bpf, cgroup: Reject prog_attach_flags array when effective query Attach flags is only valid for attached progs of this layer cgroup, but not for effective progs. For querying with EFFECTIVE flags, exporting attach flags does not make sense. So when effective query, we reject prog_attach_flags array and don't need to populate it. Also we limit attach_flags to output 0 during effective query. Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP") Signed-off-by: Pu Lehui Link: https://lore.kernel.org/r/20220921104604.2340580-2-pulehui@huaweicloud.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59a217ca2dfd..4eff7fc7ae58 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1233,7 +1233,7 @@ enum { /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0. */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -1432,7 +1432,10 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; - __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ -- cgit v1.2.3 From 583c1f420173f7d84413a1a1fbf5109d798b4faa Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 19 Sep 2022 19:00:57 -0500 Subject: bpf: Define new BPF_MAP_TYPE_USER_RINGBUF map type We want to support a ringbuf map type where samples are published from user-space, to be consumed by BPF programs. BPF currently supports a kernel -> user-space circular ring buffer via the BPF_MAP_TYPE_RINGBUF map type. We'll need to define a new map type for user-space -> kernel, as none of the helpers exported for BPF_MAP_TYPE_RINGBUF will apply to a user-space producer ring buffer, and we'll want to add one or more helper functions that would not apply for a kernel-producer ring buffer. This patch therefore adds a new BPF_MAP_TYPE_USER_RINGBUF map type definition. The map type is useless in its current form, as there is no way to access or use it for anything until we one or more BPF helpers. A follow-on patch will therefore add a new helper function that allows BPF programs to run callbacks on samples that are published to the ring buffer. Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220920000100.477320-2-void@manifault.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3df78c56c1bf..e18c85324db6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -928,6 +928,7 @@ enum bpf_map_type { BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, }; /* Note that tracing related programs such as -- cgit v1.2.3 From 20571567384428dfc9fe5cf9f2e942e1df13c2dd Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 19 Sep 2022 19:00:58 -0500 Subject: bpf: Add bpf_user_ringbuf_drain() helper In a prior change, we added a new BPF_MAP_TYPE_USER_RINGBUF map type which will allow user-space applications to publish messages to a ring buffer that is consumed by a BPF program in kernel-space. In order for this map-type to be useful, it will require a BPF helper function that BPF programs can invoke to drain samples from the ring buffer, and invoke callbacks on those samples. This change adds that capability via a new BPF helper function: bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) BPF programs may invoke this function to run callback_fn() on a series of samples in the ring buffer. callback_fn() has the following signature: long callback_fn(struct bpf_dynptr *dynptr, void *context); Samples are provided to the callback in the form of struct bpf_dynptr *'s, which the program can read using BPF helper functions for querying struct bpf_dynptr's. In order to support bpf_ringbuf_drain(), a new PTR_TO_DYNPTR register type is added to the verifier to reflect a dynptr that was allocated by a helper function and passed to a BPF program. Unlike PTR_TO_STACK dynptrs which are allocated on the stack by a BPF program, PTR_TO_DYNPTR dynptrs need not use reference tracking, as the BPF helper is trusted to properly free the dynptr before returning. The verifier currently only supports PTR_TO_DYNPTR registers that are also DYNPTR_TYPE_LOCAL. Note that while the corresponding user-space libbpf logic will be added in a subsequent patch, this patch does contain an implementation of the .map_poll() callback for BPF_MAP_TYPE_USER_RINGBUF maps. This .map_poll() callback guarantees that an epoll-waiting user-space producer will receive at least one event notification whenever at least one sample is drained in an invocation of bpf_user_ringbuf_drain(), provided that the function is not invoked with the BPF_RB_NO_WAKEUP flag. If the BPF_RB_FORCE_WAKEUP flag is provided, a wakeup notification is sent even if no sample was drained. Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220920000100.477320-3-void@manifault.com --- include/uapi/linux/bpf.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e18c85324db6..ead35f39f185 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5388,6 +5388,43 @@ union bpf_attr { * Return * Current *ktime*. * + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. Other + * return values are not used now, and will be rejected by the + * verifier. + * Return + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. + * + * On failure, the returned value is one of the following: + * + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. + * + * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. + * + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5599,6 +5636,7 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ FN(ktime_get_tai_ns), \ + FN(user_ringbuf_drain), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 0c3e10cb44232833a50cb8e3e784c432906a60c1 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 20 Sep 2022 18:12:31 -0400 Subject: net: phy: Add support for rate matching This adds support for rate matching (also known as rate adaptation) to the phy subsystem. The general idea is that the phy interface runs at one speed, and the MAC throttles the rate at which it sends packets to the link speed. There's a good overview of several techniques for achieving this at [1]. This patch adds support for three: pause-frame based (such as in Aquantia phys), CRS-based (such as in 10PASS-TS and 2BASE-TL), and open-loop-based (such as in 10GBASE-W). This patch makes a few assumptions and a few non assumptions about the types of rate matching available. First, it assumes that different phys may use different forms of rate matching. Second, it assumes that phys can use rate matching for any of their supported link speeds (e.g. if a phy supports 10BASE-T and XGMII, then it can adapt XGMII to 10BASE-T). Third, it does not assume that all interface modes will use the same form of rate matching. Fourth, it does not assume that all phy devices will support rate matching (even if some do). Relaxing or strengthening these (non-)assumptions could result in a different API. For example, if all interface modes were assumed to use the same form of rate matching, then a bitmask of interface modes supportting rate matching would suffice. For some better visibility into the process, the current rate matching mode is exposed as part of the ethtool ksettings. For the moment, only read access is supported. I'm not sure what userspace might want to configure yet (disable it altogether, disable just one mode, specify the mode to use, etc.). For the moment, since only pause-based rate adaptation support is added in the next few commits, rate matching can be disabled altogether by adjusting the advertisement. 802.3 calls this feature "rate adaptation" in clause 49 (10GBASE-R) and "rate matching" in clause 61 (10PASS-TL and 2BASE-TS). Aquantia also calls this feature "rate adaptation". I chose "rate matching" because it is shorter, and because Russell doesn't think "adaptation" is correct in this context. Signed-off-by: Sean Anderson Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 18 ++++++++++++++++-- include/uapi/linux/ethtool_netlink.h | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2d5741fd44bb..fe9893d1485d 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1840,6 +1840,20 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define MASTER_SLAVE_STATE_SLAVE 3 #define MASTER_SLAVE_STATE_ERR 4 +/* These are used to throttle the rate of data on the phy interface when the + * native speed of the interface is higher than the link speed. These should + * not be used for phy interfaces which natively support multiple speeds (e.g. + * MII or SGMII). + */ +/* No rate matching performed. */ +#define RATE_MATCH_NONE 0 +/* The phy sends pause frames to throttle the MAC. */ +#define RATE_MATCH_PAUSE 1 +/* The phy asserts CRS to prevent the MAC from transmitting. */ +#define RATE_MATCH_CRS 2 +/* The MAC is programmed with a sufficiently-large IPG. */ +#define RATE_MATCH_OPEN_LOOP 3 + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 @@ -2033,8 +2047,8 @@ enum ethtool_reset_flags { * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. * @master_slave_state: Master/slave port state. + * @rate_matching: Rate adaptation performed by the PHY * @reserved: Reserved for future use; see the note on reserved space. - * @reserved1: Reserved for future use; see the note on reserved space. * @link_mode_masks: Variable length bitmaps. * * If autonegotiation is disabled, the speed and @duplex represent the @@ -2085,7 +2099,7 @@ struct ethtool_link_settings { __u8 transceiver; __u8 master_slave_cfg; __u8 master_slave_state; - __u8 reserved1[1]; + __u8 rate_matching; __u32 reserved[7]; __u32 link_mode_masks[]; /* layout of link_mode_masks fields: diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d2fb4f7be61b..408a664fad59 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -242,6 +242,7 @@ enum { ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ ETHTOOL_A_LINKMODES_LANES, /* u32 */ + ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ /* add new constants above here */ __ETHTOOL_A_LINKMODES_CNT, -- cgit v1.2.3 From 0e253f7e558a3e250902ba2034091e0185448836 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 26 Sep 2022 17:33:39 +0200 Subject: bpf: Return value in kprobe get_func_ip only for entry address Changing return value of kprobe's version of bpf_get_func_ip to return zero if the attach address is not on the function's entry point. For kprobes attached in the middle of the function we can't easily get to the function address especially now with the CONFIG_X86_KERNEL_IBT support. If user cares about current IP for kprobes attached within the function body, they can get it with PT_REGS_IP(ctx). Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Martynas Pumputis Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20220926153340.1621984-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ead35f39f185..d6bd10759eaf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4951,6 +4951,7 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. + * 0 for kprobes placed within the function (not at the entry). * * u64 bpf_get_attach_cookie(void *ctx) * Description -- cgit v1.2.3 From 73dfe93ea1b319482e6d82a54fe06f953ceeeccb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 22 Sep 2022 20:41:40 +0200 Subject: headers: Remove some left-over license text Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Signed-off-by: Christophe JAILLET Acked-by: Alexander Duyck Reviewed-by: Jiri Pirko Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/88410cddd31197ea26840d7dd71612bece8c6acf.1663871981.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/uapi/linux/tc_act/tc_bpf.h | 5 ----- include/uapi/linux/tc_act/tc_skbedit.h | 13 ------------- include/uapi/linux/tc_act/tc_skbmod.h | 7 +------ include/uapi/linux/tc_act/tc_tunnel_key.h | 5 ----- include/uapi/linux/tc_act/tc_vlan.h | 5 ----- 5 files changed, 1 insertion(+), 34 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 653c4f94f76e..fe6c8f8f3e8c 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -1,11 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2015 Jiri Pirko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_TC_BPF_H diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 6cb6101208d0..64032513cc4c 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -2,19 +2,6 @@ /* * Copyright (c) 2008, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * Author: Alexander Duyck */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index af6ef2cfbf3d..ac62c9a993ea 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -1,12 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2016, Jamal Hadi Salim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. -*/ + */ #ifndef __LINUX_TC_SKBMOD_H #define __LINUX_TC_SKBMOD_H diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 3f10dc4e7a4b..49ad4033951b 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -2,11 +2,6 @@ /* * Copyright (c) 2016, Amir Vadai * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_TC_TUNNEL_KEY_H diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 5b306fe815cc..3e1f8e57cdd2 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -1,11 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2014 Jiri Pirko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_TC_VLAN_H -- cgit v1.2.3 From 62e56ef57c04c0cacb33433d7984a4d71b690b3f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 25 Sep 2022 15:00:33 +0000 Subject: net: tls: Add ARIA-GCM algorithm RFC 6209 describes ARIA for TLS 1.2. ARIA-128-GCM and ARIA-256-GCM are defined in RFC 6209. This patch would offer performance increment and an opportunity for hardware offload. Benchmark results: iperf-ssl are used. CPU: intel i3-12100. TLS(openssl-3.0-dev) [ 3] 0.0- 1.0 sec 185 MBytes 1.55 Gbits/sec [ 3] 1.0- 2.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 2.0- 3.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 3.0- 4.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 4.0- 5.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 0.0- 5.0 sec 927 MBytes 1.56 Gbits/sec kTLS(aria-generic) [ 3] 0.0- 1.0 sec 198 MBytes 1.66 Gbits/sec [ 3] 1.0- 2.0 sec 194 MBytes 1.62 Gbits/sec [ 3] 2.0- 3.0 sec 194 MBytes 1.63 Gbits/sec [ 3] 3.0- 4.0 sec 194 MBytes 1.63 Gbits/sec [ 3] 4.0- 5.0 sec 194 MBytes 1.62 Gbits/sec [ 3] 0.0- 5.0 sec 974 MBytes 1.63 Gbits/sec kTLS(aria-avx wirh GFNI) [ 3] 0.0- 1.0 sec 632 MBytes 5.30 Gbits/sec [ 3] 1.0- 2.0 sec 657 MBytes 5.51 Gbits/sec [ 3] 2.0- 3.0 sec 657 MBytes 5.51 Gbits/sec [ 3] 3.0- 4.0 sec 656 MBytes 5.50 Gbits/sec [ 3] 4.0- 5.0 sec 656 MBytes 5.50 Gbits/sec [ 3] 0.0- 5.0 sec 3.18 GBytes 5.47 Gbits/sec Signed-off-by: Taehee Yoo Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20220925150033.24615-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/tls.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index f1157d8f4acd..b66a800389cc 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -100,6 +100,20 @@ #define TLS_CIPHER_SM4_CCM_TAG_SIZE 16 #define TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE 8 +#define TLS_CIPHER_ARIA_GCM_128 57 +#define TLS_CIPHER_ARIA_GCM_128_IV_SIZE 8 +#define TLS_CIPHER_ARIA_GCM_128_KEY_SIZE 16 +#define TLS_CIPHER_ARIA_GCM_128_SALT_SIZE 4 +#define TLS_CIPHER_ARIA_GCM_128_TAG_SIZE 16 +#define TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE 8 + +#define TLS_CIPHER_ARIA_GCM_256 58 +#define TLS_CIPHER_ARIA_GCM_256_IV_SIZE 8 +#define TLS_CIPHER_ARIA_GCM_256_KEY_SIZE 32 +#define TLS_CIPHER_ARIA_GCM_256_SALT_SIZE 4 +#define TLS_CIPHER_ARIA_GCM_256_TAG_SIZE 16 +#define TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD_TYPE 2 @@ -156,6 +170,22 @@ struct tls12_crypto_info_sm4_ccm { unsigned char rec_seq[TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_aria_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_ARIA_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_ARIA_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_ARIA_GCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE]; +}; + +struct tls12_crypto_info_aria_gcm_256 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_ARIA_GCM_256_IV_SIZE]; + unsigned char key[TLS_CIPHER_ARIA_GCM_256_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_ARIA_GCM_256_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE]; +}; + enum { TLS_INFO_UNSPEC, TLS_INFO_VERSION, -- cgit v1.2.3 From f0d74c4da1f060d2a66976193712a5e6abd361f5 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:53 -0700 Subject: bpf: Parameterize task iterators. Allow creating an iterator that loops through resources of one thread/process. People could only create iterators to loop through all resources of files, vma, and tasks in the system, even though they were interested in only the resources of a specific task or process. Passing the additional parameters, people can now create an iterator to go through all resources or only the resources of a task. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d6bd10759eaf..455b21a53aac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -110,6 +110,12 @@ union bpf_iter_link_info { __u32 cgroup_fd; __u64 cgroup_id; } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ -- cgit v1.2.3 From 21fb6f2aa3890b0d0abf88b7756d0098e9367a7c Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:54 -0700 Subject: bpf: Handle bpf_link_info for the parameterized task BPF iterators. Add new fields to bpf_link_info that users can query it through bpf_obj_get_info_by_fd(). Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-3-kuifeng@fb.com --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 455b21a53aac..3075018a4ef8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6265,6 +6265,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 order; } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; }; } iter; struct { -- cgit v1.2.3 From 5493a2ad0d20944b16aba7ed7a951a43ad1f5fba Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Sep 2022 14:23:06 -0700 Subject: docs: netlink: clarify the historical baggage of Netlink flags nlmsg_flags are full of historical baggage, inconsistencies and strangeness. Try to document it more thoroughly. Explain the meaning of the ECHO flag (and while at it clarify the comment in the uAPI). Handwave a little about the NEW request flags and how they make sense on the surface but cater to really old paradigm before commands were a thing. I will add more notes on how to make use of ECHO and discouragement for reuse of flags to the kernel-side documentation. Link: https://lore.kernel.org/r/20220927212306.823862-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e0689dbd2cde..e2ae82e3f9f7 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -62,7 +62,7 @@ struct nlmsghdr { #define NLM_F_REQUEST 0x01 /* It is request message. */ #define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ #define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ -#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_ECHO 0x08 /* Receive resulting notifications */ #define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ #define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ -- cgit v1.2.3 From a54fc09e4cba3004443aa05979f8c678196c8226 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 28 Sep 2022 12:51:58 +0300 Subject: net/sched: taprio: allow user input of per-tc max SDU IEEE 802.1Q clause 12.29.1.1 "The queueMaxSDUTable structure and data types" and 8.6.8.4 "Enhancements for scheduled traffic" talk about the existence of a per traffic class limitation of maximum frame sizes, with a fallback on the port-based MTU. As far as I am able to understand, the 802.1Q Service Data Unit (SDU) represents the MAC Service Data Unit (MSDU, i.e. L2 payload), excluding any number of prepended VLAN headers which may be otherwise present in the MSDU. Therefore, the queueMaxSDU is directly comparable to the device MTU (1500 means L2 payload sizes are accepted, or frame sizes of 1518 octets, or 1522 plus one VLAN header). Drivers which offload this are directly responsible of translating into other units of measurement. To keep the fast path checks optimized, we keep 2 arrays in the qdisc, one for max_sdu translated into frame length (so that it's comparable to skb->len), and another for offloading and for dumping back to the user. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f292b467b27f..000eec106856 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1232,6 +1232,16 @@ enum { #define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST _BITUL(0) #define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD _BITUL(1) +enum { + TCA_TAPRIO_TC_ENTRY_UNSPEC, + TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */ + + /* add new constants above here */ + __TCA_TAPRIO_TC_ENTRY_CNT, + TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1) +}; + enum { TCA_TAPRIO_ATTR_UNSPEC, TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ @@ -1245,6 +1255,7 @@ enum { TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ TCA_TAPRIO_ATTR_FLAGS, /* u32 */ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */ + TCA_TAPRIO_ATTR_TC_ENTRY, /* nest */ __TCA_TAPRIO_ATTR_MAX, }; -- cgit v1.2.3 From 18ff0bcda6d1dd3d53b4ce3f03e61bf1a648f960 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 3 Oct 2022 08:52:00 +0200 Subject: ethtool: add interface to interact with Ethernet Power Equipment Add interface to support Power Sourcing Equipment. At current step it provides generic way to address all variants of PSE devices as defined in IEEE 802.3-2018 but support only objects specified for IEEE 802.3-2018 104.4 PoDL Power Sourcing Equipment (PSE). Currently supported and mandatory objects are: IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl This is minimal interface needed to control PSE on each separate ethernet port but it provides not all mandatory objects specified in IEEE 802.3-2018. Since "PoDL PSE" and "PSE" have similar names, but some different values I decide to not merge them and keep separate naming schema. This should allow as to be as close to IEEE 802.3 spec as possible and avoid name conflicts in the future. This implementation is connected to PHYs instead of MACs because PSE auto classification can potentially interfere with PHY auto negotiation. So, may be some extra PHY related initialization will be needed. With WIP version of ethtools interaction with PSE capable link looks as following: $ ip l ... 5: t1l1@eth0: .. ... $ ethtool --show-pse t1l1 PSE attributs for t1l1: PoDL PSE Admin State: disabled PoDL PSE Power Detection Status: disabled $ ethtool --set-pse t1l1 podl-pse-admin-control enable $ ethtool --show-pse t1l1 PSE attributs for t1l1: PoDL PSE Admin State: enabled PoDL PSE Power Detection Status: delivering power Signed-off-by: kernel test robot Signed-off-by: Oleksij Rempel Reviewed-by: Bagas Sanjaya Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 45 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 16 +++++++++++++ 2 files changed, 61 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fe9893d1485d..dc2aa3d75b39 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -736,6 +736,51 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are + * unknown + * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled + * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled + */ +enum ethtool_podl_pse_admin_state { + ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, + ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, + ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, +}; + +/** + * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. + * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE + * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is + * asserted true when the PoDL PSE state diagram variable mr_pse_enable is + * false" + * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is + * asserted true when either of the PSE state diagram variables + * pi_detecting or pi_classifying is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” + * is asserted true when the PoDL PSE state diagram variable pi_powered is + * true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted + * true when the PoDL PSE state diagram variable pi_sleeping is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true + * when the logical combination of the PoDL PSE state diagram variables + * pi_prebiased*!pi_sleeping is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted + * true when the PoDL PSE state diagram variable overload_held is true." + */ +enum ethtool_podl_pse_pw_d_status { + ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, + ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, + ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, + ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, + ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, + ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, + ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 408a664fad59..bb57084ac524 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -49,6 +49,8 @@ enum { ETHTOOL_MSG_PHC_VCLOCKS_GET, ETHTOOL_MSG_MODULE_GET, ETHTOOL_MSG_MODULE_SET, + ETHTOOL_MSG_PSE_GET, + ETHTOOL_MSG_PSE_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -94,6 +96,7 @@ enum { ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, ETHTOOL_MSG_MODULE_GET_REPLY, ETHTOOL_MSG_MODULE_NTF, + ETHTOOL_MSG_PSE_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -863,6 +866,19 @@ enum { ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) }; +/* Power Sourcing Equipment */ +enum { + ETHTOOL_A_PSE_UNSPEC, + ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PSE_CNT, + ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3