summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-10-31 05:10:11 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2023-10-31 05:10:11 -1000
commit89ed67ef126c4160349c1b96fdb775ea6170ac90 (patch)
tree98caaf8bba44b21f9345a0af1dd2bd9987764e27 /include/linux
parent5a6a09e97199d6600d31383055f9d43fbbcbe86f (diff)
parentf1c73396133cb3d913e2075298005644ee8dfade (diff)
downloadlinux-89ed67ef126c4160349c1b96fdb775ea6170ac90.tar.gz
linux-89ed67ef126c4160349c1b96fdb775ea6170ac90.tar.bz2
linux-89ed67ef126c4160349c1b96fdb775ea6170ac90.zip
Merge tag 'net-next-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Support usec resolution of TCP timestamps, enabled selectively by a route attribute. - Defer regular TCP ACK while processing socket backlog, try to send a cumulative ACK at the end. Increase single TCP flow performance on a 200Gbit NIC by 20% (100Gbit -> 120Gbit). - The Fair Queuing (FQ) packet scheduler: - add built-in 3 band prio / WRR scheduling - support bypass if the qdisc is mostly idle (5% speed up for TCP RR) - improve inactive flow reporting - optimize the layout of structures for better cache locality - Support TCP Authentication Option (RFC 5925, TCP-AO), a more modern replacement for the old MD5 option. - Add more retransmission timeout (RTO) related statistics to TCP_INFO. - Support sending fragmented skbs over vsock sockets. - Make sure we send SIGPIPE for vsock sockets if socket was shutdown(). - Add sysctl for ignoring lower limit on lifetime in Router Advertisement PIO, based on an in-progress IETF draft. - Add sysctl to control activation of TCP ping-pong mode. - Add sysctl to make connection timeout in MPTCP configurable. - Support rcvlowat and notsent_lowat on MPTCP sockets, to help apps limit the number of wakeups. - Support netlink GET for MDB (multicast forwarding), allowing user space to request a single MDB entry instead of dumping the entire table. - Support selective FDB flushing in the VXLAN tunnel driver. - Allow limiting learned FDB entries in bridges, prevent OOM attacks. - Allow controlling via configfs netconsole targets which were created via the kernel cmdline at boot, rather than via configfs at runtime. - Support multiple PTP timestamp event queue readers with different filters. - MCTP over I3C. BPF: - Add new veth-like netdevice where BPF program defines the logic of the xmit routine. It can operate in L3 and L2 mode. - Support exceptions - allow asserting conditions which should never be true but are hard for the verifier to infer. With some extra flexibility around handling of the exit / failure: https://lwn.net/Articles/938435/ - Add support for local per-cpu kptr, allow allocating and storing per-cpu objects in maps. Access to those objects operates on the value for the current CPU. This allows to deprecate local one-off implementations of per-CPU storage like BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE maps. - Extend cgroup BPF sockaddr hooks for UNIX sockets. The use case is for systemd to re-implement the LogNamespace feature which allows running multiple instances of systemd-journald to process the logs of different services. - Enable open-coded task_vma iteration, after maple tree conversion made it hard to directly walk VMAs in tracing programs. - Add open-coded task, css_task and css iterator support. One of the use cases is customizable OOM victim selection via BPF. - Allow source address selection with bpf_*_fib_lookup(). - Add ability to pin BPF timer to the current CPU. - Prevent creation of infinite loops by combining tail calls and fentry/fexit programs. - Add missed stats for kprobes to retrieve the number of missed kprobe executions and subsequent executions of BPF programs. - Inherit system settings for CPU security mitigations. - Add BPF v4 CPU instruction support for arm32 and s390x. Changes to common code: - overflow: add DEFINE_FLEX() for on-stack definition of structs with flexible array members. - Process doc update with more guidance for reviewers. Driver API: - Simplify locking in WiFi (cfg80211 and mac80211 layers), use wiphy mutex in most places and remove a lot of smaller locks. - Create a common DPLL configuration API. Allow configuring and querying state of PLL circuits used for clock syntonization, in network time distribution. - Unify fragmented and full page allocation APIs in page pool code. Let drivers be ignorant of PAGE_SIZE. - Rework PHY state machine to avoid races with calls to phy_stop(). - Notify DSA drivers of MAC address changes on user ports, improve correctness of offloads which depend on matching port MAC addresses. - Allow antenna control on injected WiFi frames. - Reduce the number of variants of napi_schedule(). - Simplify error handling when composing devlink health messages. Misc: - A lot of KCSAN data race "fixes", from Eric. - A lot of __counted_by() annotations, from Kees. - A lot of strncpy -> strscpy and printf format fixes. - Replace master/slave terminology with conduit/user in DSA drivers. - Handful of KUnit tests for netdev and WiFi core. Removed: - AppleTalk COPS. - AppleTalk ipddp. - TI AR7 CPMAC Ethernet driver. Drivers: - Ethernet high-speed NICs: - Intel (100G, ice, idpf): - add a driver for the Intel E2000 IPUs - make CRC/FCS stripping configurable - cross-timestamping for E823 devices - basic support for E830 devices - use aux-bus for managing client drivers - i40e: report firmware versions via devlink - nVidia/Mellanox: - support 4-port NICs - increase max number of channels to 256 - optimize / parallelize SF creation flow - Broadcom (bnxt): - enhance NIC temperature reporting - support PAM4 speeds and lane configuration - Marvell OcteonTX2: - PTP pulse-per-second output support - enable hardware timestamping for VFs - Solarflare/AMD: - conntrack NAT offload and offload for tunnels - Wangxun (ngbe/txgbe): - expose HW statistics - Pensando/AMD: - support PCI level reset - narrow down the condition under which skbs are linearized - Netronome/Corigine (nfp): - support CHACHA20-POLY1305 crypto in IPsec offload - Ethernet NICs embedded, slower, virtual: - Synopsys (stmmac): - add Loongson-1 SoC support - enable use of HW queues with no offload capabilities - enable PPS input support on all 5 channels - increase TX coalesce timer to 5ms - RealTek USB (r8152): improve efficiency of Rx by using GRO frags - xen: support SW packet timestamping - add drivers for implementations based on TI's PRUSS (AM64x EVM) - nVidia/Mellanox Ethernet datacenter switches: - avoid poor HW resource use on Spectrum-4 by better block selection for IPv6 multicast forwarding and ordering of blocks in ACL region - Ethernet embedded switches: - Microchip: - support configuring the drive strength for EMI compliance - ksz9477: partial ACL support - ksz9477: HSR offload - ksz9477: Wake on LAN - Realtek: - rtl8366rb: respect device tree config of the CPU port - Ethernet PHYs: - support Broadcom BCM5221 PHYs - TI dp83867: support hardware LED blinking - CAN: - add support for Linux-PHY based CAN transceivers - at91_can: clean up and use rx-offload helpers - WiFi: - MediaTek (mt76): - new sub-driver for mt7925 USB/PCIe devices - HW wireless <> Ethernet bridging in MT7988 chips - mt7603/mt7628 stability improvements - Qualcomm (ath12k): - WCN7850: - enable 320 MHz channels in 6 GHz band - hardware rfkill support - enable IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS to make scan faster - read board data variant name from SMBIOS - QCN9274: mesh support - RealTek (rtw89): - TDMA-based multi-channel concurrency (MCC) - Silicon Labs (wfx): - Remain-On-Channel (ROC) support - Bluetooth: - ISO: many improvements for broadcast support - mark BCM4378/BCM4387 as BROKEN_LE_CODED - add support for QCA2066 - btmtksdio: enable Bluetooth wakeup from suspend" * tag 'net-next-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1816 commits) net: pcs: xpcs: Add 2500BASE-X case in get state for XPCS drivers net: bpf: Use sockopt_lock_sock() in ip_sock_set_tos() net: mana: Use xdp_set_features_flag instead of direct assignment vxlan: Cleanup IFLA_VXLAN_PORT_RANGE entry in vxlan_get_size() iavf: delete the iavf client interface iavf: add a common function for undoing the interrupt scheme iavf: use unregister_netdev iavf: rely on netdev's own registered state iavf: fix the waiting time for initial reset iavf: in iavf_down, don't queue watchdog_task if comms failed iavf: simplify mutex_trylock+sleep loops iavf: fix comments about old bit locks doc/netlink: Update schema to support cmd-cnt-name and cmd-max-name tools: ynl: introduce option to process unknown attributes or types ipvlan: properly track tx_errors netdevsim: Block until all devices are released nfp: using napi_build_skb() to replace build_skb() net: dsa: microchip: ksz9477: Fix spelling mistake "Enery" -> "Energy" net: dsa: microchip: Ensure Stable PME Pin State for Wake-on-LAN net: dsa: microchip: Refactor switch shutdown routine for WoL preparation ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/avf/virtchnl.h15
-rw-r--r--include/linux/bpf-cgroup-defs.h5
-rw-r--r--include/linux/bpf-cgroup.h90
-rw-r--r--include/linux/bpf.h64
-rw-r--r--include/linux/bpf_mem_alloc.h1
-rw-r--r--include/linux/bpf_verifier.h44
-rw-r--r--include/linux/brcmphy.h10
-rw-r--r--include/linux/btf.h1
-rw-r--r--include/linux/can/dev.h4
-rw-r--r--include/linux/ceph/mon_client.h2
-rw-r--r--include/linux/cgroup.h12
-rw-r--r--include/linux/compiler_types.h32
-rw-r--r--include/linux/dpll.h170
-rw-r--r--include/linux/dsa/sja1105.h2
-rw-r--r--include/linux/ethtool.h19
-rw-r--r--include/linux/filter.h67
-rw-r--r--include/linux/fortify-string.h4
-rw-r--r--include/linux/i3c/master.h11
-rw-r--r--include/linux/ieee80211.h106
-rw-r--r--include/linux/igmp.h2
-rw-r--r--include/linux/ipv6.h50
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/linkmode.h18
-rw-r--r--include/linux/micrel_phy.h4
-rw-r--r--include/linux/mlx5/device.h3
-rw-r--r--include/linux/mlx5/driver.h20
-rw-r--r--include/linux/mlx5/fs.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h131
-rw-r--r--include/linux/mm_types.h13
-rw-r--r--include/linux/netdevice.h100
-rw-r--r--include/linux/netfilter.h10
-rw-r--r--include/linux/overflow.h35
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/pds/pds_core_if.h1
-rw-r--r--include/linux/percpu.h1
-rw-r--r--include/linux/phy.h1
-rw-r--r--include/linux/phylink.h56
-rw-r--r--include/linux/posix-clock.h35
-rw-r--r--include/linux/soc/mediatek/mtk_wed.h76
-rw-r--r--include/linux/socket.h1
-rw-r--r--include/linux/sockptr.h23
-rw-r--r--include/linux/stmmac.h2
-rw-r--r--include/linux/tcp.h61
-rw-r--r--include/linux/trace_events.h6
-rw-r--r--include/linux/udp.h66
-rw-r--r--include/linux/virtio_vsock.h10
46 files changed, 1084 insertions, 305 deletions
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index d0807ad43f93..6b3acf15be5c 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -4,6 +4,10 @@
#ifndef _VIRTCHNL_H_
#define _VIRTCHNL_H_
+#include <linux/bitops.h>
+#include <linux/overflow.h>
+#include <uapi/linux/if_ether.h>
+
/* Description:
* This header file describes the Virtual Function (VF) - Physical Function
* (PF) communication protocol used by the drivers for all devices starting
@@ -240,6 +244,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6)
/* used to negotiate communicating link speeds in Mbps */
#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7)
+#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10)
#define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15)
#define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16)
#define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17)
@@ -295,7 +300,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info);
/* VIRTCHNL_OP_CONFIG_RX_QUEUE
* VF sends this message to set up parameters for one RX queue.
* External data buffer contains one instance of virtchnl_rxq_info.
- * PF configures requested queue and returns a status code.
+ * PF configures requested queue and returns a status code. The
+ * crc_disable flag disables CRC stripping on the VF. Setting
+ * the crc_disable flag to 1 will disable CRC stripping for each
+ * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC
+ * offload must have been set prior to sending this info or the PF
+ * will ignore the request. This flag should be set the same for
+ * all of the queues for a VF.
*/
/* Rx queue config info */
@@ -307,7 +318,7 @@ struct virtchnl_rxq_info {
u16 splithdr_enabled; /* deprecated with AVF 1.0 */
u32 databuffer_size;
u32 max_pkt_size;
- u8 pad0;
+ u8 crc_disable;
u8 rxdid;
u8 pad1[2];
u64 dma_ring_addr;
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 7b121bd780eb..0985221d5478 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type {
CGROUP_INET6_BIND,
CGROUP_INET4_CONNECT,
CGROUP_INET6_CONNECT,
+ CGROUP_UNIX_CONNECT,
CGROUP_INET4_POST_BIND,
CGROUP_INET6_POST_BIND,
CGROUP_UDP4_SENDMSG,
CGROUP_UDP6_SENDMSG,
+ CGROUP_UNIX_SENDMSG,
CGROUP_SYSCTL,
CGROUP_UDP4_RECVMSG,
CGROUP_UDP6_RECVMSG,
+ CGROUP_UNIX_RECVMSG,
CGROUP_GETSOCKOPT,
CGROUP_SETSOCKOPT,
CGROUP_INET4_GETPEERNAME,
CGROUP_INET6_GETPEERNAME,
+ CGROUP_UNIX_GETPEERNAME,
CGROUP_INET4_GETSOCKNAME,
CGROUP_INET6_GETSOCKNAME,
+ CGROUP_UNIX_GETSOCKNAME,
CGROUP_INET_SOCK_RELEASE,
CGROUP_LSM_START,
CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8506690dbb9c..98b8cea904fe 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
CGROUP_ATYPE(CGROUP_INET6_BIND);
CGROUP_ATYPE(CGROUP_INET4_CONNECT);
CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+ CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+ CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
CGROUP_ATYPE(CGROUP_SYSCTL);
CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+ CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
CGROUP_ATYPE(CGROUP_GETSOCKOPT);
CGROUP_ATYPE(CGROUP_SETSOCKOPT);
CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
default:
return CGROUP_BPF_ATTACH_TYPE_INVALID;
@@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
+ int *uaddrlen,
enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags);
@@ -230,22 +236,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, NULL); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, NULL, NULL); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- t_ctx, NULL); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, t_ctx, NULL); \
release_sock(sk); \
} \
__ret; \
@@ -256,14 +262,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
* (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
* should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
*/
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \
({ \
u32 __flags = 0; \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, &__flags); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, NULL, &__flags); \
release_sock(sk); \
if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
*bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
@@ -276,29 +282,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \
(sk)->sk_prot->pre_connect)
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT)
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
+
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
* fullsock and its parent fullsock cannot be traced by
@@ -477,24 +492,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
}
#define cgroup_bpf_enabled(atype) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49f8b691496c..b4825d3cdb29 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -55,8 +55,8 @@ struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
-extern struct bpf_mem_alloc bpf_global_ma;
-extern bool bpf_global_ma_set;
+extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
+extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -180,14 +180,15 @@ enum btf_field_type {
BPF_TIMER = (1 << 1),
BPF_KPTR_UNREF = (1 << 2),
BPF_KPTR_REF = (1 << 3),
- BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
- BPF_LIST_HEAD = (1 << 4),
- BPF_LIST_NODE = (1 << 5),
- BPF_RB_ROOT = (1 << 6),
- BPF_RB_NODE = (1 << 7),
+ BPF_KPTR_PERCPU = (1 << 4),
+ BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
+ BPF_LIST_HEAD = (1 << 5),
+ BPF_LIST_NODE = (1 << 6),
+ BPF_RB_ROOT = (1 << 7),
+ BPF_RB_NODE = (1 << 8),
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
BPF_RB_NODE | BPF_RB_ROOT,
- BPF_REFCOUNT = (1 << 8),
+ BPF_REFCOUNT = (1 << 9),
};
typedef void (*btf_dtor_kfunc_t)(void *);
@@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return "kptr";
+ case BPF_KPTR_PERCPU:
+ return "percpu_kptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
case BPF_LIST_NODE:
@@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
@@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
@@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_TIMER:
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
break;
default:
WARN_ON_ONCE(1);
@@ -1029,6 +1035,11 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6)
+/* Indicate that current trampoline is in a tail call context. Then, it has to
+ * cache and restore tail_call_cnt to avoid infinite tail call loop.
+ */
+#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
@@ -1378,6 +1389,7 @@ struct bpf_prog_aux {
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
+ u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
@@ -1398,6 +1410,8 @@ struct bpf_prog_aux {
bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
+ bool exception_cb;
+ bool exception_boundary;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -1420,6 +1434,7 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
+ unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -2043,6 +2058,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
@@ -2149,12 +2165,12 @@ static inline bool bpf_allow_uninit_stack(void)
static inline bool bpf_bypass_spec_v1(void)
{
- return perfmon_capable();
+ return cpu_mitigations_off() || perfmon_capable();
}
static inline bool bpf_bypass_spec_v4(void)
{
- return perfmon_capable();
+ return cpu_mitigations_off() || perfmon_capable();
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
@@ -2407,9 +2423,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *reg);
+ struct bpf_reg_state *reg, bool is_ex_cb);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key);
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
@@ -2461,6 +2479,9 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
+
+bool dev_check_flush(void);
+bool cpu_map_check_flush(void);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2905,6 +2926,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+static __always_inline void
+bpf_prog_inc_misses_counters(const struct bpf_prog_array *array)
+{
+ const struct bpf_prog_array_item *item;
+ struct bpf_prog *prog;
+
+ if (unlikely(!array))
+ return;
+
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ bpf_prog_inc_misses_counter(prog);
+ item++;
+ }
+}
+
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -3183,4 +3220,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags)
return flags;
}
+static inline bool bpf_is_s