summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 11:33:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 11:33:01 -0700
commitbd6c11bc43c496cddfc6cf603b5d45365606dbd5 (patch)
tree36318fa68f784d397111991177d65bd6325189c4 /net/ipv6
parent68cf01760bc0891074e813b9bb06d2696cac1c01 (diff)
parentc873512ef3a39cc1a605b7a5ff2ad0a33d619aa8 (diff)
downloadlinux-bd6c11bc43c496cddfc6cf603b5d45365606dbd5.tar.gz
linux-bd6c11bc43c496cddfc6cf603b5d45365606dbd5.tar.bz2
linux-bd6c11bc43c496cddfc6cf603b5d45365606dbd5.zip
Merge tag 'net-next-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Increase size limits for to-be-sent skb frag allocations. This allows tun, tap devices and packet sockets to better cope with large writes operations - Store netdevs in an xarray, to simplify iterating over netdevs - Refactor nexthop selection for multipath routes - Improve sched class lifetime handling - Add backup nexthop ID support for bridge - Implement drop reasons support in openvswitch - Several data races annotations and fixes - Constify the sk parameter of routing functions - Prepend kernel version to netconsole message Protocols: - Implement support for TCP probing the peer being under memory pressure - Remove hard coded limitation on IPv6 specific info placement inside the socket struct - Get rid of sysctl_tcp_adv_win_scale and use an auto-estimated per socket scaling factor - Scaling-up the IPv6 expired route GC via a separated list of expiring routes - In-kernel support for the TLS alert protocol - Better support for UDP reuseport with connected sockets - Add NEXT-C-SID support for SRv6 End.X behavior, reducing the SR header size - Get rid of additional ancillary per MPTCP connection struct socket - Implement support for BPF-based MPTCP packet schedulers - Format MPTCP subtests selftests results in TAP - Several new SMC 2.1 features including unique experimental options, max connections per lgr negotiation, max links per lgr negotiation BPF: - Multi-buffer support in AF_XDP - Add multi uprobe BPF links for attaching multiple uprobes and usdt probes, which is significantly faster and saves extra fds - Implement an fd-based tc BPF attach API (TCX) and BPF link support on top of it - Add SO_REUSEPORT support for TC bpf_sk_assign - Support new instructions from cpu v4 to simplify the generated code and feature completeness, for x86, arm64, riscv64 - Support defragmenting IPv(4|6) packets in BPF - Teach verifier actual bounds of bpf_get_smp_processor_id() and fix perf+libbpf issue related to custom section handling - Introduce bpf map element count and enable it for all program types - Add a BPF hook in sys_socket() to change the protocol ID from IPPROTO_TCP to IPPROTO_MPTCP to cover migration for legacy - Introduce bpf_me_mcache_free_rcu() and fix OOM under stress - Add uprobe support for the bpf_get_func_ip helper - Check skb ownership against full socket - Support for up to 12 arguments in BPF trampoline - Extend link_info for kprobe_multi and perf_event links Netfilter: - Speed-up process exit by aborting ruleset validation if a fatal signal is pending - Allow NLA_POLICY_MASK to be used with BE16/BE32 types Driver API: - Page pool optimizations, to improve data locality and cache usage - Introduce ndo_hwtstamp_get() and ndo_hwtstamp_set() to avoid the need for raw ioctl() handling in drivers - Simplify genetlink dump operations (doit/dumpit) providing them the common information already populated in struct genl_info - Extend and use the yaml devlink specs to [re]generate the split ops - Introduce devlink selective dumps, to allow SF filtering SF based on handle and other attributes - Add yaml netlink spec for netlink-raw families, allow route, link and address related queries via the ynl tool - Remove phylink legacy mode support - Support offload LED blinking to phy - Add devlink port function attributes for IPsec New hardware / drivers: - Ethernet: - Broadcom ASP 2.0 (72165) ethernet controller - MediaTek MT7988 SoC - Texas Instruments AM654 SoC - Texas Instruments IEP driver - Atheros qca8081 phy - Marvell 88Q2110 phy - NXP TJA1120 phy - WiFi: - MediaTek mt7981 support - Can: - Kvaser SmartFusion2 PCI Express devices - Allwinner T113 controllers - Texas Instruments tcan4552/4553 chips - Bluetooth: - Intel Gale Peak - Qualcomm WCN3988 and WCN7850 - NXP AW693 and IW624 - Mediatek MT2925 Drivers: - Ethernet NICs: - nVidia/Mellanox: - mlx5: - support UDP encapsulation in packet offload mode - IPsec packet offload support in eswitch mode - improve aRFS observability by adding new set of counters - extends MACsec offload support to cover RoCE traffic - dynamic completion EQs - mlx4: - convert to use auxiliary bus instead of custom interface logic - Intel - ice: - implement switchdev bridge offload, even for LAG interfaces - implement SRIOV support for LAG interfaces - igc: - add support for multiple in-flight TX timestamps - Broadcom: - bnxt: - use the unified RX page pool buffers for XDP and non-XDP - use the NAPI skb allocation cache - OcteonTX2: - support Round Robin scheduling HTB offload - TC flower offload support for SPI field - Freescale: - add XDP_TX feature support - AMD: - ionic: add support for PCI FLR event - sfc: - basic conntrack offload - introduce eth, ipv4 and ipv6 pedit offloads - ST Microelectronics: - stmmac: maximze PTP timestamping resolution - Virtual NICs: - Microsoft vNIC: - batch ringing RX queue doorbell on receiving packets - add page pool for RX buffers - Virtio vNIC: - add per queue interrupt coalescing support - Google vNIC: - add queue-page-list mode support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add port range matching tc-flower offload - permit enslavement to netdevices with uppers - Ethernet embedded switches: - Marvell (mv88e6xxx): - convert to phylink_pcs - Renesas: - r8A779fx: add speed change support - rzn1: enables vlan support - Ethernet PHYs: - convert mv88e6xxx to phylink_pcs - WiFi: - Qualcomm Wi-Fi 7 (ath12k): - extremely High Throughput (EHT) PHY support - RealTek (rtl8xxxu): - enable AP mode for: RTL8192FU, RTL8710BU (RTL8188GU), RTL8192EU and RTL8723BU - RealTek (rtw89): - Introduce Time Averaged SAR (TAS) support - Connector: - support for event filtering" * tag 'net-next-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1806 commits) net: ethernet: mtk_wed: minor change in wed_{tx,rx}info_show net: ethernet: mtk_wed: add some more info in wed_txinfo_show handler net: stmmac: clarify difference between "interface" and "phy_interface" r8152: add vendor/device ID pair for D-Link DUB-E250 devlink: move devlink_notify_register/unregister() to dev.c devlink: move small_ops definition into netlink.c devlink: move tracepoint definitions into core.c devlink: push linecard related code into separate file devlink: push rate related code into separate file devlink: push trap related code into separate file devlink: use tracepoint_enabled() helper devlink: push region related code into separate file devlink: push param related code into separate file devlink: push resource related code into separate file devlink: push dpipe related code into separate file devlink: move and rename devlink_dpipe_send_and_alloc_skb() helper devlink: push shared buffer related code into separate file devlink: push port related code into separate file devlink: push object register/unregister notifications into separate helpers inet: fix IP_TRANSPARENT error handling ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c90
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/datagram.c9
-rw-r--r--net/ipv6/exthdrs.c7
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ila/ila_main.c1
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/inet6_hashtables.c69
-rw-r--r--net/ipv6/ip6_fib.c55
-rw-r--r--net/ipv6/ip6_output.c18
-rw-r--r--net/ipv6/ipv6_sockglue.c22
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c17
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c11
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c17
-rw-r--r--net/ipv6/route.c23
-rw-r--r--net/ipv6/rpl_iptunnel.c3
-rw-r--r--net/ipv6/seg6_local.c108
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c99
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_policy.c6
24 files changed, 382 insertions, 215 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 94cec2075eee..47d1dd8501b7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -1061,20 +1063,28 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
struct fib6_info *f6i = NULL;
int err = 0;
- if (addr_type == IPV6_ADDR_ANY ||
- (addr_type & IPV6_ADDR_MULTICAST &&
- !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
- (!(idev->dev->flags & IFF_LOOPBACK) &&
- !netif_is_l3_master(idev->dev) &&
- addr_type & IPV6_ADDR_LOOPBACK))
+ if (addr_type == IPV6_ADDR_ANY) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid address");
return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (addr_type & IPV6_ADDR_MULTICAST &&
+ !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign multicast address without \"IFA_F_MCAUTOJOIN\" flag");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (!(idev->dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(idev->dev) &&
+ addr_type & IPV6_ADDR_LOOPBACK) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign loopback address on this device");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ }
if (idev->dead) {
- err = -ENODEV; /*XXX*/
+ NL_SET_ERR_MSG_MOD(extack, "device is going away");
+ err = -ENODEV;
goto out;
}
if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
err = -EACCES;
goto out;
}
@@ -1101,7 +1111,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags);
+ f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags, extack);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
f6i = NULL;
@@ -2731,6 +2741,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
return;
}
+ if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft)
+ goto put;
+
/*
* Two things going on here:
* 1) Add routes for on-link prefixes
@@ -2925,30 +2938,40 @@ static int inet6_addr_add(struct net *net, int ifindex,
ASSERT_RTNL();
- if (cfg->plen > 128)
+ if (cfg->plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
/* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
+ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
return -EINVAL;
+ }
- if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64)
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
+ NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
if (!dev)
return -ENODEV;
idev = addrconf_add_dev(dev);
- if (IS_ERR(idev))
+ if (IS_ERR(idev)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return PTR_ERR(idev);
+ }
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
true, cfg->pfx, ifindex);
- if (ret < 0)
+ if (ret < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
return ret;
+ }
}
cfg->scope = ipv6_addr_scope(cfg->pfx);
@@ -3005,22 +3028,29 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
- const struct in6_addr *pfx, unsigned int plen)
+ const struct in6_addr *pfx, unsigned int plen,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct net_device *dev;
- if (plen > 128)
+ if (plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
idev = __in6_dev_get(dev);
- if (!idev)
+ if (!idev) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
+ }
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
@@ -3043,6 +3073,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
}
}
read_unlock_bh(&idev->lock);
+
+ NL_SET_ERR_MSG_MOD(extack, "address not found");
return -EADDRNOTAVAIL;
}
@@ -3085,7 +3117,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
- ireq.ifr6_prefixlen);
+ ireq.ifr6_prefixlen, NULL);
rtnl_unlock();
return err;
}
@@ -3488,7 +3520,7 @@ static int fixup_permanent_addr(struct net *net,
struct fib6_info *f6i, *prev;
f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false,
- GFP_ATOMIC);
+ GFP_ATOMIC, NULL);
if (IS_ERR(f6i))
return PTR_ERR(f6i);
@@ -4698,7 +4730,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa_flags &= IFA_F_MANAGETEMPADDR;
return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen);
+ ifm->ifa_prefixlen, extack);
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
@@ -4903,8 +4935,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
dev = __dev_get_by_index(net, ifm->ifa_index);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
if (tb[IFA_FLAGS])
cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
@@ -4939,10 +4973,12 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
- else
+ } else {
err = inet6_addr_modify(net, ifa, &cfg);
+ }
in6_ifa_put(ifa);
@@ -5602,6 +5638,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
}
static inline size_t inet6_ifla6_size(void)
@@ -6796,6 +6833,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_lft",
+ .data = &ipv6_devconf.accept_ra_min_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5d593ddc0347..368824fe9719 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
@@ -200,12 +200,12 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
sk->sk_destruct = inet6_sock_destruct;
@@ -229,7 +229,7 @@ lookup_protocol:
*/
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
inet->mc_index = 0;
RCU_INIT_POINTER(inet->mc_list, NULL);
@@ -399,7 +399,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -435,10 +435,8 @@ out_unlock:
goto out;
}
-/* bind for INET6 API */
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
const struct proto *prot;
int err = 0;
@@ -462,6 +460,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet6_bind(sk, uaddr, addr_len, flags);
}
+
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet6_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index dacdea7fcb62..bb17f484ee2c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -305,7 +305,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
}
net = dev_net(idev->dev);
- f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+ f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
goto out;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9b6818453afe..41ebc4e57473 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
+ const struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
int oif = sk->sk_bound_dev_if;
memset(fl6, 0, sizeof(*fl6));
@@ -523,7 +524,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
} else {
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin->sin6_addr);
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 202fc3aaa83c..4952ae792450 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -612,8 +612,6 @@ looped_back:
kfree(buf);
- skb_dst_drop(skb);
-
ip6_route_input(skb);
if (skb_dst(skb)->error) {
@@ -650,7 +648,6 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(skb->dev);
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
- struct in6_addr daddr;
int n, i;
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
@@ -798,9 +795,7 @@ looped_back:
return -1;
}
- daddr = *addr;
- *addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = daddr;
+ swap(*addr, ipv6_hdr(skb)->daddr);
ip6_route_input(skb);
if (skb_dst(skb)->error) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 65fa5014bc85..6d88f5248c1f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1034,11 +1034,9 @@ drop_no_count:
return 0;
}
-void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
- u8 type,
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif)
+ const struct in6_addr *daddr, int oif)
{
memset(fl6, 0, sizeof(*fl6));
fl6->saddr = *saddr;
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 3faf62530d6a..69caed07315f 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index bee45dfeb187..67e8c9440977 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -5,7 +5,6 @@
#include <linux/rhashtable.h>
#include <linux/vmalloc.h>
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b64b49012655..b0e8d278e8a9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -39,6 +39,7 @@ u32 inet6_ehashfn(const struct net *net,
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet6_ehashfn);
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
@@ -111,22 +112,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned short hnum)
+/**
+ * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
@@ -143,8 +162,8 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet6_ehashfn);
if (result)
return result;
@@ -156,30 +175,30 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet6_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum, const int dif)
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -193,9 +212,11 @@ struct sock *inet6_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet6_ehashfn);
if (result)
goto done;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bac768d36cc1..28b01a068412 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -1057,6 +1060,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires_locked(rt);
}
/*
@@ -1118,9 +1123,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES))
- fib6_clean_expires(iter);
+ fib6_clean_expires_locked(iter);
else
- fib6_set_expires(iter, rt->expires);
+ fib6_set_expires_locked(iter,
+ rt->expires);
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1479,6 +1485,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (fib6_has_expires(rt))
+ hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -2285,9 +2295,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2295,7 +2304,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
* Routes are expired even if they are in use.
*/
- if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
+ if (fib6_has_expires(rt) && rt->expires) {
if (time_after(now, rt->expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
@@ -2312,6 +2321,40 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+ fib6_gc_table(net, table, gc_args);
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2327,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e8c90e97608..0665e8b09968 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -113,7 +113,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -1591,7 +1591,7 @@ emsgsize:
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet_sk(sk)->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1693,7 +1693,10 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
copy = datalen - transhdrlen - fraggap - pagedlen;
- if (copy < 0) {
+ /* [!] N