summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-13 16:27:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-13 16:27:13 -0700
commit96144c58abe7ff767e754b5b80995f7b8846d49b (patch)
tree7fcc47090ced9be71fa35cbf5e00d0160b04a2d1 /net
parentf82e7b57b5fc48199e2f26ffafe2f96f7338ad3d (diff)
parentbc139119a1708ae3db1ebb379630f286e28d06e8 (diff)
downloadlinux-96144c58abe7ff767e754b5b80995f7b8846d49b.tar.gz
linux-96144c58abe7ff767e754b5b80995f7b8846d49b.tar.bz2
linux-96144c58abe7ff767e754b5b80995f7b8846d49b.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from David Miller: 1) Fix cfg80211 deadlock, from Johannes Berg. 2) RXRPC fails to send notifications, from David Howells. 3) MPTCP RM_ADDR parsing has an off by one pointer error, fix from Geliang Tang. 4) Fix crash when using MSG_PEEK with sockmap, from Anny Hu. 5) The ucc_geth driver needs __netdev_watchdog_up exported, from Valentin Longchamp. 6) Fix hashtable memory leak in dccp, from Wang Hai. 7) Fix how nexthops are marked as FDB nexthops, from David Ahern. 8) Fix mptcp races between shutdown and recvmsg, from Paolo Abeni. 9) Fix crashes in tipc_disc_rcv(), from Tuong Lien. 10) Fix link speed reporting in iavf driver, from Brett Creeley. 11) When a channel is used for XSK and then reused again later for XSK, we forget to clear out the relevant data structures in mlx5 which causes all kinds of problems. Fix from Maxim Mikityanskiy. 12) Fix memory leak in genetlink, from Cong Wang. 13) Disallow sockmap attachments to UDP sockets, it simply won't work. From Lorenz Bauer.
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (83 commits) net: ethernet: ti: ale: fix allmulti for nu type ale net: ethernet: ti: am65-cpsw-nuss: fix ale parameters init net: atm: Remove the error message according to the atomic context bpf: Undo internal BPF_PROBE_MEM in BPF insns dump libbpf: Support pre-initializing .bss global variables tools/bpftool: Fix skeleton codegen bpf: Fix memlock accounting for sock_hash bpf: sockmap: Don't attach programs to UDP sockets bpf: tcp: Recv() should return 0 when the peer socket is closed ibmvnic: Flush existing work items before device removal genetlink: clean up family attributes allocations net: ipa: header pad field only valid for AP->modem endpoint net: ipa: program upper nibbles of sequencer type net: ipa: fix modem LAN RX endpoint id net: ipa: program metadata mask differently ionic: add pcie_print_link_status rxrpc: Fix race between incoming ACK parser and retransmitter net/mlx5: E-Switch, Fix some error pointer dereferences net/mlx5: Don't fail driver on failure to create debugfs net/mlx5e: CT: Fix ipv6 nat header rewrite actions ...
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c8
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/core/dev.c30
-rw-r--r--net/core/dev_addr_lists.c12
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/rtnetlink.c1
-rw-r--r--net/core/sock_map.c38
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/ipv4/nexthop.c82
-rw-r--r--net/ipv4/tcp.c70
-rw-r--r--net/ipv4/tcp_bpf.c6
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mptcp/options.c2
-rw-r--r--net/mptcp/protocol.c45
-rw-r--r--net/mptcp/subflow.c1
-rw-r--r--net/netlink/genetlink.c28
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/ar-internal.h119
-rw-r--r--net/rxrpc/call_event.c30
-rw-r--r--net/rxrpc/conn_event.c7
-rw-r--r--net/rxrpc/input.c7
-rw-r--r--net/rxrpc/peer_event.c4
-rw-r--r--net/rxrpc/recvmsg.c79
-rw-r--r--net/rxrpc/sendmsg.c4
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/tipc/bearer.c2
-rw-r--r--net/tipc/msg.c4
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/wireless/Kconfig2
-rw-r--r--net/wireless/core.c6
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/mlme.c26
-rw-r--r--net/xdp/xsk.c4
38 files changed, 409 insertions, 266 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f00bb57f0f60..c8d6a07e23c5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -494,6 +494,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
* separate class since they always nest.
*/
static struct lock_class_key vlan_netdev_xmit_lock_key;
+static struct lock_class_key vlan_netdev_addr_lock_key;
static void vlan_dev_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -502,8 +503,11 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev,
lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
}
-static void vlan_dev_set_lockdep_class(struct net_device *dev)
+static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
{
+ lockdep_set_class_and_subclass(&dev->addr_list_lock,
+ &vlan_netdev_addr_lock_key,
+ subclass);
netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
}
@@ -597,7 +601,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- vlan_dev_set_lockdep_class(dev);
+ vlan_dev_set_lockdep_class(dev, dev->lower_level);
vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->vlan_pcpu_stats)
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ca37f5a71f5e..875fc0bc1780 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1536,10 +1536,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
struct lec_arp_table *to_return;
to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
- if (!to_return) {
- pr_info("LEC: Arp entry kmalloc failed\n");
+ if (!to_return)
return NULL;
- }
ether_addr_copy(to_return->mac_addr, mac_addr);
INIT_HLIST_NODE(&to_return->next);
timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0ddd80130ea3..f1f1c86f3419 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -745,6 +745,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
* separate class since they always nest.
*/
static struct lock_class_key batadv_netdev_xmit_lock_key;
+static struct lock_class_key batadv_netdev_addr_lock_key;
/**
* batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
@@ -765,6 +766,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev,
*/
static void batadv_set_lockdep_class(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8ec1362588af..8c7b78f8bc23 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -105,6 +105,13 @@ out:
return NETDEV_TX_OK;
}
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -143,6 +150,7 @@ static int br_dev_init(struct net_device *dev)
br_fdb_hash_fini(br);
}
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 061496a1f640..6bc2388141f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -439,6 +439,7 @@ static const char *const netdev_lock_name[] = {
"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
@@ -460,11 +461,25 @@ static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
netdev_lock_name[i]);
}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+ int i;
+
+ i = netdev_lock_pos(dev->type);
+ lockdep_set_class_and_name(&dev->addr_list_lock,
+ &netdev_addr_lock_key[i],
+ netdev_lock_name[i]);
+}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
unsigned short dev_type)
{
}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+}
#endif
/*******************************************************************************
@@ -9373,15 +9388,6 @@ void netif_tx_stop_all_queues(struct net_device *dev)
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
-void netdev_update_lockdep_key(struct net_device *dev)
-{
- lockdep_unregister_key(&dev->addr_list_lock_key);
- lockdep_register_key(&dev->addr_list_lock_key);
-
- lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
-}
-EXPORT_SYMBOL(netdev_update_lockdep_key);
-
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -9420,7 +9426,7 @@ int register_netdevice(struct net_device *dev)
return ret;
spin_lock_init(&dev->addr_list_lock);
- lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+ netdev_set_addr_lockdep_class(dev);
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
@@ -9939,8 +9945,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- lockdep_register_key(&dev->addr_list_lock_key);
-
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
@@ -10028,8 +10032,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
- lockdep_unregister_key(&dev->addr_list_lock_key);
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 2f949b5a1eb9..6393ba930097 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
return;
netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
@@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
return;
netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
diff --git a/net/core/filter.c b/net/core/filter.c
index 209482a4eaa2..73395384afe2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
u8 *end = skb_tail_pointer(skb);
- u8 *net = skb_network_header(skb);
- u8 *mac = skb_mac_header(skb);
- u8 *ptr;
+ u8 *start, *ptr;
- if (unlikely(offset > 0xffff || len > (end - mac)))
+ if (unlikely(offset > 0xffff))
goto err_clear;
switch (start_header) {
case BPF_HDR_START_MAC:
- ptr = mac + offset;
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ goto err_clear;
+ start = skb_mac_header(skb);
break;
case BPF_HDR_START_NET:
- ptr = net + offset;
+ start = skb_network_header(skb);
break;
default:
goto err_clear;
}
- if (likely(ptr >= mac && ptr + len <= end)) {
+ ptr = start + offset;
+
+ if (likely(ptr + len <= end)) {
memcpy(to, ptr, len);
return 0;
}
@@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
break;
case SO_BINDTODEVICE:
- ret = -ENOPROTOOPT;
-#ifdef CONFIG_NETDEVICES
optlen = min_t(long, optlen, IFNAMSIZ - 1);
strncpy(devname, optval, optlen);
devname[optlen] = 0;
@@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
dev_put(dev);
}
ret = sock_bindtoindex(sk, ifindex, false);
-#endif
break;
default:
ret = -EINVAL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2269199c5891..9aedc15736ad 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2462,7 +2462,6 @@ static int do_set_master(struct net_device *dev, int ifindex,
err = ops->ndo_del_slave(upper_dev, dev);
if (err)
return err;
- netdev_update_lockdep_key(dev);
} else {
return -EOPNOTSUPP;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 00a26cf2cfe9..4059f94e9bb5 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -424,10 +424,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
-static bool sock_map_redirect_allowed(const struct sock *sk)
-{
- return sk->sk_state != TCP_LISTEN;
-}
+static bool sock_map_redirect_allowed(const struct sock *sk);
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
@@ -508,6 +505,11 @@ static bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}
+static bool sock_map_redirect_allowed(const struct sock *sk)
+{
+ return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
+}
+
static bool sock_map_sk_is_suitable(const struct sock *sk)
{
return sk_is_tcp(sk) || sk_is_udp(sk);
@@ -989,11 +991,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
err = -EINVAL;
goto free_htab;
}
+ err = bpf_map_charge_init(&htab->map.memory, cost);
+ if (err)
+ goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_htab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
+ bpf_map_charge_finish(&htab->map.memory);
err = -ENOMEM;
goto free_htab;
}
@@ -1013,6 +1019,7 @@ static void sock_hash_free(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct bpf_htab_bucket *bucket;
+ struct hlist_head unlink_list;
struct bpf_htab_elem *elem;
struct hlist_node *node;
int i;
@@ -1024,13 +1031,32 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
- hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
- hlist_del_rcu(&elem->node);
+
+ /* We are racing with sock_hash_delete_from_link to
+ * enter the spin-lock critical section. Every socket on
+ * the list is still linked to sockhash. Since link
+ * exists, psock exists and holds a ref to socket. That
+ * lets us grab a socket ref too.
+ */
+ raw_spin_lock_bh(&bucket->lock);
+ hlist_for_each_entry(elem, &bucket->head, node)
+ sock_hold(elem->sk);
+ hlist_move_list(&bucket->head, &unlink_list);
+ raw_spin_unlock_bh(&bucket->lock);
+
+ /* Process removed entries out of atomic context to
+ * block for socket lock before deleting the psock's
+ * link to sockhash.
+ */
+ hlist_for_each_entry_safe(elem, node, &unlink_list, node) {
+ hlist_del(&elem->node);
lock_sock(elem->sk);
rcu_read_lock();
sock_map_unref(elem->sk, elem);
rcu_read_unlock();
release_sock(elem->sk);
+ sock_put(elem->sk);
+ sock_hash_free_elem(htab, elem);
}
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 4af8a98fe784..c13b6609474b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1139,14 +1139,14 @@ static int __init dccp_init(void)
inet_hashinfo_init(&dccp_hashinfo);
rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
if (rc)
- goto out_fail;
+ goto out_free_percpu;
rc = -ENOBUFS;
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out_free_percpu;
+ goto out_free_hashinfo2;
/*
* Size and allocate the main established and bind bucket
@@ -1242,6 +1242,8 @@ out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+out_free_hashinfo2:
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
percpu_counter_destroy(&dccp_orphan_count);
out_fail:
@@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void)
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_ackvec_exit();
dccp_sysctl_exit();
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
percpu_counter_destroy(&dccp_orphan_count);
}
diff --git a/net/dsa/master.c b/net/dsa/master.c
index a621367c6e8c..480a61460c23 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -327,6 +327,8 @@ static void dsa_master_reset_mtu(struct net_device *dev)
rtnl_unlock();
}
+static struct lock_class_key dsa_master_addr_list_lock_key;
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int ret;
@@ -345,6 +347,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
+ lockdep_set_class(&dev->addr_list_lock,
+ &dsa_master_addr_list_lock_key);
ret = dsa_master_ethtool_setup(dev);
if (ret)
return ret;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 400a9f89ebdb..cc8049b100b2 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -247,12 +247,11 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_u32(skb, NHA_ID, nh->id))
goto nla_put_failure;
- if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
- goto nla_put_failure;
-
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
if (nla_put_nh_group(skb, nhg))
goto nla_put_failure;
goto out;
@@ -264,7 +263,10 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_flag(skb, NHA_BLACKHOLE))
goto nla_put_failure;
goto out;
- } else if (!nh->is_fdb_nh) {
+ } else if (nhi->fdb_nh) {
+ if (nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
+ } else {
const struct net_device *dev;
dev = nhi->fib_nhc.nhc_dev;
@@ -385,7 +387,7 @@ errout:
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
- struct netlink_ext_ack *extack)
+ bool *is_fdb, struct netlink_ext_ack *extack)
{
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
@@ -398,6 +400,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
"Multipath group can not be a nexthop within a group");
return false;
}
+ *is_fdb = nhg->fdb_nh;
} else {
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
@@ -406,6 +409,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
"Blackhole nexthop can not be used in a group with more than 1 path");
return false;
}
+ *is_fdb = nhi->fdb_nh;
}
return true;
@@ -416,12 +420,13 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
{
struct nh_info *nhi;
- if (!nh->is_fdb_nh) {
+ nhi = rtnl_dereference(nh->nh_info);
+
+ if (!nhi->fdb_nh) {
NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
return -EINVAL;
}
- nhi = rtnl_dereference(nh->nh_info);
if (*nh_family == AF_UNSPEC) {
*nh_family = nhi->family;
} else if (*nh_family != nhi->family) {
@@ -473,19 +478,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
struct nexthop *nh;
+ bool is_fdb_nh;
nh = nexthop_find_by_id(net, nhg[i].id);
if (!nh) {
NL_SET_ERR_MSG(extack, "Invalid nexthop id");
return -EINVAL;
}
- if (!valid_group_nh(nh, len, extack))
+ if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
return -EINVAL;
if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
return -EINVAL;
- if (!nhg_fdb && nh->is_fdb_nh) {
+ if (!nhg_fdb && is_fdb_nh) {
NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
return -EINVAL;
}
@@ -553,13 +559,13 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
if (hash > atomic_read(&nhge->upper_bound))
continue;
- if (nhge->nh->is_fdb_nh)
+ nhi = rcu_dereference(nhge->nh->nh_info);
+ if (nhi->fdb_nh)
return nhge->nh;
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- nhi = rcu_dereference(nhge->nh->nh_info);
switch (nhi->family) {
case AF_INET:
if (ipv4_good_nh(&nhi->fib_nh))
@@ -624,11 +630,7 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct nh_info *nhi;
-
- if (nh->is_fdb_nh) {
- NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
- return -EINVAL;
- }
+ bool is_fdb_nh;
/* fib6_src is unique to a fib6_info and limits the ability to cache
* routes in fib6_nh within a nexthop that is potentially shared
@@ -645,10 +647,17 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
nhg = rtnl_dereference(nh->nh_grp);
if (nhg->has_v4)
goto no_v4_nh;
+ is_fdb_nh = nhg->fdb_nh;
} else {
nhi = rtnl_dereference(nh->nh_info);
if (nhi->family == AF_INET)
goto no_v4_nh;
+ is_fdb_nh = nhi->fdb_nh;
+ }
+
+ if (is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ return -EINVAL;
}
return 0;
@@ -677,12 +686,9 @@ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
return fib6_check_nexthop(new, NULL, extack);
}
-static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
struct netlink_ext_ack *extack)
{
- struct nh_info *nhi;
-
- nhi = rtnl_dereference(nh->nh_info);
if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
NL_SET_ERR_MSG(extack,
"Route with host scope can not have a gateway");
@@ -704,29 +710,38 @@ static int nexthop_check_scope(struct nexthop *nh, u8 scope,
int fib_check_nexthop(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
+ struct nh_info *nhi;
int err = 0;
- if (nh->is_fdb_nh) {
- NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
- err = -EINVAL;
- goto out;
- }
-
if (nh->is_group) {
struct nh_group *nhg;
+ nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+
if (scope == RT_SCOPE_HOST) {
NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
err = -EINVAL;
goto out;
}
- nhg = rtnl_dereference(nh->nh_grp);
/* all nexthops in a group have the same scope */
- err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+ nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
+ err = nexthop_check_scope(nhi, scope, extack);
} else {
- err = nexthop_check_scope(nh, scope, extack);
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+ err = nexthop_check_scope(nhi, scope, extack);
}
+
out:
return err;
}
@@ -787,6 +802,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
newg->has_v4 = nhg->has_v4;
newg->mpath = nhg->mpath;
+ newg->fdb_nh = nhg->fdb_nh;
newg->num_nh = nhg->num_nh;
/* copy old entries to new except the one getting removed */
@@ -1216,7 +1232,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
}
if (cfg->nh_fdb)
- nh->is_fdb_nh = 1;
+ nhg->fdb_nh = 1;
rcu_assign_pointer(nh->nh_grp, nhg);
@@ -1255,7 +1271,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
goto out;
}
- if (nh->is_fdb_nh)
+ if (nhi->fdb_nh)
goto out;
/* sets nh_dev if successful */
@@ -1326,7 +1342,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
if (cfg->nh_fdb)
- nh->is_fdb_nh = 1;
+ nhi->fdb_nh = 1;
if (cfg->nh_blackhole) {
nhi->reject_nh = 1;
@@ -1349,7 +1365,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
}
/* add the entry to the device based hash */
- if (!nh->is_fdb_nh)
+ if (!nhi->fdb_nh)
nexthop_devhash_add(net, nhi);
rcu_assign_pointer(nh->nh_info, nhi);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27716e4932bc..810cc164f795 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock,
}
EXPORT_SYMBOL(tcp_mmap);
+static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
+ struct page **pages,
+ unsigned long pages_to_map,
+ unsigned long *insert_ad