summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile4
-rw-r--r--net/core/datagram.c209
-rw-r--r--net/core/dev.c1304
-rw-r--r--net/core/dev_addr_lists.c100
-rw-r--r--net/core/dev_ioctl.c18
-rw-r--r--net/core/devlink.c1495
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/ethtool.c278
-rw-r--r--net/core/failover.c315
-rw-r--r--net/core/fib_rules.c606
-rw-r--r--net/core/filter.c3218
-rw-r--r--net/core/flow_dissector.c245
-rw-r--r--net/core/gen_estimator.c21
-rw-r--r--net/core/gen_stats.c89
-rw-r--r--net/core/gro_cells.c1
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/lwt_bpf.c4
-rw-r--r--net/core/neighbour.c692
-rw-r--r--net/core/net-procfs.c65
-rw-r--r--net/core/net-sysfs.c161
-rw-r--r--net/core/net-traces.c4
-rw-r--r--net/core/net_namespace.c204
-rw-r--r--net/core/netclassid_cgroup.c1
-rw-r--r--net/core/netpoll.c67
-rw-r--r--net/core/page_pool.c317
-rw-r--r--net/core/pktgen.c17
-rw-r--r--net/core/rtnetlink.c1019
-rw-r--r--net/core/secure_seq.c1
-rw-r--r--net/core/skbuff.c449
-rw-r--r--net/core/skmsg.c822
-rw-r--r--net/core/sock.c237
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sock_map.c1003
-rw-r--r--net/core/sock_reuseport.c93
-rw-r--r--net/core/stream.c2
-rw-r--r--net/core/sysctl_net_core.c24
-rw-r--r--net/core/utils.c2
-rw-r--r--net/core/xdp.c376
38 files changed, 11174 insertions, 2295 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 6dbbba8c57ae..fccd31e0e7f7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -14,7 +14,9 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
fib_notifier.o xdp.o
obj-y += net-sysfs.o
+obj-$(CONFIG_PAGE_POOL) += page_pool.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -26,7 +28,9 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
+obj-$(CONFIG_FAILOVER) += failover.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9938952c5c78..b2651bb6d2a3 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -408,27 +408,20 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
}
EXPORT_SYMBOL(skb_kill_datagram);
-/**
- * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
- * @skb: buffer to copy
- * @offset: offset in the buffer to start copying from
- * @to: iovec iterator to copy to
- * @len: amount of data to copy from buffer to iovec
- */
-int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
- struct iov_iter *to, int len)
+int __skb_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len, bool fault_short,
+ size_t (*cb)(const void *, size_t, void *, struct iov_iter *),
+ void *data)
{
int start = skb_headlen(skb);
int i, copy = start - offset, start_off = offset, n;
struct sk_buff *frag_iter;
- trace_skb_copy_datagram_iovec(skb, len);
-
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
- n = copy_to_iter(skb->data + offset, copy, to);
+ n = cb(skb->data + offset, copy, data, to);
offset += n;
if (n != copy)
goto short_copy;
@@ -445,11 +438,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
+ struct page *page = skb_frag_page(frag);
+ u8 *vaddr = kmap(page);
+
if (copy > len)
copy = len;
- n = copy_page_to_iter(skb_frag_page(frag),
- frag->page_offset + offset -
- start, copy, to);
+ n = cb(vaddr + frag->page_offset +
+ offset - start, copy, data, to);
+ kunmap(page);
offset += n;
if (n != copy)
goto short_copy;
@@ -468,8 +464,8 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- if (skb_copy_datagram_iter(frag_iter, offset - start,
- to, copy))
+ if (__skb_datagram_iter(frag_iter, offset - start,
+ to, copy, fault_short, cb, data))
goto fault;
if ((len -= copy) == 0)
return 0;
@@ -490,11 +486,50 @@ fault:
return -EFAULT;
short_copy:
- if (iov_iter_count(to))
+ if (fault_short || iov_iter_count(to))
goto fault;
return 0;
}
+
+/**
+ * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
+ * and update a hash.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: iovec iterator to copy to
+ * @len: amount of data to copy from buffer to iovec
+ * @hash: hash request to update
+ */
+int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len,
+ struct ahash_request *hash)
+{
+ return __skb_datagram_iter(skb, offset, to, len, true,
+ hash_and_copy_to_iter, hash);
+}
+EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
+
+static size_t simple_copy_to_iter(const void *addr, size_t bytes,
+ void *data __always_unused, struct iov_iter *i)
+{
+ return copy_to_iter(addr, bytes, i);
+}
+
+/**
+ * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: iovec iterator to copy to
+ * @len: amount of data to copy from buffer to iovec
+ */
+int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len)
+{
+ trace_skb_copy_datagram_iovec(skb, len);
+ return __skb_datagram_iter(skb, offset, to, len, false,
+ simple_copy_to_iter, NULL);
+}
EXPORT_SYMBOL(skb_copy_datagram_iter);
/**
@@ -645,132 +680,23 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
+/**
+ * skb_copy_and_csum_datagram_iter - Copy datagram to an iovec iterator
+ * and update a checksum.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: iovec iterator to copy to
+ * @len: amount of data to copy from buffer to iovec
+ * @csump: checksum pointer
+ */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len,
__wsum *csump)
{
- int start = skb_headlen(skb);
- int i, copy = start - offset, start_off = offset;
- struct sk_buff *frag_iter;
- int pos = 0;
- int n;
-
- /* Copy header. */
- if (copy > 0) {
- if (copy > len)
- copy = len;
- n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
- offset += n;
- if (n != copy)
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- pos = copy;
- }
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
- const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- WARN_ON(start > offset + len);
-
- end = start + skb_frag_size(frag);
- if ((copy = end - offset) > 0) {
- __wsum csum2 = 0;
- struct page *page = skb_frag_page(frag);
- u8 *vaddr = kmap(page);
-
- if (copy > len)
- copy = len;
- n = csum_and_copy_to_iter(vaddr + frag->page_offset +
- offset - start, copy,
- &csum2, to);
- kunmap(page);
- offset += n;
- if (n != copy)
- goto fault;
- *csump = csum_block_add(*csump, csum2, pos);
- if (!(len -= copy))
- return 0;
- pos += copy;
- }
- start = end;
- }
-
- skb_walk_frags(skb, frag_iter) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + frag_iter->len;
- if ((copy = end - offset) > 0) {
- __wsum csum2 = 0;
- if (copy > len)
- copy = len;
- if (skb_copy_and_csum_datagram(frag_iter,
- offset - start,
- to, copy,
- &csum2))
- goto fault;
- *csump = csum_block_add(*csump, csum2, pos);
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- pos += copy;
- }
- start = end;
- }
- if (!len)
- return 0;
-
-fault:
- iov_iter_revert(to, offset - start_off);
- return -EFAULT;
+ return __skb_datagram_iter(skb, offset, to, len, true,
+ csum_and_copy_to_iter, csump);
}
-__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
-{
- __sum16 sum;
-
- sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
- if (!skb_shared(skb))
- skb->csum_valid = !sum;
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete_head);
-
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
-{
- __wsum csum;
- __sum16 sum;
-
- csum = skb_checksum(skb, 0, skb->len, 0);
-
- /* skb->csum holds pseudo checksum */
- sum = csum_fold(csum_add(skb->csum, csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
-
- if (!skb_shared(skb)) {
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
- }
-
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete);
-
/**
* skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
* @skb: skbuff
@@ -808,8 +734,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
return -EINVAL;
}
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
- netdev_rx_csum_fault(skb->dev);
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(NULL, skb);
}
return 0;
fault:
@@ -837,7 +764,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
__poll_t mask;
- sock_poll_wait(file, sk_sleep(sk), wait);
+ sock_poll_wait(file, sock, wait);
mask = 0;
/* exceptional events? */
diff --git a/net/core/dev.c b/net/core/dev.c
index 2af787e8b130..1b5a4410be0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -93,7 +93,6 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
-#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
@@ -146,10 +145,10 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "net-sysfs.h"
-/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8
/* This should be increased if a protocol with a bigger head is added. */
@@ -164,6 +163,9 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
@@ -1285,6 +1287,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return len;
}
+EXPORT_SYMBOL(dev_set_alias);
/**
* dev_get_alias - get ifalias of a device
@@ -1362,7 +1365,7 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
-static int __dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
@@ -1378,7 +1381,7 @@ static int __dev_open(struct net_device *dev)
*/
netpoll_poll_disable(dev);
- ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+ ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
ret = notifier_to_errno(ret);
if (ret)
return ret;
@@ -1407,7 +1410,8 @@ static int __dev_open(struct net_device *dev)
/**
* dev_open - prepare an interface for use.
- * @dev: device to open
+ * @dev: device to open
+ * @extack: netlink extended ack
*
* Takes a device from down to up state. The device's private open
* function is invoked and then the multicast lists are loaded. Finally
@@ -1417,14 +1421,14 @@ static int __dev_open(struct net_device *dev)
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*/
-int dev_open(struct net_device *dev)
+int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
int ret;
if (dev->flags & IFF_UP)
return 0;
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
if (ret < 0)
return ret;
@@ -1586,7 +1590,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
- };
+ N(PRE_CHANGEADDR)
+ }
#undef N
return "UNKNOWN_NETDEV_EVENT";
}
@@ -1734,6 +1739,18 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ .extack = extack,
+ };
+
+ return call_netdevice_notifiers_info(val, &info);
+}
+
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
@@ -1745,47 +1762,65 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info = {
- .dev = dev,
+ return call_netdevice_notifiers_extack(val, dev, NULL);
+}
+EXPORT_SYMBOL(call_netdevice_notifiers);
+
+/**
+ * call_netdevice_notifiers_mtu - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @arg: additional u32 argument passed to the notifier function
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+static int call_netdevice_notifiers_mtu(unsigned long val,
+ struct net_device *dev, u32 arg)
+{
+ struct netdev_notifier_info_ext info = {
+ .info.dev = dev,
+ .ext.mtu = arg,
};
- return call_netdevice_notifiers_info(val, &info);
+ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+ return call_netdevice_notifiers_info(val, &info.info);
}
-EXPORT_SYMBOL(call_netdevice_notifiers);
#ifdef CONFIG_NET_INGRESS
-static struct static_key ingress_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
void net_inc_ingress_queue(void)
{
- static_key_slow_inc(&ingress_needed);
+ static_branch_inc(&ingress_needed_key);
}
EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
void net_dec_ingress_queue(void)
{
- static_key_slow_dec(&ingress_needed);
+ static_branch_dec(&ingress_needed_key);
}
EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
#endif
#ifdef CONFIG_NET_EGRESS
-static struct static_key egress_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
void net_inc_egress_queue(void)
{
- static_key_slow_inc(&egress_needed);
+ static_branch_inc(&egress_needed_key);
}
EXPORT_SYMBOL_GPL(net_inc_egress_queue);
void net_dec_egress_queue(void)
{
- static_key_slow_dec(&egress_needed);
+ static_branch_dec(&egress_needed_key);
}
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
-static struct static_key netstamp_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
static atomic_t netstamp_wanted;
@@ -1796,9 +1831,9 @@ static void netstamp_clear(struct work_struct *work)
wanted = atomic_add_return(deferred, &netstamp_wanted);
if (wanted > 0)
- static_key_enable(&netstamp_needed);
+ static_branch_enable(&netstamp_needed_key);
else
- static_key_disable(&netstamp_needed);
+ static_branch_disable(&netstamp_needed_key);
}
static DECLARE_WORK(netstamp_work, netstamp_clear);
#endif
@@ -1818,7 +1853,7 @@ void net_enable_timestamp(void)
atomic_inc(&netstamp_needed_deferred);
schedule_work(&netstamp_work);
#else
- static_key_slow_inc(&netstamp_needed);
+ static_branch_inc(&netstamp_needed_key);
#endif
}
EXPORT_SYMBOL(net_enable_timestamp);
@@ -1838,7 +1873,7 @@ void net_disable_timestamp(void)
atomic_dec(&netstamp_needed_deferred);
schedule_work(&netstamp_work);
#else
- static_key_slow_dec(&netstamp_needed);
+ static_branch_dec(&netstamp_needed_key);
#endif
}
EXPORT_SYMBOL(net_disable_timestamp);
@@ -1846,15 +1881,15 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
skb->tstamp = 0;
- if (static_key_false(&netstamp_needed))
+ if (static_branch_unlikely(&netstamp_needed_key))
__net_timestamp(skb);
}
-#define net_timestamp_check(COND, SKB) \
- if (static_key_false(&netstamp_needed)) { \
- if ((COND) && !(SKB)->tstamp) \
- __net_timestamp(SKB); \
- } \
+#define net_timestamp_check(COND, SKB) \
+ if (static_branch_unlikely(&netstamp_needed_key)) { \
+ if ((COND) && !(SKB)->tstamp) \
+ __net_timestamp(SKB); \
+ } \
bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
{
@@ -1955,6 +1990,17 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
return false;
}
+/**
+ * dev_nit_active - return true if any network interface taps are in use
+ *
+ * @dev: network device to check for the presence of taps
+ */
+bool dev_nit_active(struct net_device *dev)
+{
+ return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
+}
+EXPORT_SYMBOL_GPL(dev_nit_active);
+
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
@@ -1970,6 +2016,9 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
again:
list_for_each_entry_rcu(ptype, ptype_list, list) {
+ if (ptype->ignore_outgoing)
+ continue;
+
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
@@ -2067,11 +2116,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
int i;
+ /* walk through the TCs and see if it falls into any of them */
for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
if ((txq - tc->offset) < tc->count)
return i;
}
+ /* didn't find it, just return -1 to indicate no match */
return -1;
}
@@ -2080,6 +2131,10 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
+struct static_key xps_needed __read_mostly;
+EXPORT_SYMBOL(xps_needed);
+struct static_key xps_rxqs_needed __read_mostly;
+EXPORT_SYMBOL(xps_rxqs_needed);
static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P) \
rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -2091,7 +2146,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
int pos;
if (dev_maps)
- map = xmap_dereference(dev_maps->cpu_map[tci]);
+ map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@@ -2104,7 +2159,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
break;
}
- RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+ RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
kfree_rcu(map, rcu);
return false;
}
@@ -2134,34 +2189,78 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
return active;
}
+static void reset_xps_maps(struct net_device *dev,
+ struct xps_dev_maps *dev_maps,
+ bool is_rxqs_map)
+{
+ if (is_rxqs_map) {
+ static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+ } else {
+ RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ }
+ static_key_slow_dec_cpuslocked(&xps_needed);
+ kfree_rcu(dev_maps, rcu);
+}
+
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+ struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+ u16 offset, u16 count, bool is_rxqs_map)
+{
+ bool active = false;
+ int i, j;
+
+ for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
+ j < nr_ids;)
+ active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+ count);
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
+
+ if (!is_rxqs_map) {
+ for (i = offset + (count - 1); count--; i--) {
+ netdev_queue_numa_node_write(
+ netdev_get_tx_queue(dev, i),
+ NUMA_NO_NODE);
+ }
+ }
+}
+
static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
u16 count)
{
+ const unsigned long *possible_mask = NULL;
struct xps_dev_maps *dev_maps;
- int cpu, i;
- bool active = false;
+ unsigned int nr_ids;
+
+ if (!static_key_false(&xps_needed))
+ return;
+ cpus_read_lock();
mutex_lock(&xps_map_mutex);
- dev_maps = xmap_dereference(dev->xps_maps);
+ if (static_key_false(&xps_rxqs_needed)) {
+ dev_maps = xmap_dereference(dev->xps_rxqs_map);
+ if (dev_maps) {
+ nr_ids = dev->num_rx_queues;
+ clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
+ offset, count, true);
+ }
+ }
+
+ dev_maps = xmap_dereference(dev->xps_cpus_map);
if (!dev_maps)
goto out_no_maps;
- for_each_possible_cpu(cpu)
- active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
- offset, count);
-
- if (!active) {
- RCU_INIT_POINTER(dev->xps_maps, NULL);
- kfree_rcu(dev_maps, rcu);
- }
-
- for (i = offset + (count - 1); count--; i--)
- netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
- NUMA_NO_NODE);
+ if (num_possible_cpus() > 1)
+ possible_mask = cpumask_bits(cpu_possible_mask);
+ nr_ids = nr_cpu_ids;
+ clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+ false);
out_no_maps:
mutex_unlock(&xps_map_mutex);
+ cpus_read_unlock();
}
static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
@@ -2169,8 +2268,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
}
-static struct xps_map *expand_xps_map(struct xps_map *map,
- int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+ u16 index, bool is_rxqs_map)
{
struct xps_map *new_map;
int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2182,7 +2281,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return map;
}
- /* Need to add queue to this CPU's existing map */
+ /* Need to add tx-queue to this CPU's/rx-queue's existing map */
if (map) {
if (pos < map->alloc_len)
return map;
@@ -2190,9 +2289,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
alloc_len = map->alloc_len * 2;
}
- /* Need to allocate new map to store queue on this CPU's map */
- new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
- cpu_to_node(cpu));
+ /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+ * map
+ */
+ if (is_rxqs_map)
+ new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+ else
+ new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+ cpu_to_node(attr_index));
if (!new_map)
return NULL;
@@ -2204,32 +2308,53 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return new_map;
}
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
- u16 index)
+/* Must be called under cpus_read_lock */
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+ u16 index, bool is_rxqs_map)
{
+ const unsigned long *online_mask = NULL, *possible_mask = NULL;
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
- int i, cpu, tci, numa_node_id = -2;
+ int i, j, tci, numa_node_id = -2;
int maps_sz, num_tc = 1, tc = 0;
struct xps_map *map, *new_map;
bool active = false;
+ unsigned int nr_ids;
if (dev->num_tc) {
+ /* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
+ if (num_tc < 0)
+ return -EINVAL;
+
+ /* If queue belongs to subordinate dev use its map */
+ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
}
- maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
- if (maps_sz < L1_CACHE_BYTES)
- maps_sz = L1_CACHE_BYTES;
-
mutex_lock(&xps_map_mutex);
+ if (is_rxqs_map) {
+ maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+ dev_maps = xmap_dereference(dev->xps_rxqs_map);
+ nr_ids = dev->num_rx_queues;
+ } else {
+ maps_sz = XPS_CPU_DEV_MAPS_SIZE(num