summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/bpf_sk_storage.c42
-rw-r--r--net/core/dev.c185
-rw-r--r--net/core/dev.h7
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/devlink.c789
-rw-r--r--net/core/drop_monitor.c12
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/failover.c6
-rw-r--r--net/core/filter.c141
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/flow_offload.c7
-rw-r--r--net/core/gen_stats.c16
-rw-r--r--net/core/gro.c74
-rw-r--r--net/core/link_watch.c20
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/net_namespace.c5
-rw-r--r--net/core/of_net.c5
-rw-r--r--net/core/rtnetlink.c90
-rw-r--r--net/core/skbuff.c165
-rw-r--r--net/core/skmsg.c9
-rw-r--r--net/core/sock.c29
-rw-r--r--net/core/sock_diag.c15
-rw-r--r--net/core/sock_map.c2
-rw-r--r--net/core/sock_reuseport.c94
-rw-r--r--net/core/tso.c8
-rw-r--r--net/core/utils.c4
26 files changed, 1255 insertions, 488 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94374d529ea4..bb378c33f542 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -48,10 +48,8 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
- struct bpf_local_storage_elem *selem;
struct bpf_local_storage *sk_storage;
bool free_sk_storage = false;
- struct hlist_node *n;
rcu_read_lock();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
@@ -60,24 +58,8 @@ void bpf_sk_storage_free(struct sock *sk)
return;
}
- /* Netiher the bpf_prog nor the bpf-map's syscall
- * could be modifying the sk_storage->list now.
- * Thus, no elem can be added-to or deleted-from the
- * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
- *
- * It is racing with bpf_local_storage_map_free() alone
- * when unlinking elem from the sk_storage->list and
- * the map's bucket->list.
- */
raw_spin_lock_bh(&sk_storage->lock);
- hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) {
- /* Always unlink from map before unlinking from
- * sk_storage.
- */
- bpf_selem_unlink_map(selem);
- free_sk_storage = bpf_selem_unlink_storage_nolock(
- sk_storage, selem, true, false);
- }
+ free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage);
raw_spin_unlock_bh(&sk_storage->lock);
rcu_read_unlock();
@@ -87,23 +69,12 @@ void bpf_sk_storage_free(struct sock *sk)
static void bpf_sk_storage_map_free(struct bpf_map *map)
{
- struct bpf_local_storage_map *smap;
-
- smap = (struct bpf_local_storage_map *)map;
- bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
- bpf_local_storage_map_free(smap, NULL);
+ bpf_local_storage_map_free(map, &sk_cache, NULL);
}
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
- struct bpf_local_storage_map *smap;
-
- smap = bpf_local_storage_map_alloc(attr);
- if (IS_ERR(smap))
- return ERR_CAST(smap);
-
- smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
- return &smap->map;
+ return bpf_local_storage_map_alloc(attr, &sk_cache);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key,
@@ -176,7 +147,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
if (!copy_selem)
return NULL;
- if (map_value_has_spin_lock(&smap->map))
+ if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data, true);
else
@@ -339,7 +310,6 @@ bpf_sk_storage_ptr(void *owner)
return &sk->sk_bpf_storage;
}
-BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -350,7 +320,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &sk_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
@@ -595,7 +565,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
if (!nla_value)
goto errout;
- if (map_value_has_spin_lock(&smap->map))
+ if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
copy_map_value_locked(&smap->map, nla_data(nla_value),
sdata->data, true);
else
diff --git a/net/core/dev.c b/net/core/dev.c
index 3be256051e99..b76fb37b381e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1163,22 +1163,6 @@ int dev_change_name(struct net_device *dev, const char *newname)
net = dev_net(dev);
- /* Some auto-enslaved devices e.g. failover slaves are
- * special, as userspace might rename the device after
- * the interface had been brought up and running since
- * the point kernel initiated auto-enslavement. Allow
- * live name change even when these slave devices are
- * up and running.
- *
- * Typically, users of these auto-enslaving devices
- * don't actually care about slave name change, as
- * they are supposed to operate on master interface
- * directly.
- */
- if (dev->flags & IFF_UP &&
- likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
- return -EBUSY;
-
down_write(&devnet_rename_sem);
if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
@@ -1195,7 +1179,8 @@ int dev_change_name(struct net_device *dev, const char *newname)
}
if (oldname[0] && !strchr(oldname, '%'))
- netdev_info(dev, "renamed from %s\n", oldname);
+ netdev_info(dev, "renamed from %s%s\n", oldname,
+ dev->flags & IFF_UP ? " (while UP)" : "");
old_assign_type = dev->name_assign_type;
dev->name_assign_type = NET_NAME_RENAMED;
@@ -1333,7 +1318,7 @@ void netdev_state_change(struct net_device *dev)
call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL);
}
}
EXPORT_SYMBOL(netdev_state_change);
@@ -1469,7 +1454,7 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
if (ret < 0)
return ret;
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL);
call_netdevice_notifiers(NETDEV_UP, dev);
return ret;
@@ -1541,7 +1526,7 @@ void dev_close_many(struct list_head *head, bool unlink)
__dev_close_many(head);
list_for_each_entry_safe(dev, tmp, head, close_list) {
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL);
call_netdevice_notifiers(NETDEV_DOWN, dev);
if (unlink)
list_del_init(&dev->close_list);
@@ -1621,10 +1606,10 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
- N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
- N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
- N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
- N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+ N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN)
+ N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA)
+ N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE)
+ N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
@@ -1876,6 +1861,22 @@ int unregister_netdevice_notifier_net(struct net *net,
}
EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+static void __move_netdevice_notifier_net(struct net *src_net,
+ struct net *dst_net,
+ struct notifier_block *nb)
+{
+ __unregister_netdevice_notifier_net(src_net, nb);
+ __register_netdevice_notifier_net(dst_net, nb, true);
+}
+
+void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net,
+ struct notifier_block *nb)
+{
+ rtnl_lock();
+ __move_netdevice_notifier_net(src_net, dst_net, nb);
+ rtnl_unlock();
+}
+
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
@@ -1912,10 +1913,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev,
{
struct netdev_net_notifier *nn;
- list_for_each_entry(nn, &dev->net_notifier_list, list) {
- __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
- __register_netdevice_notifier_net(net, nn->nb, true);
- }
+ list_for_each_entry(nn, &dev->net_notifier_list, list)
+ __move_netdevice_notifier_net(dev_net(dev), net, nn->nb);
}
/**
@@ -2074,13 +2073,10 @@ static DECLARE_WORK(netstamp_work, netstamp_clear);
void net_enable_timestamp(void)
{
#ifdef CONFIG_JUMP_LABEL
- int wanted;
+ int wanted = atomic_read(&netstamp_wanted);
- while (1) {
- wanted = atomic_read(&netstamp_wanted);
- if (wanted <= 0)
- break;
- if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
+ while (wanted > 0) {
+ if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted + 1))
return;
}
atomic_inc(&netstamp_needed_deferred);
@@ -2094,13 +2090,10 @@ EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
#ifdef CONFIG_JUMP_LABEL
- int wanted;
+ int wanted = atomic_read(&netstamp_wanted);
- while (1) {
- wanted = atomic_read(&netstamp_wanted);
- if (wanted <= 1)
- break;
- if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
+ while (wanted > 1) {
+ if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted - 1))
return;
}
atomic_dec(&netstamp_needed_deferred);
@@ -5986,10 +5979,9 @@ EXPORT_SYMBOL(__napi_schedule);
*/
bool napi_schedule_prep(struct napi_struct *n)
{
- unsigned long val, new;
+ unsigned long new, val = READ_ONCE(n->state);
do {
- val = READ_ONCE(n->state);
if (unlikely(val & NAPIF_STATE_DISABLE))
return false;
new = val | NAPIF_STATE_SCHED;
@@ -6002,7 +5994,7 @@ bool napi_schedule_prep(struct napi_struct *n)
*/
new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
NAPIF_STATE_MISSED;
- } while (cmpxchg(&n->state, val, new) != val);
+ } while (!try_cmpxchg(&n->state, &val, new));
return !(val & NAPIF_STATE_SCHED);
}
@@ -6070,9 +6062,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
local_irq_restore(flags);
}
+ val = READ_ONCE(n->state);
do {
- val = READ_ONCE(n->state);
-
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
@@ -6085,7 +6076,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
*/
new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
NAPIF_STATE_SCHED;
- } while (cmpxchg(&n->state, val, new) != val);
+ } while (!try_cmpxchg(&n->state, &val, new));
if (unlikely(val & NAPIF_STATE_MISSED)) {
__napi_schedule(n);
@@ -6406,19 +6397,16 @@ void napi_disable(struct napi_struct *n)
might_sleep();
set_bit(NAPI_STATE_DISABLE, &n->state);
- for ( ; ; ) {
- val = READ_ONCE(n->state);
- if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
+ val = READ_ONCE(n->state);
+ do {
+ while (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
usleep_range(20, 200);
- continue;
+ val = READ_ONCE(n->state);
}
new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
-
- if (cmpxchg(&n->state, val, new) == val)
- break;
- }
+ } while (!try_cmpxchg(&n->state, &val, new));
hrtimer_cancel(&n->timer);
@@ -6435,16 +6423,15 @@ EXPORT_SYMBOL(napi_disable);
*/
void napi_enable(struct napi_struct *n)
{
- unsigned long val, new;
+ unsigned long new, val = READ_ONCE(n->state);
do {
- val = READ_ONCE(n->state);
BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
if (n->dev->threaded && n->thread)
new |= NAPIF_STATE_THREADED;
- } while (cmpxchg(&n->state, val, new) != val);
+ } while (!try_cmpxchg(&n->state, &val, new));
}
EXPORT_SYMBOL(napi_enable);
@@ -8351,7 +8338,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
dev_change_rx_flags(dev, IFF_PROMISC);
}
if (notify)
- __dev_notify_flags(dev, old_flags, IFF_PROMISC);
+ __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL);
return 0;
}
@@ -8406,7 +8393,7 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
dev_set_rx_mode(dev);
if (notify)
__dev_notify_flags(dev, old_flags,
- dev->gflags ^ old_gflags);
+ dev->gflags ^ old_gflags, 0, NULL);
}
return 0;
}
@@ -8569,12 +8556,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags,
}
void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
- unsigned int gchanges)
+ unsigned int gchanges, u32 portid,
+ const struct nlmsghdr *nlh)
{
unsigned int changes = dev->flags ^ old_flags;
if (gchanges)
- rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC, portid, nlh);
if (changes & IFF_UP) {
if (dev->flags & IFF_UP)
@@ -8616,7 +8604,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
return ret;
changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
- __dev_notify_flags(dev, old_flags, changes);
+ __dev_notify_flags(dev, old_flags, changes, 0, NULL);
return ret;
}
EXPORT_SYMBOL(dev_change_flags);
@@ -8822,7 +8810,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user);
int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
{
- size_t size = sizeof(sa->sa_data);
+ size_t size = sizeof(sa->sa_data_min);
struct net_device *dev;
int ret = 0;
@@ -10059,7 +10047,7 @@ int register_netdevice(struct net_device *dev)
dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
write_unlock(&dev_base_lock);
if (ret)
- goto err_uninit;
+ goto err_uninit_notify;
__netdev_update_features(dev);
@@ -10101,11 +10089,13 @@ int register_netdevice(struct net_device *dev)
*/
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
out:
return ret;
+err_uninit_notify:
+ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -10379,24 +10369,16 @@ void netdev_run_todo(void)
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
const struct net_device_stats *netdev_stats)
{
-#if BITS_PER_LONG == 64
- BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
- memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
- /* zero out counters that only exist in rtnl_link_stats64 */
- memset((char *)stats64 + sizeof(*netdev_stats), 0,
- sizeof(*stats64) - sizeof(*netdev_stats));
-#else
- size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
- const unsigned long *src = (const unsigned long *)netdev_stats;
+ size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t);
+ const atomic_long_t *src = (atomic_long_t *)netdev_stats;
u64 *dst = (u64 *)stats64;
BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
- dst[i] = src[i];
+ dst[i] = atomic_long_read(&src[i]);
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + n * sizeof(u64), 0,
sizeof(*stats64) - n * sizeof(u64));
-#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
@@ -10477,12 +10459,12 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
stats = per_cpu_ptr(netstats, cpu);
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
+ start = u64_stats_fetch_begin(&stats->syncp);
rx_packets = u64_stats_read(&stats->rx_packets);
rx_bytes = u64_stats_read(&stats->rx_bytes);
tx_packets = u64_stats_read(&stats->tx_packets);
tx_bytes = u64_stats_read(&stats->tx_bytes);
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
s->rx_packets += rx_packets;
s->rx_bytes += rx_bytes;
@@ -10535,6 +10517,22 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+/**
+ * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default
+ * @dev: netdev to enable the IRQ coalescing on
+ *
+ * Sets a conservative default for SW IRQ coalescing. Users can use
+ * sysfs attributes to override the default values.
+ */
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
+{
+ WARN_ON(dev->reg_state == NETREG_REGISTERED);
+
+ dev->gro_flush_timeout = 20000;
+ dev->napi_defer_hard_irqs = 1;
+}
+EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
+
void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
@@ -10780,14 +10778,8 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
}
EXPORT_SYMBOL(unregister_netdevice_queue);
-/**
- * unregister_netdevice_many - unregister many devices
- * @head: list of devices
- *
- * Note: As most callers use a stack allocated list_head,
- * we force a list_del() to make sure stack wont be corrupted later.
- */
-void unregister_netdevice_many(struct list_head *head)
+void unregister_netdevice_many_notify(struct list_head *head,
+ u32 portid, const struct nlmsghdr *nlh)
{
struct net_device *dev, *tmp;
LIST_HEAD(close_head);
@@ -10849,7 +10841,8 @@ void unregister_netdevice_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL, NULL, 0);
+ GFP_KERNEL, NULL, 0,
+ portid, nlmsg_seq(nlh));
/*
* Flush the unicast and multicast chains
@@ -10860,11 +10853,13 @@ void unregister_netdevice_many(struct list_head *head)
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
+ call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
+
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
if (skb)
- rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
+ rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
@@ -10887,6 +10882,18 @@ void unregister_netdevice_many(struct list_head *head)
list_del(head);
}
+
+/**
+ * unregister_netdevice_many - unregister many devices
+ * @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack wont be corrupted later.
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+ unregister_netdevice_many_notify(head, 0, NULL);
+}
EXPORT_SYMBOL(unregister_netdevice_many);
/**
@@ -11042,7 +11049,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
synchronize_net();
err = 0;
diff --git a/net/core/dev.h b/net/core/dev.h
index cbb8a925175a..814ed5b7b960 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -88,6 +88,13 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier);
void __dev_set_rx_mode(struct net_device *dev);
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
+ unsigned int gchanges, u32 portid,
+ const struct nlmsghdr *nlh);
+
+void unregister_netdevice_many_notify(struct list_head *head,
+ u32 portid, const struct nlmsghdr *nlh);
+
static inline void netif_set_gso_max_size(struct net_device *dev,
unsigned int size)
{
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7674bb9f3076..5cdbfbf9a7dc 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
if (ifr->ifr_hwaddr.sa_family != dev->type)
return -EINVAL;
memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
- min(sizeof(ifr->ifr_hwaddr.sa_data),
+ min(sizeof(ifr->ifr_hwaddr.sa_data_min),
(size_t)dev->addr_len));
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return 0;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 89baa7c0938b..6004bd0ccee4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -41,7 +41,7 @@ struct devlink_dev_stats {
struct devlink {
u32 index;
- struct list_head port_list;
+ struct xarray ports;
struct list_head rate_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
@@ -71,6 +71,7 @@ struct devlink {
refcount_t refcount;
struct completion comp;
struct rcu_head rcu;
+ struct notifier_block netdevice_nb;
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -194,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
[DEVLINK_PORT_FN_ATTR_STATE] =
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
@@ -381,19 +387,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
unsigned int port_index)
{
- struct devlink_port *devlink_port;
-
- list_for_each_entry(devlink_port, &devlink->port_list, list) {
- if (devlink_port->index == port_index)
- return devlink_port;
- }
- return NULL;
-}
-
-static bool devlink_port_index_exists(struct devlink *devlink,
- unsigned int port_index)
-{
- return devlink_port_get_by_index(devlink, port_index);
+ return xa_load(&devlink->ports, port_index);
}
static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
@@ -691,6 +685,87 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
return 0;
}
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+ u32 cap, bool is_enable)
+{
+ caps->selector |= cap;
+ if (is_enable)
+ caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_roce_get)
+ return 0;
+
+ err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_migratable_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
static int
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct genl_info *info,
@@ -769,7 +844,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
-static int devlink_nl_pre_doit(const struct genl_ops *ops,
+static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
struct devlink_linecard *linecard;
@@ -827,7 +902,7 @@ unlock:
return err;
}
-static void devlink_nl_post_doit(const struct genl_ops *ops,
+static void devlink_nl_post_doit(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
struct devlink_linecard *linecard;
@@ -879,6 +954,24 @@ nla_put_failure:
return -EMSGSIZE;
}
+int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
+{
+ if (devlink_nl_put_handle(msg, devlink_port->devlink))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ return -EMSGSIZE;
+ return 0;
+}
+
+size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+{
+ struct devlink *devlink = devlink_port->devlink;
+
+ return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
+ + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
+ + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
+}
+
struct devlink_reload_combination {
enum devlink_reload_action action;
enum devlink_reload_limit limit;
@@ -1184,6 +1277,14 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
devlink_rate->tx_max, DEVLINK_ATTR_PAD))
goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
+ devlink_rate->tx_priority))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
+ devlink_rate->tx_weight))
+ goto nla_put_failure;
+
if (devlink_rate->parent)
if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
devlink_rate->parent->name))
@@ -1249,6 +1350,51 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
}
static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_migratable_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_roce_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ err = devlink_port_fn_mig_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_MIGRATABLE,
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
@@ -1266,6 +1412,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
&msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_caps_fill(ops, port, msg, extack,
+ &msg_updated);
+ if (err)
+ goto out;
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
@@ -1292,8 +1442,6 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
goto nla_put_failure;
- /* Hold rtnl lock while accessing port's netdev attributes. */
- rtnl_lock();
spin_lock_bh(&devlink_port->type_lock);
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
goto nla_put_failure_type_locked;
@@ -1302,18 +1450,15 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
devlink_port->desi