diff options
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/dev.c | 60 | ||||
| -rw-r--r-- | net/core/devlink.c | 132 | ||||
| -rw-r--r-- | net/core/filter.c | 4 | ||||
| -rw-r--r-- | net/core/flow_dissector.c | 4 | ||||
| -rw-r--r-- | net/core/link_watch.c | 20 | ||||
| -rw-r--r-- | net/core/skbuff.c | 5 | ||||
| -rw-r--r-- | net/core/sock_diag.c | 15 |
7 files changed, 177 insertions, 63 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 117e830cabb0..7627c475d991 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2073,13 +2073,10 @@ static DECLARE_WORK(netstamp_work, netstamp_clear); void net_enable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL - int wanted; + int wanted = atomic_read(&netstamp_wanted); - while (1) { - wanted = atomic_read(&netstamp_wanted); - if (wanted <= 0) - break; - if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + while (wanted > 0) { + if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted + 1)) return; } atomic_inc(&netstamp_needed_deferred); @@ -2093,13 +2090,10 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL - int wanted; + int wanted = atomic_read(&netstamp_wanted); - while (1) { - wanted = atomic_read(&netstamp_wanted); - if (wanted <= 1) - break; - if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + while (wanted > 1) { + if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted - 1)) return; } atomic_dec(&netstamp_needed_deferred); @@ -5985,10 +5979,9 @@ EXPORT_SYMBOL(__napi_schedule); */ bool napi_schedule_prep(struct napi_struct *n) { - unsigned long val, new; + unsigned long new, val = READ_ONCE(n->state); do { - val = READ_ONCE(n->state); if (unlikely(val & NAPIF_STATE_DISABLE)) return false; new = val | NAPIF_STATE_SCHED; @@ -6001,7 +5994,7 @@ bool napi_schedule_prep(struct napi_struct *n) */ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * NAPIF_STATE_MISSED; - } while (cmpxchg(&n->state, val, new) != val); + } while (!try_cmpxchg(&n->state, &val, new)); return !(val & NAPIF_STATE_SCHED); } @@ -6069,9 +6062,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done) local_irq_restore(flags); } + val = READ_ONCE(n->state); do { - val = READ_ONCE(n->state); - WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | @@ -6084,7 +6076,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) */ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * NAPIF_STATE_SCHED; - } while (cmpxchg(&n->state, val, new) != val); + } while (!try_cmpxchg(&n->state, &val, new)); if (unlikely(val & NAPIF_STATE_MISSED)) { __napi_schedule(n); @@ -6405,19 +6397,16 @@ void napi_disable(struct napi_struct *n) might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); - for ( ; ; ) { - val = READ_ONCE(n->state); - if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { + val = READ_ONCE(n->state); + do { + while (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { usleep_range(20, 200); - continue; + val = READ_ONCE(n->state); } new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); - - if (cmpxchg(&n->state, val, new) == val) - break; - } + } while (!try_cmpxchg(&n->state, &val, new)); hrtimer_cancel(&n->timer); @@ -6434,16 +6423,15 @@ EXPORT_SYMBOL(napi_disable); */ void napi_enable(struct napi_struct *n) { - unsigned long val, new; + unsigned long new, val = READ_ONCE(n->state); do { - val = READ_ONCE(n->state); BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC); if (n->dev->threaded && n->thread) new |= NAPIF_STATE_THREADED; - } while (cmpxchg(&n->state, val, new) != val); + } while (!try_cmpxchg(&n->state, &val, new)); } EXPORT_SYMBOL(napi_enable); @@ -10381,24 +10369,16 @@ void netdev_run_todo(void) void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { -#if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); - /* zero out counters that only exist in rtnl_link_stats64 */ - memset((char *)stats64 + sizeof(*netdev_stats), 0, - sizeof(*stats64) - sizeof(*netdev_stats)); -#else - size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); - const unsigned long *src = (const unsigned long *)netdev_stats; + size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t); + const atomic_long_t *src = (atomic_long_t *)netdev_stats; u64 *dst = (u64 *)stats64; BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = src[i]; + dst[i] = atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); -#endif } EXPORT_SYMBOL(netdev_stats_to_stats64); diff --git a/net/core/devlink.c b/net/core/devlink.c index 7f789bbcbbd7..cea154ddce7a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1203,6 +1203,14 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, devlink_rate->tx_max, DEVLINK_ATTR_PAD)) goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY, + devlink_rate->tx_priority)) + goto nla_put_failure; + + if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT, + devlink_rate->tx_weight)) + goto nla_put_failure; + if (devlink_rate->parent) if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, devlink_rate->parent->name)) @@ -1879,10 +1887,8 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, int err = -EOPNOTSUPP; parent = devlink_rate->parent; - if (parent && len) { - NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent."); - return -EBUSY; - } else if (parent && !len) { + + if (parent && !len) { if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, @@ -1896,7 +1902,7 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, refcount_dec(&parent->refcnt); devlink_rate->parent = NULL; - } else if (!parent && len) { + } else if (len) { parent = devlink_rate_node_get_by_name(devlink, parent_name); if (IS_ERR(parent)) return -ENODEV; @@ -1923,6 +1929,10 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, if (err) return err; + if (devlink_rate->parent) + /* we're reassigning to other parent in this case */ + refcount_dec(&devlink_rate->parent->refcnt); + refcount_inc(&parent->refcnt); devlink_rate->parent = parent; } @@ -1936,6 +1946,8 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, { struct nlattr *nla_parent, **attrs = info->attrs; int err = -EOPNOTSUPP; + u32 priority; + u32 weight; u64 rate; if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { @@ -1964,6 +1976,34 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, devlink_rate->tx_max = rate; } + if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) { + priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv, + priority, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv, + priority, info->extack); + + if (err) + return err; + devlink_rate->tx_priority = priority; + } + + if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) { + weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv, + weight, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv, + weight, info->extack); + + if (err) + return err; + devlink_rate->tx_weight = weight; + } + nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; if (nla_parent) { err = devlink_nl_rate_parent_node_set(devlink_rate, info, @@ -1995,6 +2035,18 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_PRIORITY], + "TX priority set isn't supported for the leafs"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_WEIGHT], + "TX weight set isn't supported for the leafs"); + return false; + } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); @@ -2009,6 +2061,18 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_PRIORITY], + "TX priority set isn't supported for the nodes"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_WEIGHT], + "TX weight set isn't supported for the nodes"); + return false; + } } else { WARN(1, "Unknown type of rate object"); return false; @@ -7782,8 +7846,6 @@ int devlink_health_report(struct devlink_health_reporter *reporter, return -ECANCELED; } - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - if (reporter->auto_dump) { mutex_lock(&reporter->dump_lock); /* store current dump of current error, for later analysis */ @@ -9187,6 +9249,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -10321,13 +10385,60 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); /** + * devl_rate_node_create - create devlink rate node + * @devlink: devlink instance + * @priv: driver private data + * @node_name: name of the resulting node + * @parent: parent devlink_rate struct + * + * Create devlink rate object of type node + */ +struct devlink_rate * +devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, + struct devlink_rate *parent) +{ + struct devlink_rate *rate_node; + + rate_node = devlink_rate_node_get_by_name(devlink, node_name); + if (!IS_ERR(rate_node)) + return ERR_PTR(-EEXIST); + + rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); + if (!rate_node) + return ERR_PTR(-ENOMEM); + + if (parent) { + rate_node->parent = parent; + refcount_inc(&rate_node->parent->refcnt); + } + + rate_node->type = DEVLINK_RATE_TYPE_NODE; + rate_node->devlink = devlink; + rate_node->priv = priv; + + rate_node->name = kstrdup(node_name, GFP_KERNEL); + if (!rate_node->name) { + kfree(rate_node); + return ERR_PTR(-ENOMEM); + } + + refcount_set(&rate_node->refcnt, 1); + list_add(&rate_node->list, &devlink->rate_list); + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); + return rate_node; +} +EXPORT_SYMBOL_GPL(devl_rate_node_create); + +/** * devl_rate_leaf_create - create devlink rate leaf * @devlink_port: devlink port object to create rate object on * @priv: driver private data + * @parent: parent devlink_rate struct * * Create devlink rate object of type leaf on provided @devlink_port. */ -int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) +int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv, + struct devlink_rate *parent) { struct devlink *devlink = devlink_port->devlink; struct devlink_rate *devlink_rate; @@ -10341,6 +10452,11 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) if (!devlink_rate) return -ENOMEM; + if (parent) { + devlink_rate->parent = parent; + refcount_inc(&devlink_rate->parent->refcnt); + } + devlink_rate->type = DEVLINK_RATE_TYPE_LEAF; devlink_rate->devlink = devlink; devlink_rate->devlink_port = devlink_port; diff --git a/net/core/filter.c b/net/core/filter.c index b6e1b81cdfae..37baaa6b8fc3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6437,7 +6437,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, else sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, - dif, sdif, &udp_table, NULL); + dif, sdif, net->ipv4.udp_table, NULL); #if IS_ENABLED(CONFIG_IPV6) } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; @@ -6453,7 +6453,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, src6, tuple->ipv6.sport, dst6, tuple->ipv6.dport, dif, sdif, - &udp_table, NULL); + net->ipv4.udp_table, NULL); #endif } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 25cd35f5922e..3e81798ed3e0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -971,12 +971,14 @@ bool __skb_flow_dissect(const struct net *net, #if IS_ENABLED(CONFIG_NET_DSA) if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && proto == htons(ETH_P_XDSA))) { + struct metadata_dst *md_dst = skb_metadata_dst(skb); const struct dsa_device_ops *ops; int offset = 0; ops = skb->dev->dsa_ptr->tag_ops; /* Only DSA header taggers break flow dissection */ - if (ops->needed_headroom) { + if (ops->needed_headroom && + (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) { if (ops->flow_dissect) ops->flow_dissect(skb, &proto, &offset); else diff --git a/net/core/link_watch.c b/net/core/link_watch.c index aa6cb1f90966..c469d1c4db5d 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -38,9 +38,23 @@ static unsigned char default_operstate(const struct net_device *dev) if (netif_testing(dev)) return IF_OPER_TESTING; - if (!netif_carrier_ok(dev)) - return (dev->ifindex != dev_get_iflink(dev) ? - IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN); + /* Some uppers (DSA) have additional sources for being down, so + * first check whether lower is indeed the source of its down state. + */ + if (!netif_carrier_ok(dev)) { + int iflink = dev_get_iflink(dev); + struct net_device *peer; + + if (iflink == dev->ifindex) + return IF_OPER_DOWN; + + peer = __dev_get_by_index(dev_net(dev), iflink); + if (!peer) + return IF_OPER_DOWN; + + return netif_carrier_ok(peer) ? IF_OPER_DOWN : + IF_OPER_LOWERLAYERDOWN; + } if (netif_dormant(dev)) return IF_OPER_DORMANT; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 90d085290d49..4bf95e36ed16 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1267,13 +1267,12 @@ int mm_account_pinned_pages(struct mmpin *mmp, size_t size) max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; user = mmp->user ? : current_user(); + old_pg = atomic_long_read(&user->locked_vm); do { - old_pg = atomic_long_read(&user->locked_vm); new_pg = old_pg + num_pg; if (new_pg > max_pg) return -ENOBUFS; - } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) != - old_pg); + } while (!atomic_long_try_cmpxchg(&user->locked_vm, &old_pg, new_pg)); if (!mmp->user) { mmp->user = get_uid(user); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index f7cf74cdd3db..b1e29e18d1d6 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -25,14 +25,17 @@ DEFINE_COOKIE(sock_cookie); u64 __sock_gen_cookie(struct sock *sk) { - while (1) { - u64 res = atomic64_read(&sk->sk_cookie); + u64 res = atomic64_read(&sk->sk_cookie); - if (res) - return res; - res = gen_cookie_next(&sock_cookie); - atomic64_cmpxchg(&sk->sk_cookie, 0, res); + if (!res) { + u64 new = gen_cookie_next(&sock_cookie); + + atomic64_cmpxchg(&sk->sk_cookie, res, new); + + /* Another thread might have changed sk_cookie before us. */ + res = atomic64_read(&sk->sk_cookie); } + return res; } int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) |
