summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/bonding/bond_main.c454
-rw-r--r--include/linux/filter.h13
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/net/bonding.h1
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/devmap.c69
-rw-r--r--lib/test_bpf.c2317
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/filter.c25
-rw-r--r--net/unix/unix_bpf.c3
-rw-r--r--samples/bpf/xdp1_kern.c2
-rw-r--r--samples/bpf/xdp2_kern.c2
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c2
-rw-r--r--samples/bpf/xdpsock_user.c20
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c12
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c520
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c14
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c148
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh2
28 files changed, 3431 insertions, 306 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3ba5f4871162..365953e8013e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond)
}
}
+static bool bond_xdp_check(struct bonding *bond)
+{
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_ACTIVEBACKUP:
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond_update_slave_arr(bond, NULL);
+ if (!slave_dev->netdev_ops->ndo_bpf ||
+ !slave_dev->netdev_ops->ndo_xdp_xmit) {
+ if (bond->xdp_prog) {
+ NL_SET_ERR_MSG(extack, "Slave does not support XDP");
+ slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
+ res = -EOPNOTSUPP;
+ goto err_sysfs_del;
+ }
+ } else {
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = bond->xdp_prog,
+ .extack = extack,
+ };
+
+ if (dev_xdp_prog_count(slave_dev) > 0) {
+ NL_SET_ERR_MSG(extack,
+ "Slave has XDP program loaded, please unload before enslaving");
+ slave_err(bond_dev, slave_dev,
+ "Slave has XDP program loaded, please unload before enslaving\n");
+ res = -EOPNOTSUPP;
+ goto err_sysfs_del;
+ }
+
+ res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (res < 0) {
+ /* ndo_bpf() sets extack error message */
+ slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+ goto err_sysfs_del;
+ }
+ if (bond->xdp_prog)
+ bpf_prog_inc(bond->xdp_prog);
+ }
+
slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
bond_is_active_slave(new_slave) ? "an active" : "a backup",
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@ -2252,6 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev,
/* recompute stats just before removing the slave */
bond_get_stats(bond->dev, &bond->bond_stats);
+ if (bond->xdp_prog) {
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = NULL,
+ .extack = NULL,
+ };
+ if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+ slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+ }
+
/* unregister rx_handler early so bond_handle_frame wouldn't be called
* for this slave anymore.
*/
@@ -3614,55 +3673,80 @@ static struct notifier_block bond_netdev_notifier = {
/*---------------------------- Hashing Policies -----------------------------*/
+/* Helper to access data in a packet, with or without a backing skb.
+ * If skb is given the data is linearized if necessary via pskb_may_pull.
+ */
+static inline const void *bond_pull_data(struct sk_buff *skb,
+ const void *data, int hlen, int n)
+{
+ if (likely(n <= hlen))
+ return data;
+ else if (skb && likely(pskb_may_pull(skb, n)))
+ return skb->head;
+
+ return NULL;
+}
+
/* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
- struct ethhdr *ep, hdr_tmp;
+ struct ethhdr *ep;
- ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
- if (ep)
- return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
- return 0;
+ data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+ if (!data)
+ return 0;
+
+ ep = (struct ethhdr *)(data + mhoff);
+ return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
}
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
- int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+ int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
{
const struct ipv6hdr *iph6;
const struct iphdr *iph;
- if (skb->protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+ if (l2_proto == htons(ETH_P_IP)) {
+ data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+ if (!data)
return false;
- iph = (const struct iphdr *)(skb->data + *noff);
+
+ iph = (const struct iphdr *)(data + *nhoff);
iph_to_flow_copy_v4addrs(fk, iph);
- *noff += iph->ihl << 2;
+ *nhoff += iph->ihl << 2;
if (!ip_is_fragment(iph))
- *proto = iph->protocol;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+ *ip_proto = iph->protocol;
+ } else if (l2_proto == htons(ETH_P_IPV6)) {
+ data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+ if (!data)
return false;
- iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+ iph6 = (const struct ipv6hdr *)(data + *nhoff);
iph_to_flow_copy_v6addrs(fk, iph6);
- *noff += sizeof(*iph6);
- *proto = iph6->nexthdr;
+ *nhoff += sizeof(*iph6);
+ *ip_proto = iph6->nexthdr;
} else {
return false;
}
- if (l34 && *proto >= 0)
- fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+ if (l34 && *ip_proto >= 0)
+ fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
return true;
}
-static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
- struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *mac_hdr;
u32 srcmac_vendor = 0, srcmac_dev = 0;
u16 vlan;
int i;
+ data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+ if (!data)
+ return 0;
+ mac_hdr = (struct ethhdr *)(data + mhoff);
+
for (i = 0; i < 3; i++)
srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
@@ -3678,26 +3762,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
}
/* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
- struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+ __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
{
bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
- int noff, proto = -1;
+ int ip_proto = -1;
switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_ENCAP23:
case BOND_XMIT_POLICY_ENCAP34:
memset(fk, 0, sizeof(*fk));
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
- fk, NULL, 0, 0, 0, 0);
+ fk, data, l2_proto, nhoff, hlen, 0);
default:
break;
}
fk->ports.ports = 0;
memset(&fk->icmp, 0, sizeof(fk->icmp));
- noff = skb_network_offset(skb);
- if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+ if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
return false;
/* ICMP error packets contains at least 8 bytes of the header
@@ -3705,22 +3788,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
* to correlate ICMP error packets within the same flow which
* generated the error.
*/
- if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
- skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
- skb_transport_offset(skb),
- skb_headlen(skb));
- if (proto == IPPROTO_ICMP) {
+ if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+ skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+ if (ip_proto == IPPROTO_ICMP) {
if (!icmp_is_err(fk->icmp.type))
return true;
- noff += sizeof(struct icmphdr);
- } else if (proto == IPPROTO_ICMPV6) {
+ nhoff += sizeof(struct icmphdr);
+ } else if (ip_proto == IPPROTO_ICMPV6) {
if (!icmpv6_is_err(fk->icmp.type))
return true;
- noff += sizeof(struct icmp6hdr);
+ nhoff += sizeof(struct icmp6hdr);
}
- return bond_flow_ip(skb, fk, &noff, &proto, l34);
+ return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
}
return true;
@@ -3736,33 +3817,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
return hash >> 1;
}
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
*/
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+ __be16 l2_proto, int mhoff, int nhoff, int hlen)
{
struct flow_keys flow;
u32 hash;
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
- skb->l4_hash)
- return skb->hash;
-
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
- return bond_vlan_srcmac_hash(skb);
+ return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
- !bond_flow_dissect(bond, skb, &flow))
- return bond_eth_hash(skb);
+ !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+ return bond_eth_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
- hash = bond_eth_hash(skb);
+ hash = bond_eth_hash(skb, data, mhoff, hlen);
} else {
if (flow.icmp.id)
memcpy(&hash, &flow.icmp, sizeof(hash));
@@ -3773,6 +3847,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
return bond_ip_hash(hash, &flow);
}
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+ skb->l4_hash)
+ return skb->hash;
+
+ return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+ skb->mac_header, skb->network_header,
+ skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+ struct ethhdr *eth;
+
+ if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+ return 0;
+
+ eth = (struct ethhdr *)xdp->data;
+
+ return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+ sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
/*-------------------------- Device entry points ----------------------------*/
void bond_work_init_all(struct bonding *bond)
@@ -4421,6 +4534,47 @@ non_igmp:
return NULL;
}
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+ struct xdp_buff *xdp)
+{
+ struct slave *slave;
+ int slave_cnt;
+ u32 slave_id;
+ const struct ethhdr *eth;
+ void *data = xdp->data;
+
+ if (data + sizeof(struct ethhdr) > xdp->data_end)
+ goto non_igmp;
+
+ eth = (struct ethhdr *)data;
+ data += sizeof(struct ethhdr);
+
+ /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ const struct iphdr *iph;
+
+ if (data + sizeof(struct iphdr) > xdp->data_end)
+ goto non_igmp;
+
+ iph = (struct iphdr *)data;
+
+ if (iph->protocol == IPPROTO_IGMP) {
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (slave)
+ return slave;
+ return bond_get_slave_by_id(bond, 0);
+ }
+ }
+
+non_igmp:
+ slave_cnt = READ_ONCE(bond->slave_cnt);
+ if (likely(slave_cnt)) {
+ slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+ return bond_get_slave_by_id(bond, slave_id);
+ }
+ return NULL;
+}
+
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
struct net_device *bond_dev)
{
@@ -4434,8 +4588,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
return bond_tx_drop(bond_dev, skb);
}
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
- struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
{
return rcu_dereference(bond->curr_active_slave);
}
@@ -4449,7 +4602,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- slave = bond_xmit_activebackup_slave_get(bond, skb);
+ slave = bond_xmit_activebackup_slave_get(bond);
if (slave)
return bond_dev_queue_xmit(bond, skb, slave->dev);
@@ -4637,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
return slave;
}
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+ struct xdp_buff *xdp)
+{
+ struct bond_up_slave *slaves;
+ unsigned int count;
+ u32 hash;
+
+ hash = bond_xmit_hash_xdp(bond, xdp);
+ slaves = rcu_dereference(bond->usable_slaves);
+ count = slaves ? READ_ONCE(slaves->count) : 0;
+ if (unlikely(!count))
+ return NULL;
+
+ return slaves->arr[hash % count];
+}
+
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
@@ -4747,7 +4916,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
slave = bond_xmit_roundrobin_slave_get(bond, skb);
break;
case BOND_MODE_ACTIVEBACKUP:
- slave = bond_xmit_activebackup_slave_get(bond, skb);
+ slave = bond_xmit_activebackup_slave_get(bond);
break;
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
@@ -4921,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+
+ /* Caller needs to hold rcu_read_lock() */
+
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+ break;
+
+ case BOND_MODE_ACTIVEBACKUP:
+ slave = bond_xmit_activebackup_slave_get(bond);
+ break;
+
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+ break;
+
+ default:
+ /* Should never happen. Mode guarded by bond_xdp_check() */
+ netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ if (slave)
+ return slave->dev;
+
+ return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+ int n, struct xdp_frame **frames, u32 flags)
+{
+ int nxmit, err = -ENXIO;
+
+ rcu_read_lock();
+
+ for (nxmit = 0; nxmit < n; nxmit++) {
+ struct xdp_frame *frame = frames[nxmit];
+ struct xdp_frame *frames1[] = {frame};
+ struct net_device *slave_dev;
+ struct xdp_buff xdp;
+
+ xdp_convert_frame_to_buff(frame, &xdp);
+
+ slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+ if (!slave_dev) {
+ err = -ENXIO;
+ break;
+ }
+
+ err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+ if (err < 1)
+ break;
+ }
+
+ rcu_read_unlock();
+
+ /* If error happened on the first frame then we can pass the error up, otherwise
+ * report the number of frames that were xmitted.
+ */
+ if (err < 0)
+ return (nxmit == 0 ? err : nxmit);
+
+ return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct list_head *iter;
+ struct slave *slave, *rollback_slave;
+ struct bpf_prog *old_prog;
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = prog,
+ .extack = extack,
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ if (!bond_xdp_check(bond))
+ return -EOPNOTSUPP;
+
+ old_prog = bond->xdp_prog;
+ bond->xdp_prog = prog;
+
+ bond_for_each_slave(bond, slave, iter) {
+ struct net_device *slave_dev = slave->dev;
+
+ if (!slave_dev->netdev_ops->ndo_bpf ||
+ !slave_dev->netdev_ops->ndo_xdp_xmit) {
+ NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
+ slave_err(dev, slave_dev, "Slave does not support XDP\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (dev_xdp_prog_count(slave_dev) > 0) {
+ NL_SET_ERR_MSG(extack,
+ "Slave has XDP program loaded, please unload before enslaving");
+ slave_err(dev, slave_dev,
+ "Slave has XDP program loaded, please unload before enslaving\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (err < 0) {
+ /* ndo_bpf() sets extack error message */
+ slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+ goto err;
+ }
+ if (prog)
+ bpf_prog_inc(prog);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (prog)
+ static_branch_inc(&bpf_master_redirect_enabled_key);
+ else
+ static_branch_dec(&bpf_master_redirect_enabled_key);
+
+ return 0;
+
+err:
+ /* unwind the program changes */
+ bond->xdp_prog = old_prog;
+ xdp.prog = old_prog;
+ xdp.extack = NULL; /* do not overwrite original error */
+
+ bond_for_each_slave(bond, rollback_slave, iter) {
+ struct net_device *slave_dev = rollback_slave->dev;
+ int err_unwind;
+
+ if (slave == rollback_slave)
+ break;
+
+ err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (err_unwind < 0)
+ slave_err(dev, slave_dev,
+ "Error %d when unwinding XDP program change\n", err_unwind);
+ else if (xdp.prog)
+ bpf_prog_inc(xdp.prog);
+ }
+ return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return bond_xdp_set(dev, xdp->prog, xdp->extack);
+ default:
+ return -EINVAL;
+ }
+}
+
static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
{
if (speed == 0 || speed == SPEED_UNKNOWN)
@@ -5009,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
+ .ndo_bpf = bond_xdp,
+ .ndo_xdp_xmit = bond_xdp_xmit,
+ .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
};
static const struct device_type bond_type = {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ff698c9d1c94..1797e8506929 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -776,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
DECLARE_BPF_DISPATCHER(xdp)
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
@@ -783,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* under local_bh_disable(), which provides the needed RCU protection
* for accessing map entries.
*/
- return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+ u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+ if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+ if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+ act = xdp_master_redirect(xdp);
+ }
+
+ return act;
}
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 135c943699d0..bd8d5b8e2de3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1318,6 +1318,9 @@ struct netdev_net_notifier {
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ * struct xdp_buff *xdp);
+ * Get the xmit slave of master device based on the xdp_buff.
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
* This function is used to wake up the softirq, ksoftirqd or kthread
* responsible for sending and/or receiving packets on a specific
@@ -1545,6 +1548,8 @@ struct net_device_ops {
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp,
u32 flags);
+ struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ struct xdp_buff *xdp);
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
@@ -4076,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, int expected_fd, u32 flags);
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 46df47004803..9f3fdc180c6c 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -259,6 +259,7 @@ struct bonding {
/* protecting ipsec_list */
spinlock_t ipsec_lock;
#endif /* CONFIG_XFRM_OFFLOAD */
+ struct bpf_prog *xdp_prog;
};
#define bond_slave_get_rcu(dev) \
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b1a5fc04492b..fe807b203a6f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1562,7 +1562,7 @@ select_insn:
if (unlikely(index >= array->map.max_entries))
goto out;
- if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
+ if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
goto out;
tail_call_cnt++;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 542e94fa30b4..f02d04540c0c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
}
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
- int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
{
- if (!obj || obj->dev->ifindex == exclude_ifindex ||
+ if (!obj ||
!obj->dev->netdev_ops->ndo_xdp_xmit)
return false;
@@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
return 0;
}
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+ while (num_excluded--) {
+ if (ifindex == excluded[num_excluded])
+ return true;
+ }
+ return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+ struct net_device *upper;
+ struct list_head *iter;
+ int n = 0;
+
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ indexes[n++] = upper->ifindex;
+ }
+ return n;
+}
+
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct xdp_frame *xdpf;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ excluded_devices[num_excluded++] = dev_rx->ifindex;
+ }
+
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
lockdep_is_held(&dtab->index_lock)) {
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct hlist_node *next;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ excluded_devices[num_excluded++] = dev->ifindex;
+ }
+
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
return err;
<