summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-02-10 17:51:27 -0800
committerJakub Kicinski <kuba@kernel.org>2023-02-10 17:51:27 -0800
commitde4287336794f49323a5223c8b6e131f4840a866 (patch)
tree9a2dfaf47fc689cdae5a45a728d0529d27c77d98 /drivers/net/ethernet/intel
parentd12f9ad028062ff3ee2bfca3247f8812269d1f06 (diff)
parent17bcd27a08a21397698edf143084d7c87ce17946 (diff)
downloadlinux-de4287336794f49323a5223c8b6e131f4840a866.tar.gz
linux-de4287336794f49323a5223c8b6e131f4840a866.tar.bz2
linux-de4287336794f49323a5223c8b6e131f4840a866.zip
Daniel Borkmann says:
==================== pull-request: bpf-next 2023-02-11 We've added 96 non-merge commits during the last 14 day(s) which contain a total of 152 files changed, 4884 insertions(+), 962 deletions(-). There is a minor conflict in drivers/net/ethernet/intel/ice/ice_main.c between commit 5b246e533d01 ("ice: split probe into smaller functions") from the net-next tree and commit 66c0e13ad236 ("drivers: net: turn on XDP features") from the bpf-next tree. Remove the hunk given ice_cfg_netdev() is otherwise there a 2nd time, and add XDP features to the existing ice_cfg_netdev() one: [...] ice_set_netdev_features(netdev); netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_XSK_ZEROCOPY; ice_set_ops(netdev); [...] Stephen's merge conflict mail: https://lore.kernel.org/bpf/20230207101951.21a114fa@canb.auug.org.au/ The main changes are: 1) Add support for BPF trampoline on s390x which finally allows to remove many test cases from the BPF CI's DENYLIST.s390x, from Ilya Leoshkevich. 2) Add multi-buffer XDP support to ice driver, from Maciej Fijalkowski. 3) Add capability to export the XDP features supported by the NIC. Along with that, add a XDP compliance test tool, from Lorenzo Bianconi & Marek Majtyka. 4) Add __bpf_kfunc tag for marking kernel functions as kfuncs, from David Vernet. 5) Add a deep dive documentation about the verifier's register liveness tracking algorithm, from Eduard Zingerman. 6) Fix and follow-up cleanups for resolve_btfids to be compiled as a host program to avoid cross compile issues, from Jiri Olsa & Ian Rogers. 7) Batch of fixes to the BPF selftest for xdp_hw_metadata which resulted when testing on different NICs, from Jesper Dangaard Brouer. 8) Fix libbpf to better detect kernel version code on Debian, from Hao Xiang. 9) Extend libbpf to add an option for when the perf buffer should wake up, from Jon Doron. 10) Follow-up fix on xdp_metadata selftest to just consume on TX completion, from Stanislav Fomichev. 11) Extend the kfuncs.rst document with description on kfunc lifecycle & stability expectations, from David Vernet. 12) Fix bpftool prog profile to skip attaching to offline CPUs, from Tonghao Zhang. ==================== Link: https://lore.kernel.org/r/20230211002037.8489-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/intel')
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c21
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c52
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c408
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h54
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c236
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h75
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c192
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c9
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_xdp.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c6
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c1
15 files changed, 665 insertions, 419 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 43693f902c27..3ee00c3bc319 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -13339,9 +13339,11 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) {
- if (!prog)
+ if (!prog) {
+ xdp_features_clear_redirect_target(vsi->netdev);
/* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
+ }
i40e_reset_and_rebuild(pf, true, true);
}
@@ -13362,11 +13364,13 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
/* Kick start the NAPI context if there is an AF_XDP socket open
* on that queue id. This so that receiving will start.
*/
- if (need_reset && prog)
+ if (need_reset && prog) {
for (i = 0; i < vsi->num_queue_pairs; i++)
if (vsi->xdp_rings[i]->xsk_pool)
(void)i40e_xsk_wakeup(vsi->netdev, i,
XDP_WAKEUP_RX);
+ xdp_features_set_redirect_target(vsi->netdev, true);
+ }
return 0;
}
@@ -13783,6 +13787,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
netdev->features &= ~NETIF_F_HW_TC;
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_XSK_ZEROCOPY;
if (vsi->type == I40E_VSI_MAIN) {
SET_NETDEV_DEV(netdev, &pf->pdev->dev);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 554095b25f44..1911d644dfa8 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -355,9 +355,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
{
if (ice_ring_uses_build_skb(rx_ring))
return ICE_SKB_PAD;
- else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
- return XDP_PACKET_HEADROOM;
-
return 0;
}
@@ -495,7 +492,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
- u16 num_bufs = ICE_DESC_UNUSED(ring);
+ u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
int err;
ring->rx_buf_len = ring->vsi->rx_buf_len;
@@ -503,8 +500,10 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (ring->vsi->type == ICE_VSI_PF) {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
- xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->q_index, ring->q_vector->napi.napi_id);
+ __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->vsi->rx_buf_len);
ring->xsk_pool = ice_xsk_pool(ring);
if (ring->xsk_pool) {
@@ -524,9 +523,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
} else {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
- xdp_rxq_info_reg(&ring->xdp_rxq,
- ring->netdev,
- ring->q_index, ring->q_vector->napi.napi_id);
+ __xdp_rxq_info_reg(&ring->xdp_rxq,
+ ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->vsi->rx_buf_len);
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
@@ -536,6 +537,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
}
}
+ xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+ ring->xdp.data = NULL;
err = ice_setup_rx_ctx(ring);
if (err) {
dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 6f03e9fe331a..b360bd8f1599 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3046,8 +3046,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
- xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
- xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
@@ -3092,7 +3090,7 @@ process_rx:
/* allocate Rx buffers */
err = ice_alloc_rx_bufs(&rx_rings[i],
- ICE_DESC_UNUSED(&rx_rings[i]));
+ ICE_RX_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
if (err) {
while (i) {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 960197b2301c..37fe639712e6 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1961,8 +1961,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
{
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
- vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
- vsi->rx_buf_len = ICE_RXBUF_2048;
+ vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
+ vsi->rx_buf_len = ICE_RXBUF_1664;
#if (PAGE_SIZE < 8192)
} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
@@ -1971,11 +1971,7 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
#endif
} else {
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
-#if (PAGE_SIZE < 8192)
vsi->rx_buf_len = ICE_RXBUF_3072;
-#else
- vsi->rx_buf_len = ICE_RXBUF_2048;
-#endif
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e6a35f875520..0712c1055aea 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -22,6 +22,7 @@
#include "ice_eswitch.h"
#include "ice_tc_lib.h"
#include "ice_vsi_vlan_ops.h"
+#include <net/xdp_sock_drv.h>
#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
static const char ice_driver_string[] = DRV_SUMMARY;
@@ -2569,8 +2570,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->netdev = NULL;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
- xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
- xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
@@ -2862,6 +2861,18 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
}
/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+ if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ return ICE_RXBUF_1664;
+ else
+ return ICE_RXBUF_3072;
+}
+
+/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
* @prog: XDP program
@@ -2871,13 +2882,16 @@ static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
- int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
int ret = 0, xdp_ring_err = 0;
- if (frame_size > vsi->rx_buf_len) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
- return -EOPNOTSUPP;
+ if (prog && !prog->aux->xdp_has_frags) {
+ if (frame_size > ice_max_xdp_frame_size(vsi)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "MTU is too large for linear frames and XDP prog does not support frags");
+ return -EOPNOTSUPP;
+ }
}
/* need to stop netdev while setting up the program for Rx rings */
@@ -2898,11 +2912,13 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
}
+ xdp_features_set_redirect_target(vsi->netdev, false);
/* reallocate Rx queues that are used for zero-copy */
xdp_ring_err = ice_realloc_zc_buf(vsi, true);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+ xdp_features_clear_redirect_target(vsi->netdev);
xdp_ring_err = ice_destroy_xdp_rings(vsi);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
@@ -4552,6 +4568,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
np->vsi = vsi;
ice_set_netdev_features(netdev);
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_XSK_ZEROCOPY;
ice_set_ops(netdev);
if (vsi->type == ICE_VSI_PF) {
@@ -7514,18 +7532,6 @@ clear_recovery:
}
/**
- * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
- * @vsi: Pointer to VSI structure
- */
-static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
-{
- if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
- return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
- else
- return ICE_RXBUF_3072;
-}
-
-/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
@@ -7537,6 +7543,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct bpf_prog *prog;
u8 count = 0;
int err = 0;
@@ -7545,7 +7552,8 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
- if (ice_is_xdp_ena_vsi(vsi)) {
+ prog = vsi->xdp_prog;
+ if (prog && !prog->aux->xdp_has_frags) {
int frame_size = ice_max_xdp_frame_size(vsi);
if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
@@ -7553,6 +7561,12 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
frame_size - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
+ } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
+ if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
+ netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
+ ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
+ return -EINVAL;
+ }
}
/* if a reset is in progress, wait for some time for it to complete */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ccf09c957a1c..466113c86e6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -113,12 +113,16 @@ static void
ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
- if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+ if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) {
devm_kfree(ring->dev, tx_buf->raw_buf);
- else if (ice_ring_is_xdp(ring))
- page_frag_free(tx_buf->raw_buf);
- else
+ } else if (ice_ring_is_xdp(ring)) {
+ if (ring->xsk_pool)
+ xsk_buff_free(tx_buf->xdp);
+ else
+ page_frag_free(tx_buf->raw_buf);
+ } else {
dev_kfree_skb_any(tx_buf->skb);
+ }
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
@@ -174,8 +178,6 @@ tx_skip_free:
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
- tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1;
- tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1;
if (!tx_ring->netdev)
return;
@@ -382,6 +384,7 @@ err:
*/
void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
{
+ struct xdp_buff *xdp = &rx_ring->xdp;
struct device *dev = rx_ring->dev;
u32 size;
u16 i;
@@ -390,16 +393,16 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
- if (rx_ring->skb) {
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
- }
-
if (rx_ring->xsk_pool) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}
+ if (xdp->data) {
+ xdp_return_buff(xdp);
+ xdp->data = NULL;
+ }
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -437,6 +440,7 @@ rx_skip_free:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
+ rx_ring->first_desc = 0;
rx_ring->next_to_use = 0;
}
@@ -506,6 +510,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
+ rx_ring->first_desc = 0;
if (ice_is_xdp_ena_vsi(rx_ring->vsi))
WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
@@ -523,8 +528,16 @@ err:
return -ENOMEM;
}
+/**
+ * ice_rx_frame_truesize
+ * @rx_ring: ptr to Rx ring
+ * @size: size
+ *
+ * calculate the truesize with taking into the account PAGE_SIZE of
+ * underlying arch
+ */
static unsigned int
-ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size)
+ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
{
unsigned int truesize;
@@ -545,34 +558,39 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused s
* @xdp: xdp_buff used as input to the XDP program
* @xdp_prog: XDP program to run
* @xdp_ring: ring to be used for XDP_TX action
+ * @rx_buf: Rx buffer to store the XDP action
*
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
-static int
+static void
ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
+ struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
+ struct ice_rx_buf *rx_buf)
{
- int err;
+ unsigned int ret = ICE_XDP_PASS;
u32 act;
+ if (!xdp_prog)
+ goto exit;
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
- return ICE_XDP_PASS;
+ break;
case XDP_TX:
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_lock(&xdp_ring->tx_lock);
- err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring);
+ ret = __ice_xmit_xdp_ring(xdp, xdp_ring);
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_unlock(&xdp_ring->tx_lock);
- if (err == ICE_XDP_CONSUMED)
+ if (ret == ICE_XDP_CONSUMED)
goto out_failure;
- return err;
+ break;
case XDP_REDIRECT:
- err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
- if (err)
+ if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))
goto out_failure;
- return ICE_XDP_REDIR;
+ ret = ICE_XDP_REDIR;
+ break;
default:
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
fallthrough;
@@ -581,8 +599,12 @@ out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
- return ICE_XDP_CONSUMED;
+ ret = ICE_XDP_CONSUMED;
}
+exit:
+ rx_buf->act = ret;
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ ice_set_rx_bufs_act(xdp, rx_ring, ret);
}
/**
@@ -605,6 +627,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
unsigned int queue_index = smp_processor_id();
struct ice_vsi *vsi = np->vsi;
struct ice_tx_ring *xdp_ring;
+ struct ice_tx_buf *tx_buf;
int nxmit = 0, i;
if (test_bit(ICE_VSI_DOWN, vsi->state))
@@ -627,16 +650,18 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdp_ring = vsi->xdp_rings[queue_index];
}
+ tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
- err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+ err = ice_xmit_xdp_ring(xdpf, xdp_ring);
if (err != ICE_XDP_TX)
break;
nxmit++;
}
+ tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
if (unlikely(flags & XDP_XMIT_FLUSH))
ice_xdp_ring_update_tail(xdp_ring);
@@ -706,7 +731,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
* buffers. Then bump tail at most one time. Grouping like this lets us avoid
* multiple tail writes per call.
*/
-bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count)
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
{
union ice_32b_rx_flex_desc *rx_desc;
u16 ntu = rx_ring->next_to_use;
@@ -783,7 +808,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
/**
* ice_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buf: buffer containing the page
- * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling ice_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
@@ -791,7 +815,7 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
* page freed
*/
static bool
-ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
+ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
@@ -802,7 +826,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
+ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1))
return false;
#else
#define ICE_LAST_OFFSET \
@@ -824,33 +848,44 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
}
/**
- * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
* @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp: xdp buff to place the data into
* @rx_buf: buffer containing page to add
- * @skb: sk_buff to place the data into
* @size: packet length from rx_desc
*
- * This function will add the data contained in rx_buf->page to the skb.
- * It will just attach the page as a frag to the skb.
- * The function will then update the page offset.
+ * This function will add the data contained in rx_buf->page to the xdp buf.
+ * It will just attach the page as a frag.
*/
-static void
-ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct sk_buff *skb, unsigned int size)
+static int
+ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ struct ice_rx_buf *rx_buf, const unsigned int size)
{
-#if (PAGE_SIZE >= 8192)
- unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
-#else
- unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
-#endif
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
if (!size)
- return;
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
- rx_buf->page_offset, size, truesize);
+ return 0;
+
+ if (!xdp_buff_has_frags(xdp)) {
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(xdp);
+ }
+
+ if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+ return -ENOMEM;
+ }
+
+ __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
+ rx_buf->page_offset, size);
+ sinfo->xdp_frags_size += size;
- /* page is being used so we must update the page offset */
- ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ if (page_is_pfmemalloc(rx_buf->page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
+ return 0;
}
/**
@@ -886,19 +921,18 @@ ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
* ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
* @rx_ring: Rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
- * @rx_buf_pgcnt: rx_buf page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct ice_rx_buf *
ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
- int *rx_buf_pgcnt)
+ const unsigned int ntc)
{
struct ice_rx_buf *rx_buf;
- rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
- *rx_buf_pgcnt =
+ rx_buf = &rx_ring->rx_buf[ntc];
+ rx_buf->pgcnt =
#if (PAGE_SIZE < 8192)
page_count(rx_buf->page);
#else
@@ -922,26 +956,25 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
/**
* ice_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
- * @rx_buf: Rx buffer to pull data from
* @xdp: xdp_buff pointing to the data
*
- * This function builds an skb around an existing Rx buffer, taking care
- * to set up the skb correctly and avoid any memcpy overhead.
+ * This function builds an skb around an existing XDP buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead. Driver has
+ * already combined frags (if any) to skb_shared_info.
*/
static struct sk_buff *
-ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct xdp_buff *xdp)
+ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
u8 metasize = xdp->data - xdp->data_meta;
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- SKB_DATA_ALIGN(xdp->data_end -
- xdp->data_hard_start);
-#endif
+ struct skb_shared_info *sinfo = NULL;
+ unsigned int nr_frags;
struct sk_buff *skb;
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
+
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
* likely have a consumer accessing first few bytes of meta
@@ -949,7 +982,7 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
*/
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
- skb = napi_build_skb(xdp->data_hard_start, truesize);
+ skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
@@ -964,8 +997,11 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
if (metasize)
skb_metadata_set(skb, metasize);
- /* buffer is used by skb, update page_offset */
- ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
@@ -981,24 +1017,30 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
* skb correctly.
*/
static struct sk_buff *
-ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct xdp_buff *xdp)
+ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
- unsigned int metasize = xdp->data - xdp->data_meta;
unsigned int size = xdp->data_end - xdp->data;
+ struct skb_shared_info *sinfo = NULL;
+ struct ice_rx_buf *rx_buf;
+ unsigned int nr_frags = 0;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- net_prefetch(xdp->data_meta);
+ net_prefetch(xdp->data);
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- ICE_RX_HDR_SIZE + metasize,
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
+ rx_buf = &rx_ring->rx_buf[rx_ring->first_desc];
skb_record_rx_queue(skb, rx_ring->q_index);
/* Determine available headroom for copy */
headlen = size;
@@ -1006,32 +1048,42 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
- ALIGN(headlen + metasize, sizeof(long)));
-
- if (metasize) {
- skb_metadata_set(skb, metasize);
- __skb_pull(skb, metasize);
- }
+ memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+ sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
if (size) {
-#if (PAGE_SIZE >= 8192)
- unsigned int truesize = SKB_DATA_ALIGN(size);
-#else
- unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
-#endif
+ /* besides adding here a partial frag, we are going to add
+ * frags from xdp_buff, make sure there is enough space for
+ * them
+ */
+ if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
skb_add_rx_frag(skb, 0, rx_buf->page,
- rx_buf->page_offset + headlen, size, truesize);
- /* buffer is used by skb, update page_offset */
- ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ rx_buf->page_offset + headlen, size,
+ xdp->frame_sz);
} else {
- /* buffer is unused, reset bias back to rx_buf; data was copied
- * onto skb's linear part so there's no need for adjusting
- * page offset and we can reuse this buffer as-is
+ /* buffer is unused, change the act that should be taken later
+ * on; data was copied onto skb's linear part so there's no
+ * need for adjusting page offset and we can reuse this buffer
+ * as-is
*/
- rx_buf->pagecnt_bias++;
+ rx_buf->act = ICE_SKB_CONSUMED;
+ }
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ struct skb_shared_info *skinfo = skb_shinfo(skb);
+
+ memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
+ sizeof(skb_frag_t) * nr_frags);
+
+ xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
}
return skb;
@@ -1041,26 +1093,17 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
* ice_put_rx_buf - Clean up used buffer and either recycle or free
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
- * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
*
- * This function will update next_to_clean and then clean up the contents
- * of the rx_buf. It will either recycle the buffer or unmap it and free
- * the associated resources.
+ * This function will clean up the contents of the rx_buf. It will either
+ * recycle the buffer or unmap it and free the associated resources.
*/
static void
-ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- int rx_buf_pgcnt)
+ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
- u16 ntc = rx_ring->next_to_clean + 1;
-
- /* fetch, update, and store next to clean */
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
if (!rx_buf)
return;
- if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
+ if (ice_can_reuse_rx_page(rx_buf)) {
/* hand second half of page back to the ring */
ice_reuse_rx_page(rx_ring, rx_buf);
} else {
@@ -1076,27 +1119,6 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
}
/**
- * ice_is_non_eop - process handling of non-EOP buffers
- * @rx_ring: Rx ring being processed
- * @rx_desc: Rx descriptor for current buffer
- *
- * If the buffer is an EOP buffer, this function exits returning false,
- * otherwise return true indicating that this is in fact a non-EOP buffer.
- */
-static bool
-ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
-{
- /* if we are the last buffer then there is nothing else to do */
-#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
- if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
- return false;
-
- rx_ring->ring_stats->rx_stats.non_eop_descs++;
-
- return true;
-}
-
-/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
@@ -1110,39 +1132,42 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *