summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2023-02-01 23:30:27 +0100
committerDaniel Borkmann <daniel@iogearbox.net>2023-02-01 23:31:28 +0100
commit10d1b0e4dacccd617c502a926cd6299a19d40b65 (patch)
treeeacf16c8d895b89e36fba3dcdd0b241b9bd83494
parentc1a3daf7363b48c6c4b86aee2efa2287f695f649 (diff)
parenta24b4c6e9aab4f39982d40cfeb7c142e93310f8b (diff)
downloadlinux-10d1b0e4dacccd617c502a926cd6299a19d40b65.tar.gz
linux-10d1b0e4dacccd617c502a926cd6299a19d40b65.tar.bz2
linux-10d1b0e4dacccd617c502a926cd6299a19d40b65.zip
Merge branch 'xdp-ice-mbuf'
Maciej Fijalkowski says: ==================== Although this work started as an effort to add multi-buffer XDP support to ice driver, as usual it turned out that some other side stuff needed to be addressed, so let me give you an overview. First patch adjusts legacy-rx in a way that it will be possible to refer to skb_shared_info being at the end of the buffer when gathering up frame fragments within xdp_buff. Then, patches 2-9 prepare ice driver in a way that actual multi-buffer patches will be easier to swallow. 10 and 11 are the meat. What is worth mentioning is that this set actually *fixes* things as patch 11 removes the logic based on next_dd/rs and we previously stepped away from this for ice_xmit_zc(). Currently, AF_XDP ZC XDP_TX workload is off as there are two cleaning sides that can be triggered and two of them work on different internal logic. This set unifies that and allows us to improve the performance by 2x with a trick on the last (13) patch. 12th is a simple cleanup of no longer fields from Tx ring. I might be wrong but I have not seen anyone reporting performance impact among patches that add XDP multi-buffer support to a particular driver. Numbers below were gathered via xdp_rxq_info and xdp_redirect_map on 1500 MTU: XDP_DROP +1% XDP_PASS -1,2% XDP_TX -0,5% XDP_REDIRECT -3,3% Cherry on top, which is not directly related to mbuf support (last patch): XDP_TX ZC +126% Target the we agreed on was to not degrade performance for any action by anything that would be over 5%, so our goal was met. Basically this set keeps the performance where it was. Redirect is slower due to more frequent tail bumps. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c21
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c47
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c408
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h54
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c236
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h75
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c192
9 files changed, 629 insertions, 416 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 554095b25f44..1911d644dfa8 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -355,9 +355,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
{
if (ice_ring_uses_build_skb(rx_ring))
return ICE_SKB_PAD;
- else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
- return XDP_PACKET_HEADROOM;
-
return 0;
}
@@ -495,7 +492,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
- u16 num_bufs = ICE_DESC_UNUSED(ring);
+ u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
int err;
ring->rx_buf_len = ring->vsi->rx_buf_len;
@@ -503,8 +500,10 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (ring->vsi->type == ICE_VSI_PF) {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
- xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->q_index, ring->q_vector->napi.napi_id);
+ __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->vsi->rx_buf_len);
ring->xsk_pool = ice_xsk_pool(ring);
if (ring->xsk_pool) {
@@ -524,9 +523,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
} else {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
- xdp_rxq_info_reg(&ring->xdp_rxq,
- ring->netdev,
- ring->q_index, ring->q_vector->napi.napi_id);
+ __xdp_rxq_info_reg(&ring->xdp_rxq,
+ ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->vsi->rx_buf_len);
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
@@ -536,6 +537,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
}
}
+ xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+ ring->xdp.data = NULL;
err = ice_setup_rx_ctx(ring);
if (err) {
dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 5b71d40a7dc0..1adbcb4786c2 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3046,8 +3046,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
- xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
- xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
@@ -3092,7 +3090,7 @@ process_rx:
/* allocate Rx buffers */
err = ice_alloc_rx_bufs(&rx_rings[i],
- ICE_DESC_UNUSED(&rx_rings[i]));
+ ICE_RX_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
if (err) {
while (i) {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 301c1efe08b4..4a35479bd7dd 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1992,8 +1992,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
{
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
- vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
- vsi->rx_buf_len = ICE_RXBUF_2048;
+ vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
+ vsi->rx_buf_len = ICE_RXBUF_1664;
#if (PAGE_SIZE < 8192)
} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
@@ -2002,11 +2002,7 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
#endif
} else {
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
-#if (PAGE_SIZE < 8192)
vsi->rx_buf_len = ICE_RXBUF_3072;
-#else
- vsi->rx_buf_len = ICE_RXBUF_2048;
-#endif
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index fce86e8ff834..26a8910a41ff 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2570,8 +2570,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->netdev = NULL;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
- xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
- xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
@@ -2863,6 +2861,18 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
}
/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+ if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ return ICE_RXBUF_1664;
+ else
+ return ICE_RXBUF_3072;
+}
+
+/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
* @prog: XDP program
@@ -2872,13 +2882,16 @@ static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
- int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
int ret = 0, xdp_ring_err = 0;
- if (frame_size > vsi->rx_buf_len) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
- return -EOPNOTSUPP;
+ if (prog && !prog->aux->xdp_has_frags) {
+ if (frame_size > ice_max_xdp_frame_size(vsi)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "MTU is too large for linear frames and XDP prog does not support frags");
+ return -EOPNOTSUPP;
+ }
}
/* need to stop netdev while setting up the program for Rx rings */
@@ -7331,18 +7344,6 @@ clear_recovery:
}
/**
- * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
- * @vsi: Pointer to VSI structure
- */
-static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
-{
- if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
- return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
- else
- return ICE_RXBUF_3072;
-}
-
-/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
@@ -7354,6 +7355,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct bpf_prog *prog;
u8 count = 0;
int err = 0;
@@ -7362,7 +7364,8 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
- if (ice_is_xdp_ena_vsi(vsi)) {
+ prog = vsi->xdp_prog;
+ if (prog && !prog->aux->xdp_has_frags) {
int frame_size = ice_max_xdp_frame_size(vsi);
if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
@@ -7370,6 +7373,12 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
frame_size - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
+ } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
+ if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
+ netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
+ ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
+ return -EINVAL;
+ }
}
/* if a reset is in progress, wait for some time for it to complete */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ccf09c957a1c..466113c86e6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -113,12 +113,16 @@ static void
ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
- if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+ if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) {
devm_kfree(ring->dev, tx_buf->raw_buf);
- else if (ice_ring_is_xdp(ring))
- page_frag_free(tx_buf->raw_buf);
- else
+ } else if (ice_ring_is_xdp(ring)) {
+ if (ring->xsk_pool)
+ xsk_buff_free(tx_buf->xdp);
+ else
+ page_frag_free(tx_buf->raw_buf);
+ } else {
dev_kfree_skb_any(tx_buf->skb);
+ }
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
@@ -174,8 +178,6 @@ tx_skip_free:
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
- tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1;
- tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1;
if (!tx_ring->netdev)
return;
@@ -382,6 +384,7 @@ err:
*/
void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
{
+ struct xdp_buff *xdp = &rx_ring->xdp;
struct device *dev = rx_ring->dev;
u32 size;
u16 i;
@@ -390,16 +393,16 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
- if (rx_ring->skb) {
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
- }
-
if (rx_ring->xsk_pool) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}
+ if (xdp->data) {
+ xdp_return_buff(xdp);
+ xdp->data = NULL;
+ }
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -437,6 +440,7 @@ rx_skip_free:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
+ rx_ring->first_desc = 0;
rx_ring->next_to_use = 0;
}
@@ -506,6 +510,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
+ rx_ring->first_desc = 0;
if (ice_is_xdp_ena_vsi(rx_ring->vsi))
WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
@@ -523,8 +528,16 @@ err:
return -ENOMEM;
}
+/**
+ * ice_rx_frame_truesize
+ * @rx_ring: ptr to Rx ring
+ * @size: size
+ *
+ * calculate the truesize with taking into the account PAGE_SIZE of
+ * underlying arch
+ */
static unsigned int
-ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size)
+ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
{
unsigned int truesize;
@@ -545,34 +558,39 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused s
* @xdp: xdp_buff used as input to the XDP program
* @xdp_prog: XDP program to run
* @xdp_ring: ring to be used for XDP_TX action
+ * @rx_buf: Rx buffer to store the XDP action
*
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
-static int
+static void
ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
+ struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
+ struct ice_rx_buf *rx_buf)
{
- int err;
+ unsigned int ret = ICE_XDP_PASS;
u32 act;
+ if (!xdp_prog)
+ goto exit;
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
- return ICE_XDP_PASS;
+ break;
case XDP_TX:
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_lock(&xdp_ring->tx_lock);
- err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring);
+ ret = __ice_xmit_xdp_ring(xdp, xdp_ring);
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_unlock(&xdp_ring->tx_lock);
- if (err == ICE_XDP_CONSUMED)
+ if (ret == ICE_XDP_CONSUMED)
goto out_failure;
- return err;
+ break;
case XDP_REDIRECT:
- err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
- if (err)
+ if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))
goto out_failure;
- return ICE_XDP_REDIR;
+ ret = ICE_XDP_REDIR;
+ break;
default:
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
fallthrough;
@@ -581,8 +599,12 @@ out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
- return ICE_XDP_CONSUMED;
+ ret = ICE_XDP_CONSUMED;
}
+exit:
+ rx_buf->act = ret;
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ ice_set_rx_bufs_act(xdp, rx_ring, ret);
}
/**
@@ -605,6 +627,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
unsigned int queue_index = smp_processor_id();
struct ice_vsi *vsi = np->vsi;
struct ice_tx_ring *xdp_ring;
+ struct ice_tx_buf *tx_buf;
int nxmit = 0, i;
if (test_bit(ICE_VSI_DOWN, vsi->state))
@@ -627,16 +650,18 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdp_ring = vsi->xdp_rings[queue_index];
}
+ tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
- err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+ err = ice_xmit_xdp_ring(xdpf, xdp_ring);
if (err != ICE_XDP_TX)
break;
nxmit++;
}
+ tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
if (unlikely(flags & XDP_XMIT_FLUSH))
ice_xdp_ring_update_tail(xdp_ring);
@@ -706,7 +731,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
* buffers. Then bump tail at most one time. Grouping like this lets us avoid
* multiple tail writes per call.
*/
-bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count)
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
{
union ice_32b_rx_flex_desc *rx_desc;
u16 ntu = rx_ring->next_to_use;
@@ -783,7 +808,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
/**
* ice_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buf: buffer containing the page
- * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling ice_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
@@ -791,7 +815,7 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
* page freed
*/
static bool
-ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
+ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
@@ -802,7 +826,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
+ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1))
return false;
#else
#define ICE_LAST_OFFSET \
@@ -824,33 +848,44 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
}
/**
- * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
* @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp: xdp buff to place the data into
* @rx_buf: buffer containing page to add
- * @skb: sk_buff to place the data into
* @size: packet length from rx_desc
*
- * This function will add the data contained in rx_buf->page to the skb.
- * It will just attach the page as a frag to the skb.
- * The function will then update the page offset.
+ * This function will add the data contained in rx_buf->page to the xdp buf.
+ * It will just attach the page as a frag.
*/
-static void
-ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct sk_buff *skb, unsigned int size)
+static int
+ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ struct ice_rx_buf *rx_buf, const unsigned int size)
{
-#if (PAGE_SIZE >= 8192)
- unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
-#else
- unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
-#endif
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
if (!size)
- return;
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
- rx_buf->page_offset, size, truesize);
+ return 0;
+
+ if (!xdp_buff_has_frags(xdp)) {
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(xdp);
+ }
+
+ if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+ return -ENOMEM;
+ }
+
+ __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
+ rx_buf->page_offset, size);
+ sinfo->xdp_frags_size += size;
- /* page is being used so we must update the page offset */
- ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ if (page_is_pfmemalloc(rx_buf->page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
+ return 0;
}
/**
@@ -886,19 +921,18 @@ ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
* ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
* @rx_ring: Rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
- * @rx_buf_pgcnt: rx_buf page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct ice_rx_buf *
ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
- int *rx_buf_pgcnt)
+ const unsigned int ntc)
{
struct ice_rx_buf *rx_buf;
- rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
- *rx_buf_pgcnt =
+ rx_buf = &rx_ring->rx_buf[ntc];
+ rx_buf->pgcnt =
#if (PAGE_SIZE < 8192)
page_count(rx_buf->page);
#else
@@ -922,26 +956,25 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
/**
* ice_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
- * @rx_buf: Rx buffer to pull data from
* @xdp: xdp_buff pointing to the data
*
- * This function builds an skb around an existing Rx buffer, taking care
- * to set up the skb correctly and avoid any memcpy overhead.
+ * This function builds an skb around an existing XDP buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead. Driver has
+ * already combined frags (if any) to skb_shared_info.
*/
static struct sk_buff *
-ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct xdp_buff *xdp)
+ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
u8 metasize = xdp->data - xdp->data_meta;
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- SKB_DATA_ALIGN(xdp->data_end -
- xdp->data_hard_start);
-#endif
+ struct skb_shared_info *sinfo = NULL;
+ unsigned int nr_frags;
struct sk_buff *skb;
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
+
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
* likely have a consumer accessing first few bytes of meta
@@ -949,7 +982,7 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
*/
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
- skb = napi_build_skb(xdp->data_hard_start, truesize);
+ skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
@@ -964,8 +997,11 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
if (metasize)
skb_metadata_set(skb, metasize);
- /* buffer is used by skb, update page_offset */
- ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
@@ -981,24 +1017,30 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
* skb correctly.
*/
static struct sk_buff *
-ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct xdp_buff *xdp)
+ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
- unsigned int metasize = xdp->data - xdp->data_meta;
unsigned int size = xdp->data_end - xdp->data;
+ struct skb_shared_info *sinfo = NULL;
+ struct ice_rx_buf *rx_buf;
+ unsigned int nr_frags = 0;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- net_prefetch(xdp->data_meta);
+ net_prefetch(xdp->data);
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- ICE_RX_HDR_SIZE + metasize,
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
+ rx_buf = &rx_ring->rx_buf[rx_ring->first_desc];
skb_record_rx_queue(skb, rx_ring->q_index);
/* Determine available headroom for copy */
headlen = size;
@@ -1006,32 +1048,42 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
- ALIGN(headlen + metasize, sizeof(long)));
-
- if (metasize) {
- skb_metadata_set(skb, metasize);
- __skb_pull(skb, metasize);
- }
+ memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+ sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
if (size) {
-#if (PAGE_SIZE >= 8192)
- unsigned int truesize = SKB_DATA_ALIGN(size);
-#else
- unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
-#endif
+ /* besides adding here a partial frag, we are going to add
+ * frags from xdp_buff, make sure there is enough space for
+ * them
+ */
+ if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
skb_add_rx_frag(skb, 0, rx_buf->page,
- rx_buf->page_offset + headlen, size, truesize);
- /* buffer is used by skb, update page_offset */
- ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ rx_buf->page_offset + headlen, size,
+ xdp->frame_sz);
} else {
- /* buffer is unused, reset bias back to rx_buf; data was copied
- * onto skb's linear part so there's no need for adjusting
- * page offset and we can reuse this buffer as-is
+ /* buffer is unused, change the act that should be taken later
+ * on; data was copied onto skb's linear part so there's no
+ * need for adjusting page offset and we can reuse this buffer
+ * as-is
*/
- rx_buf->pagecnt_bias++;
+ rx_buf->act = ICE_SKB_CONSUMED;
+ }
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ struct skb_shared_info *skinfo = skb_shinfo(skb);
+
+ memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
+ sizeof(skb_frag_t) * nr_frags);
+
+ xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
}
return skb;
@@ -1041,26 +1093,17 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
* ice_put_rx_buf - Clean up used buffer and either recycle or free
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
- * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
*
- * This function will update next_to_clean and then clean up the contents
- * of the rx_buf. It will either recycle the buffer or unmap it and free
- * the associated resources.
+ * This function will clean up the contents of the rx_buf. It will either
+ * recycle the buffer or unmap it and free the associated resources.
*/
static void
-ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
- int rx_buf_pgcnt)
+ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
- u16 ntc = rx_ring->next_to_clean + 1;
-
- /* fetch, update, and store next to clean */
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
if (!rx_buf)
return;
- if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
+ if (ice_can_reuse_rx_page(rx_buf)) {
/* hand second half of page back to the ring */
ice_reuse_rx_page(rx_ring, rx_buf);
} else {
@@ -1076,27 +1119,6 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
}
/**
- * ice_is_non_eop - process handling of non-EOP buffers
- * @rx_ring: Rx ring being processed
- * @rx_desc: Rx descriptor for current buffer
- *
- * If the buffer is an EOP buffer, this function exits returning false,
- * otherwise return true indicating that this is in fact a non-EOP buffer.
- */
-static bool
-ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
-{
- /* if we are the last buffer then there is nothing else to do */
-#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
- if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
- return false;
-
- rx_ring->ring_stats->rx_stats.non_eop_descs++;
-
- return true;
-}
-
-/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
@@ -1110,39 +1132,42 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
*/
int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
{
- unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0;
- u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
unsigned int offset = rx_ring->rx_offset;
+ struct xdp_buff *xdp = &rx_ring->xdp;
struct ice_tx_ring *xdp_ring = NULL;
- unsigned int xdp_res, xdp_xmit = 0;
- struct sk_buff *skb = rx_ring->skb;
struct bpf_prog *xdp_prog = NULL;
- struct xdp_buff xdp;
+ u32 ntc = rx_ring->next_to_clean;
+ u32 cnt = rx_ring->count;
+ u32 cached_ntc = ntc;
+ u32 xdp_xmit = 0;
+ u32 cached_ntu;
bool failure;
+ u32 first;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
- frame_sz = ice_rx_frame_truesize(rx_ring, 0);
+ xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0);
#endif
- xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
- if (xdp_prog)
+ if (xdp_prog) {
xdp_ring = rx_ring->xdp_ring;
+ cached_ntu = xdp_ring->next_to_use;
+ }
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
struct ice_rx_buf *rx_buf;
- unsigned char *hard_start;
+ struct sk_buff *skb;
unsigned int size;
u16 stat_err_bits;
- int rx_buf_pgcnt;
u16 vlan_tag = 0;
u16 rx_ptype;
/* get the Rx desc from Rx ring based on 'next_to_clean' */
- rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+ rx_desc = ICE_RX_DESC(rx_ring, ntc);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
@@ -1166,8 +1191,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
ctrl_vsi->vf)
ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
- ice_put_rx_buf(rx_ring, NULL, 0);
- cleaned_count++;
+ if (++ntc == cnt)
+ ntc = 0;
continue;
}
@@ -1175,65 +1200,56 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
ICE_RX_FLX_DESC_PKT_LEN_M;
/* retrieve a buffer from the ring */
- rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt);
+ rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
- if (!size) {
- xdp.data = NULL;
- xdp.data_end = NULL;
- xdp.data_hard_start = NULL;
- xdp.data_meta = NULL;
- goto construct_skb;
- }
+ if (!xdp->data) {
+ void *hard_start;
- hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
- offset;
- xdp_prepare_buff(&xdp, hard_start, offset, size, true);
+ hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
+ offset;
+ xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
#if (PAGE_SIZE > 4096)
- /* At larger PAGE_SIZE, frame_sz depend on len size */
- xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
+ /* At larger PAGE_SIZE, frame_sz depend on len size */
+ xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size);
#endif
+ xdp_buff_clear_frags_flag(xdp);
+ } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
+ break;
+ }
+ if (++ntc == cnt)
+ ntc = 0;
- if (!xdp_prog)
- goto construct_skb;
+ /* skip if it is NOP desc */
+ if (ice_is_non_eop(rx_ring, rx_desc))
+ continue;
- xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring);
- if (!xdp_res)
+ ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
+ if (rx_buf->act == ICE_XDP_PASS)
goto construct_skb;
- if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
- xdp_xmit |= xdp_res;
- ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
- } else {
- rx_buf->pagecnt_bias++;
- }
- total_rx_bytes += size;
+ total_rx_bytes += xdp_get_buff_len(xdp);
total_rx_pkts++;
- cleaned_count++;
- ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
+ xdp->data = NULL;
+ rx_ring->first_desc = ntc;
continue;
construct_skb:
- if (skb) {
- ice_add_rx_frag(rx_ring, rx_buf, skb, size);
- } else if (likely(xdp.data)) {
- if (ice_ring_uses_build_skb(rx_ring))
- skb = ice_build_skb(rx_ring, rx_buf, &xdp);
- else
- skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
- }
+ if (likely(ice_ring_uses_build_skb(rx_ring)))
+ skb = ice_build_skb(rx_ring, xdp);
+ else
+ skb = ice_construct_skb(rx_ring, xdp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
- rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
- if (rx_buf)
- rx_buf->pagecnt_bias++;
+ rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+ rx_buf->act = ICE_XDP_CONSUMED;
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ ice_set_rx_bufs_act(xdp, rx_ring,
+ ICE_XDP_CONSUMED);
+ xdp->data = NULL;
+ rx_ring->first_desc = ntc;
break;
}
-
- ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
- cleaned_count++;
-
- /* skip if it is NOP desc */
- if (ice_is_non_eop(rx_ring, rx_desc))
- continue;
+ xdp->data = NULL;
+ rx_ring->first_desc = ntc;
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
@@ -1245,10 +1261,8 @@ construct_sk