summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-08-31 17:41:08 -0700
committerDavid S. Miller <davem@davemloft.net>2018-08-31 17:41:08 -0700
commitfd3c040b244b7aba6b18f7d12c87fd774b2257a8 (patch)
tree487567d6379c11e9c50b54791cc19081241c9e61
parentee713b6da510924f0965a1ea77e4e6219e3b8b5a (diff)
parent93ee30f3e8b412c5fc2d2f7d9d002529d9a209ad (diff)
downloadlinux-fd3c040b244b7aba6b18f7d12c87fd774b2257a8.tar.gz
linux-fd3c040b244b7aba6b18f7d12c87fd774b2257a8.tar.bz2
linux-fd3c040b244b7aba6b18f7d12c87fd774b2257a8.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-09-01 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Add AF_XDP zero-copy support for i40e driver (!), from Björn and Magnus. 2) BPF verifier improvements by giving each register its own liveness chain which allows to simplify and getting rid of skip_callee() logic, from Edward. 3) Add bpf fs pretty print support for percpu arraymap, percpu hashmap and percpu lru hashmap. Also add generic percpu formatted print on bpftool so the same can be dumped there, from Yonghong. 4) Add bpf_{set,get}sockopt() helper support for TCP_SAVE_SYN and TCP_SAVED_SYN options to allow reflection of tos/tclass from received SYN packet, from Nikita. 5) Misc improvements to the BPF sockmap test cases in terms of cgroup v2 interaction and removal of incorrect shutdown() calls, from John. 6) Few cleanups in xdp_umem_assign_dev() and xdpsock samples, from Prashant. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/intel/i40e/Makefile3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h19
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c307
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c182
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h20
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx_common.h90
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c832
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.h25
-rw-r--r--include/linux/bpf_verifier.h8
-rw-r--r--include/linux/netdevice.h26
-rw-r--r--include/net/xdp.h6
-rw-r--r--include/net/xdp_sock.h51
-rw-r--r--kernel/bpf/arraymap.c24
-rw-r--r--kernel/bpf/hashtab.c31
-rw-r--r--kernel/bpf/syscall.c1
-rw-r--r--kernel/bpf/verifier.c216
-rw-r--r--net/core/filter.c25
-rw-r--r--net/core/xdp.c53
-rw-r--r--net/xdp/xdp_umem.c6
-rw-r--r--net/xdp/xdp_umem.h10
-rw-r--r--net/xdp/xdp_umem_props.h14
-rw-r--r--net/xdp/xsk.c34
-rw-r--r--net/xdp/xsk_queue.c5
-rw-r--r--net/xdp/xsk_queue.h13
-rw-r--r--samples/bpf/Makefile1
-rw-r--r--samples/bpf/tcp_tos_reflect_kern.c87
-rw-r--r--samples/bpf/xdpsock_kern.c2
-rw-r--r--samples/bpf/xdpsock_user.c15
-rw-r--r--tools/bpf/bpftool/map.c33
-rw-r--r--tools/testing/selftests/bpf/test_btf.c179
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c56
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c38
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c31
33 files changed, 2067 insertions, 376 deletions
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 14397e7e9925..50590e8d1fd1 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -22,6 +22,7 @@ i40e-objs := i40e_main.o \
i40e_txrx.o \
i40e_ptp.o \
i40e_client.o \
- i40e_virtchnl_pf.o
+ i40e_virtchnl_pf.o \
+ i40e_xsk.o
i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 7a80652e2500..876cac317e79 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -786,6 +786,11 @@ struct i40e_vsi {
/* VSI specific handlers */
irqreturn_t (*irq_handler)(int irq, void *data);
+
+ /* AF_XDP zero-copy */
+ struct xdp_umem **xsk_umems;
+ u16 num_xsk_umems_used;
+ u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;
struct i40e_netdev_priv {
@@ -1090,6 +1095,20 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
return !!vsi->xdp_prog;
}
+static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
+{
+ bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
+ int qid = ring->queue_index;
+
+ if (ring_is_xdp(ring))
+ qid -= ring->vsi->alloc_queue_pairs;
+
+ if (!ring->vsi->xsk_umems || !ring->vsi->xsk_umems[qid] || !xdp_on)
+ return NULL;
+
+ return ring->vsi->xsk_umems[qid];
+}
+
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 112245f32d7d..5d209d8fe9b8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9,7 +9,9 @@
/* Local includes */
#include "i40e.h"
#include "i40e_diag.h"
+#include "i40e_xsk.h"
#include <net/udp_tunnel.h>
+#include <net/xdp_sock.h>
/* All i40e tracepoints are defined by the include below, which
* must be included exactly once across the whole kernel with
* CREATE_TRACE_POINTS defined
@@ -3074,6 +3076,9 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
i40e_status err = 0;
u32 qtx_ctl = 0;
+ if (ring_is_xdp(ring))
+ ring->xsk_umem = i40e_xsk_umem(ring);
+
/* some ATR related tx ring init */
if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
ring->atr_sample_rate = vsi->back->atr_sample_rate;
@@ -3183,13 +3188,46 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
struct i40e_hw *hw = &vsi->back->hw;
struct i40e_hmc_obj_rxq rx_ctx;
i40e_status err = 0;
+ bool ok;
+ int ret;
bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
/* clear the context structure first */
memset(&rx_ctx, 0, sizeof(rx_ctx));
- ring->rx_buf_len = vsi->rx_buf_len;
+ if (ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+ ring->xsk_umem = i40e_xsk_umem(ring);
+ if (ring->xsk_umem) {
+ ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+ XDP_PACKET_HEADROOM;
+ /* For AF_XDP ZC, we disallow packets to span on
+ * multiple buffers, thus letting us skip that
+ * handling in the fast-path.
+ */
+ chain_len = 1;
+ ring->zca.free = i40e_zca_free;
+ ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_ZERO_COPY,
+ &ring->zca);
+ if (ret)
+ return ret;
+ dev_info(&vsi->back->pdev->dev,
+ "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ ring->queue_index);
+
+ } else {
+ ring->rx_buf_len = vsi->rx_buf_len;
+ if (ring->vsi->type == I40E_VSI_MAIN) {
+ ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (ret)
+ return ret;
+ }
+ }
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3245,7 +3283,15 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
- i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+ ok = ring->xsk_umem ?
+ i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
+ !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+ if (!ok) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+ ring->xsk_umem ? "UMEM enabled " : "",
+ ring->queue_index, pf_q);
+ }
return 0;
}
@@ -11851,6 +11897,256 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
}
/**
+ * i40e_enter_busy_conf - Enters busy config state
+ * @vsi: vsi
+ *
+ * Returns 0 on success, <0 for failure.
+ **/
+static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
+{
+ struct i40e_pf *pf = vsi->back;
+ int timeout = 50;
+
+ while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+
+ return 0;
+}
+
+/**
+ * i40e_exit_busy_conf - Exits busy config state
+ * @vsi: vsi
+ **/
+static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
+{
+ struct i40e_pf *pf = vsi->back;
+
+ clear_bit(__I40E_CONFIG_BUSY, pf->state);
+}
+
+/**
+ * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
+{
+ memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
+ sizeof(vsi->rx_rings[queue_pair]->rx_stats));
+ memset(&vsi->tx_rings[queue_pair]->stats, 0,
+ sizeof(vsi->tx_rings[queue_pair]->stats));
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ memset(&vsi->xdp_rings[queue_pair]->stats, 0,
+ sizeof(vsi->xdp_rings[queue_pair]->stats));
+ }
+}
+
+/**
+ * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
+{
+ i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
+ if (i40e_enabled_xdp_vsi(vsi))
+ i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+ i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+}
+
+/**
+ * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ **/
+static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
+ bool enable)
+{
+ struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+ struct i40e_q_vector *q_vector = rxr->q_vector;
+
+ if (!vsi->netdev)
+ return;
+
+ /* All rings in a qp belong to the same qvector. */
+ if (q_vector->rx.ring || q_vector->tx.ring) {
+ if (enable)
+ napi_enable(&q_vector->napi);
+ else
+ napi_disable(&q_vector->napi);
+ }
+}
+
+/**
+ * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair,
+ bool enable)
+{
+ struct i40e_pf *pf = vsi->back;
+ int pf_q, ret = 0;
+
+ pf_q = vsi->base_queue + queue_pair;
+ ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
+ false /*is xdp*/, enable);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "VSI seid %d Tx ring %d %sable timeout\n",
+ vsi->seid, pf_q, (enable ? "en" : "dis"));
+ return ret;
+ }
+
+ i40e_control_rx_q(pf, pf_q, enable);
+ ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "VSI seid %d Rx ring %d %sable timeout\n",
+ vsi->seid, pf_q, (enable ? "en" : "dis"));
+ return ret;
+ }
+
+ /* Due to HW errata, on Rx disable only, the register can
+ * indicate done before it really is. Needs 50ms to be sure
+ */
+ if (!enable)
+ mdelay(50);
+
+ if (!i40e_enabled_xdp_vsi(vsi))
+ return ret;
+
+ ret = i40e_control_wait_tx_q(vsi->seid, pf,
+ pf_q + vsi->alloc_queue_pairs,
+ true /*is xdp*/, enable);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "VSI seid %d XDP Tx ring %d %sable timeout\n",
+ vsi->seid, pf_q, (enable ? "en" : "dis"));
+ }
+
+ return ret;
+}
+
+/**
+ * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+ struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+
+ /* All rings in a qp belong to the same qvector. */
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
+ else
+ i40e_irq_dynamic_enable_icr0(pf);
+
+ i40e_flush(hw);
+}
+
+/**
+ * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+ struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+
+ /* For simplicity, instead of removing the qp interrupt causes
+ * from the interrupt linked list, we simply disable the interrupt, and
+ * leave the list intact.
+ *
+ * All rings in a qp belong to the same qvector.
+ */
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
+
+ wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
+ i40e_flush(hw);
+ synchronize_irq(pf->msix_entries[intpf].vector);
+ } else {
+ /* Legacy and MSI mode - this stops all interrupt handling */
+ wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+ wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+ i40e_flush(hw);
+ synchronize_irq(pf->pdev->irq);
+ }
+}
+
+/**
+ * i40e_queue_pair_disable - Disables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
+{
+ int err;
+
+ err = i40e_enter_busy_conf(vsi);
+ if (err)
+ return err;
+
+ i40e_queue_pair_disable_irq(vsi, queue_pair);
+ err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
+ i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
+ i40e_queue_pair_clean_rings(vsi, queue_pair);
+ i40e_queue_pair_reset_stats(vsi, queue_pair);
+
+ return err;
+}
+
+/**
+ * i40e_queue_pair_enable - Enables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
+{
+ int err;
+
+ err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
+ if (err)
+ return err;
+
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
+ if (err)
+ return err;
+ }
+
+ err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
+ if (err)
+ return err;
+
+ err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */);
+ i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */);
+ i40e_queue_pair_enable_irq(vsi, queue_pair);
+
+ i40e_exit_busy_conf(vsi);
+
+ return err;
+}
+
+/**
* i40e_xdp - implements ndo_bpf for i40e
* @dev: netdevice
* @xdp: XDP command
@@ -11870,6 +12166,12 @@ static int i40e_xdp(struct net_device *dev,
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
+ case XDP_QUERY_XSK_UMEM:
+ return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
+ xdp->xsk.queue_id);
+ case XDP_SETUP_XSK_UMEM:
+ return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -11909,6 +12211,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_bpf = i40e_xdp,
.ndo_xdp_xmit = i40e_xdp_xmit,
+ .ndo_xsk_async_xmit = i40e_xsk_async_xmit,
};
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b5042d1a63c0..37bd4e50ccde 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -8,16 +8,8 @@
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
-
-static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
- u32 td_tag)
-{
- return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
- ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
- ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
- ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
- ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
-}
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
@@ -536,8 +528,8 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
* This is used to verify if the FD programming or invalidation
* requested by SW to the HW is successful or not and take actions accordingly.
**/
-static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id)
+void i40e_fd_handle_status(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc, u8 prog_id)
{
struct i40e_pf *pf = rx_ring->vsi->back;
struct pci_dev *pdev = pf->pdev;
@@ -767,8 +759,6 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
}
}
-#define WB_STRIDE 4
-
/**
* i40e_clean_tx_irq - Reclaim resources after transmit completes
* @vsi: the VSI we care about
@@ -873,27 +863,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
i += tx_ring->count;
tx_ring->next_to_clean = i;
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.packets += total_packets;
- u64_stats_update_end(&tx_ring->syncp);
- tx_ring->q_vector->tx.total_bytes += total_bytes;
- tx_ring->q_vector->tx.total_packets += total_packets;
-
- if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
- /* check to see if there are < 4 descriptors
- * waiting to be written back, then kick the hardware to force
- * them to be written back in case we stay in NAPI.
- * In this mode on X722 we do not enable Interrupt.
- */
- unsigned int j = i40e_get_tx_pending(tx_ring, false);
-
- if (budget &&
- ((j / WB_STRIDE) == 0) && (j > 0) &&
- !test_bit(__I40E_VSI_DOWN, vsi->state) &&
- (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
- tx_ring->arm_wb = true;
- }
+ i40e_update_tx_stats(tx_ring, total_packets, total_bytes);
+ i40e_arm_wb(tx_ring, vsi, budget);
if (ring_is_xdp(tx_ring))
return !!budget;
@@ -1244,6 +1215,11 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
new_buff->page = old_buff->page;
new_buff->page_offset = old_buff->page_offset;
new_buff->pagecnt_bias = old_buff->pagecnt_bias;
+
+ rx_ring->rx_stats.page_reuse_count++;
+
+ /* clear contents of buffer_info */
+ old_buff->page = NULL;
}
/**
@@ -1266,7 +1242,7 @@ static inline bool i40e_rx_is_programming_status(u64 qw)
}
/**
- * i40e_clean_programming_status - clean the programming status descriptor
+ * i40e_clean_programming_status - try clean the programming status descriptor
* @rx_ring: the rx ring that has this descriptor
* @rx_desc: the rx descriptor written back by HW
* @qw: qword representing status_error_len in CPU ordering
@@ -1275,15 +1251,22 @@ static inline bool i40e_rx_is_programming_status(u64 qw)
* status being successful or not and take actions accordingly. FCoE should
* handle its context/filter programming/invalidation status and take actions.
*
+ * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
**/
-static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- u64 qw)
+struct i40e_rx_buffer *i40e_clean_programming_status(
+ struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc,
+ u64 qw)
{
struct i40e_rx_buffer *rx_buffer;
- u32 ntc = rx_ring->next_to_clean;
+ u32 ntc;
u8 id;
+ if (!i40e_rx_is_programming_status(qw))
+ return NULL;
+
+ ntc = rx_ring->next_to_clean;
+
/* fetch, update, and store next to clean */
rx_buffer = &rx_ring->rx_bi[ntc++];
ntc = (ntc < rx_ring->count) ? ntc : 0;
@@ -1291,18 +1274,13 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
prefetch(I40E_RX_DESC(rx_ring, ntc));
- /* place unused page back on the ring */
- i40e_reuse_rx_page(rx_ring, rx_buffer);
- rx_ring->rx_stats.page_reuse_count++;
-
- /* clear contents of buffer_info */
- rx_buffer->page = NULL;
-
id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
i40e_fd_handle_status(rx_ring, rx_desc, id);
+
+ return rx_buffer;
}
/**
@@ -1372,6 +1350,9 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_ring->skb = NULL;
}
+ if (rx_ring->xsk_umem)
+ goto skip_free;
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
@@ -1400,6 +1381,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_bi->page_offset = 0;
}
+skip_free:
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
memset(rx_ring->rx_bi, 0, bi_size);
@@ -1492,7 +1474,7 @@ err:
* @rx_ring: ring to bump
* @val: new head index
**/
-static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
rx_ring->next_to_use = val;
@@ -1576,8 +1558,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
* @skb: packet to send up
* @vlan_tag: vlan tag for packet
**/
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
- struct sk_buff *skb, u16 vlan_tag)
+void i40e_receive_skb(struct i40e_ring *rx_ring,
+ struct sk_buff *skb, u16 vlan_tag)
{
struct i40e_q_vector *q_vector = rx_ring->q_vector;
@@ -1804,7 +1786,6 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
* order to populate the hash, checksum, VLAN, protocol, and
* other fields within the skb.
**/
-static inline
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, struct sk_buff *skb,
u8 rx_ptype)
@@ -2152,7 +2133,6 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
if (i40e_can_reuse_rx_page(rx_buffer)) {
/* hand second half of page back to the ring */
i40e_reuse_rx_page(rx_ring, rx_buffer);
- rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
@@ -2160,10 +2140,9 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
}
-
- /* clear contents of buffer_info */
- rx_buffer->page = NULL;
}
/**
@@ -2199,16 +2178,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
return true;
}
-#define I40E_XDP_PASS 0
-#define I40E_XDP_CONSUMED BIT(0)
-#define I40E_XDP_TX BIT(1)
-#define I40E_XDP_REDIR BIT(2)
-
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
struct i40e_ring *xdp_ring);
-static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
- struct i40e_ring *xdp_ring)
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
{
struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
@@ -2287,7 +2260,13 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
#endif
}
-static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
+/**
+ * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ **/
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
{
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch.
@@ -2297,6 +2276,48 @@ static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
}
/**
+ * i40e_update_rx_stats - Update Rx ring statistics
+ * @rx_ring: rx descriptor ring
+ * @total_rx_bytes: number of bytes received
+ * @total_rx_packets: number of packets received
+ *
+ * This function updates the Rx ring statistics.
+ **/
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
+ unsigned int total_rx_bytes,
+ unsigned int total_rx_packets)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.packets += total_rx_packets;
+ rx_ring->stats.bytes += total_rx_bytes;
+ u64_stats_update_end(&rx_ring->syncp);
+ rx_ring->q_vector->rx.total_packets += total_rx_packets;
+ rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+}
+
+/**
+ * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ **/
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
+{
+ if (xdp_res & I40E_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & I40E_XDP_TX) {
+ struct i40e_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+
+ i40e_xdp_ring_update_tail(xdp_ring);
+ }
+}
+
+/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
@@ -2349,11 +2370,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
*/
dma_rmb();
- if (unlikely(i40e_rx_is_programming_status(qword))) {
- i40e_clean_programming_status(rx_ring, rx_desc, qword);
+ rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc,
+ qword);
+ if (unlikely(rx_buffer)) {
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
}
+
size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
if (!size)
@@ -2432,24 +2456,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
total_rx_packets++;
}
- if (xdp_xmit & I40E_XDP_REDIR)
- xdp_do_flush_map();
-
- if (xdp_xmit & I40E_XDP_TX) {
- struct i40e_ring *xdp_ring =
- rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-
- i40e_xdp_ring_update_tail(xdp_ring);
- }
-
+ i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
rx_ring->skb = skb;
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.packets += total_rx_packets;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_packets += total_rx_packets;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_packets;
@@ -2587,7 +2597,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
i40e_for_each_ring(ring, q_vector->tx) {
- if (!i40e_clean_tx_irq(vsi, ring, budget)) {
+ bool wd = ring->xsk_umem ?
+ i40e_clean_xdp_tx_irq(vsi, ring, budget) :
+ i40e_clean_tx_irq(vsi, ring, budget);
+
+ if (!wd) {
clean_complete = false;