summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c46
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c79
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c68
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h30
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c489
7 files changed, 596 insertions, 121 deletions
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 67f304289fd9..2b311382167a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -154,6 +154,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
ixgbe_link_speed speed);
+#define IXGBE_FAILED_READ_RETRIES 5
#define IXGBE_FAILED_READ_REG 0xffffffffU
#define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU
#define IXGBE_FAILED_READ_CFG_WORD 0xffffU
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index f2254528dcfc..68af127987bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -774,11 +774,7 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
- itd->flags = 0;
if (xs->id.proto == IPPROTO_ESP) {
- struct sk_buff *skb = first->skb;
- int ret, authlen, trailerlen;
- u8 padlen;
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
IXGBE_ADVTXD_TUCMD_L4T_TCP;
@@ -790,19 +786,28 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
* padlen bytes of padding. This ends up not the same
* as the static value found in xs->props.trailer_len (21).
*
- * The "correct" way to get the auth length would be to use
- * authlen = crypto_aead_authsize(xs->data);
- * but since we know we only have one size to worry about
- * we can let the compiler use the constant and save us a
- * few CPU cycles.
+ * ... but if we're doing GSO, don't bother as the stack
+ * doesn't add a trailer for those.
*/
- authlen = IXGBE_IPSEC_AUTH_BITS / 8;
-
- ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1);
- if (unlikely(ret))
- return 0;
- trailerlen = authlen + 2 + padlen;
- itd->trailer_len = trailerlen;
+ if (!skb_is_gso(first->skb)) {
+ /* The "correct" way to get the auth length would be
+ * to use
+ * authlen = crypto_aead_authsize(xs->data);
+ * but since we know we only have one size to worry
+ * about * we can let the compiler use the constant
+ * and save us a few CPU cycles.
+ */
+ const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+ struct sk_buff *skb = first->skb;
+ u8 padlen;
+ int ret;
+
+ ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+ &padlen, 1);
+ if (unlikely(ret))
+ return 0;
+ itd->trailer_len = authlen + 2 + padlen;
+ }
}
if (tsa->encrypt)
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
@@ -924,8 +929,13 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
ixgbe_ipsec_clear_hw_tables(adapter);
adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
- adapter->netdev->features |= NETIF_F_HW_ESP;
- adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+#define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \
+ NETIF_F_HW_ESP_TX_CSUM | \
+ NETIF_F_GSO_ESP)
+
+ adapter->netdev->features |= IXGBE_ESP_FEATURES;
+ adapter->netdev->hw_enc_features |= IXGBE_ESP_FEATURES;
return;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 85369423452d..afadba99f7b8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -353,23 +353,32 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
ixgbe_service_event_schedule(adapter);
}
-static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
+static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
{
+ u8 __iomem *reg_addr;
u32 value;
+ int i;
+
+ reg_addr = READ_ONCE(hw->hw_addr);
+ if (ixgbe_removed(reg_addr))
+ return IXGBE_FAILED_READ_REG;
- /* The following check not only optimizes a bit by not
- * performing a read on the status register when the
- * register just read was a status register read that
- * returned IXGBE_FAILED_READ_REG. It also blocks any
- * potential recursion.
+ /* Register read of 0xFFFFFFF can indicate the adapter has been removed,
+ * so perform several status register reads to determine if the adapter
+ * has been removed.
*/
- if (reg == IXGBE_STATUS) {
- ixgbe_remove_adapter(hw);
- return;
+ for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
+ value = readl(reg_addr + IXGBE_STATUS);
+ if (value != IXGBE_FAILED_READ_REG)
+ break;
+ mdelay(3);
}
- value = ixgbe_read_reg(hw, IXGBE_STATUS);
+
if (value == IXGBE_FAILED_READ_REG)
ixgbe_remove_adapter(hw);
+ else
+ value = readl(reg_addr + reg);
+ return value;
}
/**
@@ -415,7 +424,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
writes_completed:
value = readl(reg_addr + reg);
if (unlikely(value == IXGBE_FAILED_READ_REG))
- ixgbe_check_remove(hw, reg);
+ value = ixgbe_check_remove(hw, reg);
return value;
}
@@ -1620,7 +1629,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
bi->dma = dma;
bi->page = page;
bi->page_offset = ixgbe_rx_offset(rx_ring);
- bi->pagecnt_bias = 1;
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
rx_ring->rx_stats.alloc_rx_page++;
return true;
@@ -2030,8 +2040,8 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(!pagecnt_bias)) {
- page_ref_add(page, USHRT_MAX);
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -7721,7 +7731,8 @@ static void ixgbe_service_task(struct work_struct *work)
static int ixgbe_tso(struct ixgbe_ring *tx_ring,
struct ixgbe_tx_buffer *first,
- u8 *hdr_len)
+ u8 *hdr_len,
+ struct ixgbe_ipsec_tx_data *itd)
{
u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
struct sk_buff *skb = first->skb;
@@ -7735,6 +7746,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
unsigned char *hdr;
} l4;
u32 paylen, l4_offset;
+ u32 fceof_saidx = 0;
int err;
if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -7760,13 +7772,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
if (ip.v4->version == 4) {
unsigned char *csum_start = skb_checksum_start(skb);
unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+ int len = csum_start - trans_start;
/* IP header will have to cancel out any data that
- * is not a part of the outer IP header
+ * is not a part of the outer IP header, so set to
+ * a reverse csum if needed, else init check to 0.
*/
- ip.v4->check = csum_fold(csum_partial(trans_start,
- csum_start - trans_start,
- 0));
+ ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+ csum_fold(csum_partial(trans_start,
+ len, 0)) : 0;
type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
ip.v4->tot_len = 0;
@@ -7797,12 +7811,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
+ fceof_saidx |= itd->sa_idx;
+ type_tucmd |= itd->flags | itd->trailer_len;
+
/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
vlan_macip_lens = l4.hdr - ip.hdr;
vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
- ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd,
+ ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
mss_l4len_idx);
return 1;
@@ -7864,10 +7881,8 @@ no_csum:
vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
- if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
- fceof_saidx |= itd->sa_idx;
- type_tucmd |= itd->flags | itd->trailer_len;
- }
+ fceof_saidx |= itd->sa_idx;
+ type_tucmd |= itd->flags | itd->trailer_len;
ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
}
@@ -8495,7 +8510,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
- tso = ixgbe_tso(tx_ring, first, &hdr_len);
+ tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
if (tso < 0)
goto out_drop;
else if (!tso)
@@ -9904,15 +9919,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
/* We can only support IPV4 TSO in tunnels if we can mangle the
* inner IP ID field, so strip TSO if MANGLEID is not supported.
+ * IPsec offoad sets skb->encapsulation but still can handle
+ * the TSO, so it's the exception.
*/
- if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
- features &= ~NETIF_F_TSO;
-
-#ifdef CONFIG_XFRM_OFFLOAD
- /* IPsec offload doesn't get along well with others *yet* */
- if (skb->sp)
- features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
+ if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
+#ifdef CONFIG_XFRM
+ if (!skb->sp)
#endif
+ features &= ~NETIF_F_TSO;
+ }
return features;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index f470d0204771..3123267dfba9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1847,9 +1847,9 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
(IXGBE_CS4227_EDC_MODE_SR << 1));
if (setup_linear)
- reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+ reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
else
- reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+ reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
ret_val = hw->phy.ops.write_reg(hw, reg_slice,
IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index e7623fed42da..8e7d6c6f5c92 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2018 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -82,6 +82,7 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
#define IXGBEVF_QUEUE_STATS_LEN ( \
(((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \
+ ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \
((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \
(sizeof(struct ixgbevf_stats) / sizeof(u64)))
#define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats)
@@ -269,7 +270,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL;
u32 new_rx_count, new_tx_count;
- int i, err = 0;
+ int i, j, err = 0;
if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
return -EINVAL;
@@ -293,15 +294,19 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
if (!netif_running(adapter->netdev)) {
for (i = 0; i < adapter->num_tx_queues; i++)
adapter->tx_ring[i]->count = new_tx_count;
+ for (i = 0; i < adapter->num_xdp_queues; i++)
+ adapter->xdp_ring[i]->count = new_tx_count;
for (i = 0; i < adapter->num_rx_queues; i++)
adapter->rx_ring[i]->count = new_rx_count;
adapter->tx_ring_count = new_tx_count;
+ adapter->xdp_ring_count = new_tx_count;
adapter->rx_ring_count = new_rx_count;
goto clear_reset;
}
if (new_tx_count != adapter->tx_ring_count) {
- tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
+ tx_ring = vmalloc((adapter->num_tx_queues +
+ adapter->num_xdp_queues) * sizeof(*tx_ring));
if (!tx_ring) {
err = -ENOMEM;
goto clear_reset;
@@ -324,6 +329,24 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
goto clear_reset;
}
}
+
+ for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+ /* clone ring and setup updated count */
+ tx_ring[i] = *adapter->xdp_ring[j];
+ tx_ring[i].count = new_tx_count;
+ err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ixgbevf_free_tx_resources(&tx_ring[i]);
+ }
+
+ vfree(tx_ring);
+ tx_ring = NULL;
+
+ goto clear_reset;
+ }
+ }
}
if (new_rx_count != adapter->rx_ring_count) {
@@ -336,8 +359,13 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
for (i = 0; i < adapter->num_rx_queues; i++) {
/* clone ring and setup updated count */
rx_ring[i] = *adapter->rx_ring[i];
+
+ /* Clear copied XDP RX-queue info */
+ memset(&rx_ring[i].xdp_rxq, 0,
+ sizeof(rx_ring[i].xdp_rxq));
+
rx_ring[i].count = new_rx_count;
- err = ixgbevf_setup_rx_resources(&rx_ring[i]);
+ err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]);
if (err) {
while (i) {
i--;
@@ -363,6 +391,12 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
}
adapter->tx_ring_count = new_tx_count;
+ for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+ ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+ *adapter->xdp_ring[j] = tx_ring[i];
+ }
+ adapter->xdp_ring_count = new_tx_count;
+
vfree(tx_ring);
tx_ring = NULL;
}
@@ -385,7 +419,8 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
clear_reset:
/* free Tx resources if Rx error is encountered */
if (tx_ring) {
- for (i = 0; i < adapter->num_tx_queues; i++)
+ for (i = 0;
+ i < adapter->num_tx_queues + adapter->num_xdp_queues; i++)
ixgbevf_free_tx_resources(&tx_ring[i]);
vfree(tx_ring);
}
@@ -457,6 +492,23 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev,
i += 2;
}
+ /* populate XDP queue data */
+ for (j = 0; j < adapter->num_xdp_queues; j++) {
+ ring = adapter->xdp_ring[j];
+ if (!ring) {
+ data[i++] = 0;
+ data[i++] = 0;
+ continue;
+ }
+
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ data[i] = ring->stats.packets;
+ data[i + 1] = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+ i += 2;
+ }
+
/* populate Rx queue data */
for (j = 0; j < adapter->num_rx_queues; j++) {
ring = adapter->rx_ring[j];
@@ -500,6 +552,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
sprintf(p, "tx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
}
+ for (i = 0; i < adapter->num_xdp_queues; i++) {
+ sprintf(p, "xdp_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "xdp_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
for (i = 0; i < adapter->num_rx_queues; i++) {
sprintf(p, "rx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index c06ea4dc49a0..447ce1d5e0e3 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -2,7 +2,7 @@
/*******************************************************************************
Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2018 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -35,6 +35,7 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
#include "vf.h"
@@ -51,7 +52,11 @@
struct ixgbevf_tx_buffer {
union ixgbe_adv_tx_desc *next_to_watch;
unsigned long time_stamp;
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ /* XDP uses address ptr on irq_clean */
+ void *data;
+ };
unsigned int bytecount;
unsigned short gso_segs;
__be16 protocol;
@@ -94,12 +99,21 @@ enum ixgbevf_ring_state_t {
__IXGBEVF_RX_BUILD_SKB_ENABLED,
__IXGBEVF_TX_DETECT_HANG,
__IXGBEVF_HANG_CHECK_ARMED,
+ __IXGBEVF_TX_XDP_RING,
};
+#define ring_is_xdp(ring) \
+ test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+ set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+ clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+
struct ixgbevf_ring {
struct ixgbevf_ring *next;
struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */
struct net_device *netdev;
+ struct bpf_prog *xdp_prog;
struct device *dev;
void *desc; /* descriptor ring memory */
dma_addr_t dma; /* phys. address of descriptor ring */
@@ -120,7 +134,7 @@ struct ixgbevf_ring {
struct ixgbevf_tx_queue_stats tx_stats;
struct ixgbevf_rx_queue_stats rx_stats;
};
-
+ struct xdp_rxq_info xdp_rxq;
u64 hw_csum_rx_error;
u8 __iomem *tail;
struct sk_buff *skb;
@@ -137,6 +151,7 @@ struct ixgbevf_ring {
#define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES
#define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES
+#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES
#define IXGBEVF_MAX_RSS_QUEUES 2
#define IXGBEVF_82599_RETA_SIZE 128 /* 128 entries */
#define IXGBEVF_X550_VFRETA_SIZE 64 /* 64 entries */
@@ -337,6 +352,10 @@ struct ixgbevf_adapter {
u32 eims_enable_mask;
u32 eims_other;
+ /* XDP */
+ int num_xdp_queues;
+ struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES];
+
/* TX */
int num_tx_queues;
struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
@@ -357,6 +376,7 @@ struct ixgbevf_adapter {
/* OS defined structs */
struct net_device *netdev;
+ struct bpf_prog *xdp_prog;
struct pci_dev *pdev;
/* structs defined in ixgbe_vf.h */
@@ -370,6 +390,7 @@ struct ixgbevf_adapter {
unsigned long state;
u64 tx_busy;
unsigned int tx_ring_count;
+ unsigned int xdp_ring_count;
unsigned int rx_ring_count;
u8 __iomem *io_addr; /* Mainly for iounmap use */
@@ -443,7 +464,8 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter);
void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);
void ixgbevf_reset(struct ixgbevf_adapter *adapter);
void ixgbevf_set_ethtool_ops(struct net_device *netdev);
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *);
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring);
int ixgbevf_setup_tx_resources(struct ixgbevf_ring *);
void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4da449e0a4ba..3d9033f26eff 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2018 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -50,6 +50,9 @@
#include <linux/if_vlan.h>
#include <linux/prefetch.h>
#include <net/mpls.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
#include "ixgbevf.h"
@@ -321,7 +324,10 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
total_packets += tx_buffer->gso_segs;
/* free the skb */
- napi_consume_skb(tx_buffer->skb, napi_budget);
+ if (ring_is_xdp(tx_ring))
+ page_frag_free(tx_buffer->data);
+ else
+ napi_consume_skb(tx_buffer->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -385,7 +391,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
eop_desc = tx_ring->tx_buffer_info[i].next_to_watch;
- pr_err("Detected Tx Unit Hang\n"
+ pr_err("Detected Tx Unit Hang%s\n"
" Tx Queue <%d>\n"
" TDH, TDT <%x>, <%x>\n"
" next_to_use <%x>\n"
@@ -395,6 +401,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
" eop_desc->wb.status <%x>\n"
" time_stamp <%lx>\n"
" jiffies <%lx>\n",
+ ring_is_xdp(tx_ring) ? " XDP" : "",
tx_ring->queue_index,
IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)),
IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)),
@@ -402,7 +409,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
eop_desc, (eop_desc ? eop_desc->wb.status : 0),
tx_ring->tx_buffer_info[i].time_stamp, jiffies);
- netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+ if (!ring_is_xdp(tx_ring))
+ netif_stop_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
/* schedule immediate reset if we believe we hung */
ixgbevf_tx_timeout_reset(adapter);
@@ -410,6 +419,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
return true;
}
+ if (ring_is_xdp(tx_ring))
+ return !!budget;
+
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
(ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
@@ -552,19 +564,21 @@ struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
}
static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
- struct ixgbevf_rx_buffer *rx_buffer)
+ struct ixgbevf_rx_buffer *rx_buffer,
+ struct sk_buff *skb)
{
if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
/* hand second half of page back to the ring */
ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
} else {
- /* We are not reusing the buffer so unmap it and free
- * any references we are holding to it
- */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- ixgbevf_rx_pg_size(rx_ring),
- DMA_FROM_DEVICE,
- IXGBEVF_RX_DMA_ATTR);
+ if (IS_ERR(skb))
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ ixgbevf_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
}
@@ -737,6 +751,10 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
+ /* XDP packets use error pointer so abort at this point */
+ if (IS_ERR(skb))
+ return true;
+
/* verify that the packet does not have any known errors */
if (unlikely(ixgbevf_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@@ -853,23 +871,38 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
static
struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
- union ixgbe_adv_rx_desc *rx_desc,
- unsigned int size)
+ struct xdp_buff *xdp,
+ union ixgbe_adv_rx_desc *rx_desc)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = SKB_DATA_ALIGN(size);
+ unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
#endif
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
+ prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
+ prefetch(xdp->data + L1_CACHE_BYTES);
#endif
+ /* Note, we get here by enabling legacy-rx via:
+ *
+ * ethtool --set-priv-flags <dev> legacy-rx on
+ *
+ * In this mode, we currently get 0 extra XDP headroom as
+ * opposed to having legacy-rx off, where we process XDP
+ * packets going to stack via ixgbevf_build_skb().
+ *
+ * For ixgbevf_construct_skb() mode it means that the
+ * xdp->data_meta will always point to xdp->data, since
+ * the helper cannot expand the head. Should this ever
+ * changed in future for legacy-rx mode on, then lets also
+ * add xdp->data_meta handling here.
+ */
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
@@ -879,16 +912,18 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IXGBEVF_RX_HDR_SIZE)
- headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
+ headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data,
+ ALIGN(headlen, sizeof(long)));
/* update all of the pointers */
size -= headlen;
if (size) {
skb_add_rx_frag(skb, 0, rx_buffer->page,
- (va + headlen) - page_address(rx_buffer->page),
+ (xdp->data + headlen) -
+ page_address(rx_buffer->page),
size, truesize);
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
@@ -912,32 +947,39 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
- union ixgbe_adv_rx_desc *rx_desc,
- unsigned int size)
+ struct xdp_buff *xdp,
+ union ixgbe_adv_rx_desc *rx_desc)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ unsigned int metasize = xdp->data - xdp->data_meta;
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+ SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
#endif
struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(va);
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points to xdp->data, otherwise, we likely
+ * have a consumer accessing first few bytes of meta data,
+ * and then actual data.
+ */
+ prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
+ prefetch(xdp->data_meta + L1_CACHE_BYTES);
#endif
- /* build an skb to around the page buffer */
- skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+ /* build an skb around the page buffer */
+ skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
return NULL;
/* update pointers within the skb to store the data */
- skb_reserve(skb, IXGBEVF_SKB_PAD);
- __skb_put(skb, size);
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
/* update buffer offset */
#if (PAGE_SIZE < 8192)
@@ -948,17 +990,138 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
return skb;
}
+
+#define IXGBEVF_XDP_PASS 0
+#define IXGBEVF_XDP_CONSUMED 1
+#define IXGBEVF_XDP_TX 2
+
+static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
+ struct xdp_buff *xdp)
+{
+ struct ixgbevf_tx_buffer *tx_buffer;
+ union ixgbe_adv_tx_desc *tx_desc;
+ u32 len, cmd_type;
+ dma_addr_t dma;
+ u16 i;
+
+ len = xdp->data_end - xdp->data;
+
+ if (unlikely(!ixgbevf_desc_unused(ring)))
+ return IXGBEVF_XDP_CONSUMED;
+
+ dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(ring->dev, dma))
+ return IXGBEVF_XDP_CONSUMED;
+
+ /* record the location of the first descriptor for this packet */
+ tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
+ tx_buffer->bytecount = len;
+ tx_buffer->gso_segs = 1;
+ tx_buffer->protocol = 0;
+
+ i = ring->next_to_use;
+ tx_desc = IXGBEVF_TX_DESC(ring, i);
+
+ dma_unmap_len_set(tx_buffer, len, len);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+ tx_buffer->data = xdp->data;
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ /* put descriptor type bits */
+ cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+ IXGBE_ADVTXD_DCMD_DEXT |
+ IXGBE_ADVTXD_DCMD_IFCS;
+ cmd_type |= len | IXGBE_TXD_CMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ tx_desc->read.olinfo_status =
+ cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) |
+ IXGBE_ADVTXD_CC);
+
+ /* Avoid any potential race with cleanup */
+ smp_wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ i++;
+ if (i == ring->count)
+ i = 0;
+
+ tx_buffer->next_to_watch = tx_desc;
+ ring->next_to_use = i;
+
+ return IXGBEVF_XDP_TX;
+}
+
+static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring,
+ struct xdp_buff *xdp)
+{
+ int result = IXGBEVF_XDP_PASS;
+ struct ixgbevf_ring *xdp_ring;
+ struct bpf_prog *xdp_prog;
+ u32 act;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+ if (!xdp_prog)
+ goto xdp_out;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
+ result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping packet */
+ case XDP_DROP:
+ result = IXGBEVF_XDP_CONSUMED;
+ break;
+ }
+xdp_out:
+ rcu_read_unlock();
+ return ERR_PTR(-result);
+}
+
+static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+
+ rx_buffer->page_offset ^= truesize;
+#else
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+
+ rx_buffer->page_offset += truesize;
+#endif
+}
+
static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct ixgbevf_ring *rx_ring,
int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ struct ixgbevf_adapter *adapter = q_vector->adapter;
u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
struct sk_buff *skb = rx_ring->skb;
+ bool xdp_xmit = false;
+ struct xdp_buff xdp;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < budget)) {
- union ixgbe_adv_rx_desc *rx_desc;
struct ixgbevf_rx_buffer *rx_buffer;
+ union ixgbe_adv_rx_desc *rx_desc;
unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
@@ -981,14 +1144,36 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
/* retrieve a buffer from the ring */
- if (skb)
+ if (!skb) {
+ xdp.data = page_address(rx_buffer->page) +
+ rx_buffer->page_offset;
+ xdp.data_meta = xdp.data;
+ xdp.data_hard_start = xdp.data -
+ ixgbevf_rx_offset(rx_ring);
+ xdp.data_end = xdp.data + size;
+
+ skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
+ }
+
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+ xdp_xmit = true;
+ ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
+ size);
+ } else {
+ rx_buffer->pagecnt_bias++;
+ }
+ total_rx_packets++;
+ total_rx_bytes += size;
+ } else if (skb) {
ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
- else if (ring_uses_build_skb(rx_ring))
+ } else if (ring_uses_build_skb(rx_ring)) {
skb = ixgbevf_build_skb(rx_ring, rx_buffer,
- rx_desc, size);
- else
+ &xdp, rx_desc);
+ } else {
skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
- rx_desc, size);
+ &xdp, rx_desc);
+ }
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -997,7 +1182,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
break;
}
- ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
+ ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -1039,6 +1224,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
+ if (xdp_xmit) {
+ struct ixgbevf_ring *xdp_ring =
+ adapter->xdp_ring[rx_ring->queue_index];
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use);
+ }
+
u64_stats_update