author    Alexei Starovoitov <ast@kernel.org>    2018-05-24 18:36:16 -0700
committer Alexei Starovoitov <ast@kernel.org>    2018-05-24 18:36:16 -0700
commit    10f678683e4026e43524b0492068a371d00fdeed (patch)
tree      069715fbcf7b0f4b73103861fd0a111e143b5705 /include
parent    f80acbd233382619f597f785f8c238084dc62e21 (diff)
parent    a570e48fee1bc26f47aba2e1493f96a03bed3c8f (diff)
download  linux-10f678683e4026e43524b0492068a371d00fdeed.tar.gz
          linux-10f678683e4026e43524b0492068a371d00fdeed.tar.bz2
          linux-10f678683e4026e43524b0492068a371d00fdeed.zip
Merge branch 'xdp_xmit-bulking'
Jesper Dangaard Brouer says:

====================
This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames.

When the kernel is compiled with CONFIG_RETPOLINE, every indirect function pointer (branch) call hurts performance. For XDP this has a huge negative performance impact.

This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but also prepares for further optimizations. The DMA API's use of indirect function pointer calls is the primary source of the regression. Using bulking calls towards the DMA API (via the scatter-gather calls) is left for a follow-up patchset.

The other advantage of this API change is that drivers can more easily amortize the cost of any sync/locking scheme over the bulk of packets. The assumption of the current API is that the driver implementing the NDO will also allocate a dedicated XDP TX queue for every CPU in the system, which is not always possible or practical to configure. E.g. ixgbe cannot load an XDP program on a machine with more than 96 CPUs, due to limited hardware TX queues; virtio_net is hard to configure as it requires manually increasing the queues; the tun driver chooses to use a per-XDP-frame producer lock, modulo smp_processor_id, over the available queues.

I considered adding 'flags' to ndo_xdp_xmit, but it is not part of this patchset. This will be a follow-up patchset, once we know whether it will be needed (e.g. for a non-map xdp_redirect flush flag, and if AF_XDP chooses to use ndo_xdp_xmit for TX).

---
V5: Fixed up issues spotted by Daniel and John.
V4: Split out the patches from 4 to 8 patches. I cannot split the driver changes from the NDO change, but I've tried to isolate the NDO change together with each driver change as much as possible.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
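To make the new calling convention concrete, here is a minimal driver-side sketch of a bulked ndo_xdp_xmit. The "foo" driver, its foo_priv/xdp_tx_lock fields, and the foo_ring_full()/foo_xmit_one() helpers are invented for illustration and are not part of this patchset; only the NDO signature and the return/free semantics come from the diff below.

/* Hypothetical driver: one NDO call covers @n frames, the lock is taken
 * once per bulk, dropped frames are freed by the driver, and the return
 * value is the number of frames actually queued for transmit.
 */
static int foo_ndo_xdp_xmit(struct net_device *dev, int n,
			    struct xdp_frame **frames)
{
	struct foo_priv *priv = netdev_priv(dev);	/* invented private struct */
	int sent = 0, i;

	spin_lock(&priv->xdp_tx_lock);		/* cost amortized over the whole bulk */
	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (foo_ring_full(priv) || foo_xmit_one(priv, xdpf)) {
			xdp_return_frame(xdpf);	/* dropped frames are freed here */
			continue;
		}
		sent++;
	}
	spin_unlock(&priv->xdp_tx_lock);

	return sent;	/* frames successfully handed to hardware */
}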
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bpf.h            18
-rw-r--r--  include/linux/netdevice.h      14
-rw-r--r--  include/net/page_pool.h         5
-rw-r--r--  include/net/xdp.h               1
-rw-r--r--  include/trace/events/xdp.h     50
5 files changed, 77 insertions, 11 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1795eeee846c..bbe297436e5d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -487,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
-struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct xdp_buff;
+
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map);
-struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -573,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{
}
+struct xdp_buff;
+struct bpf_dtab_netdev;
+
+static inline
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx)
+{
+ return 0;
+}
+
static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
@@ -587,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{
}
-struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp,
struct net_device *dev_rx)
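For context, a rough sketch of how a redirect core is expected to use the new devmap prototypes: the lookup now returns the opaque bpf_dtab_netdev, the frame is enqueued via dev_map_enqueue(), and delivery is deferred to __dev_map_flush() at the end of the NAPI cycle. The function name is invented and this is not the actual net/core/filter.c code.

static int sketch_xdp_redirect_devmap(struct bpf_map *map, u32 index,
				      struct xdp_buff *xdp,
				      struct net_device *dev_rx)
{
	struct bpf_dtab_netdev *dst;
	int err;

	dst = __dev_map_lookup_elem(map, index);
	if (!dst)
		return -EINVAL;

	err = dev_map_enqueue(dst, xdp, dev_rx);	/* queued, not yet sent */
	if (err)
		return err;

	__dev_map_insert_ctx(map, index);	/* flushed later by __dev_map_flush() */
	return 0;
}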
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 03ed492c4e14..debdb6286170 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1185,9 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- * This function is used to submit a XDP packet for transmit on a
- * netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * This function is used to submit @n XDP packets for transmit on a
+ * netdevice. Returns number of frames successfully transmitted, frames
+ * that got dropped are freed/returned via xdp_return_frame().
+ * Returns negative number, means general error invoking ndo, meaning
+ * no frames were xmit'ed and core-caller will free all frames.
+ * TODO: Consider add flag to allow sending flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1375,8 +1379,8 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
- int (*ndo_xdp_xmit)(struct net_device *dev,
- struct xdp_frame *xdp);
+ int (*ndo_xdp_xmit)(struct net_device *dev, int n,
+ struct xdp_frame **xdp);
void (*ndo_xdp_flush)(struct net_device *dev);
};
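The ownership rules described in the ndo_xdp_xmit comment above can be summarized in a short caller-side sketch (illustrative only; the function name is invented): a negative return means the driver touched nothing and the caller still owns every frame, otherwise the driver consumed "sent" frames and already freed the rest as drops.

static int sketch_bulk_xmit(struct net_device *dev,
			    struct xdp_frame **frames, int n)
{
	int sent, i;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, n, frames);
	if (sent < 0) {
		/* general error: nothing was transmitted, caller frees all */
		for (i = 0; i < n; i++)
			xdp_return_frame_rx_napi(frames[i]);
		return sent;
	}

	/* the remaining (n - sent) frames were dropped and already freed
	 * inside the driver
	 */
	return n - sent;
}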
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c79087153148..694d055e01ef 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct);
-static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+static inline void page_pool_put_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- __page_pool_put_page(pool, page, false);
+ __page_pool_put_page(pool, page, allow_direct);
#endif
}
/* Very limited use-cases allow recycle direct */
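A tiny sketch of how the new allow_direct argument is intended to be used (driver and parameter names are invented): only the softirq/NAPI context that owns the pool may request the lock-free direct recycle; any other context must pass false and take the normal return path.

static void foo_recycle_rx_page(struct page_pool *pool, struct page *page,
				bool napi_owned)
{
	/* napi_owned is true only when called from this pool's RX softirq */
	page_pool_put_page(pool, page, napi_owned);
}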
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 0b689cf561c7..7ad779237ae8 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
}
void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
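A small sketch of where the new variant fits (the helper name is invented): frames dropped while still running under the RX NAPI that owns their memory, e.g. inside ndo_xdp_xmit during a redirect, can use the cheaper xdp_return_frame_rx_napi(); everything else keeps using xdp_return_frame().

static void sketch_drop_frame(struct xdp_frame *xdpf, bool in_rx_napi)
{
	if (in_rx_napi)
		xdp_return_frame_rx_napi(xdpf);	/* may recycle directly */
	else
		xdp_return_frame(xdpf);
}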
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 8989a92c571a..1ecf4c67fcf7 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index)
);
+#ifndef __DEVMAP_OBJ_TYPE
+#define __DEVMAP_OBJ_TYPE
+struct _bpf_dtab_netdev {
+ struct net_device *dev;
+};
+#endif /* __DEVMAP_OBJ_TYPE */
+
#define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \
(!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
- ((struct net_device *)fwd)->ifindex : 0)))
+ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu)
);
+TRACE_EVENT(xdp_devmap_xmit,
+
+ TP_PROTO(const struct bpf_map *map, u32 map_index,
+ int sent, int drops,
+ const struct net_device *from_dev,
+ const struct net_device *to_dev, int err),
+
+ TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(u32, map_index)
+ __field(int, drops)
+ __field(int, sent)
+ __field(int, from_ifindex)
+ __field(int, to_ifindex)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map->id;
+ __entry->act = XDP_REDIRECT;
+ __entry->map_index = map_index;
+ __entry->drops = drops;
+ __entry->sent = sent;
+ __entry->from_ifindex = from_dev->ifindex;
+ __entry->to_ifindex = to_dev->ifindex;
+ __entry->err = err;
+ ),
+
+ TP_printk("ndo_xdp_xmit"
+ " map_id=%d map_index=%d action=%s"
+ " sent=%d drops=%d"
+ " from_ifindex=%d to_ifindex=%d err=%d",
+ __entry->map_id, __entry->map_index,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->sent, __entry->drops,
+ __entry->from_ifindex, __entry->to_ifindex, __entry->err)
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
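Finally, a sketch of how the devmap flush path is expected to emit the new tracepoint, one event per ndo_xdp_xmit bulk. Only trace_xdp_devmap_xmit() and its argument order come from the TRACE_EVENT above; the surrounding function is invented for illustration.

static void sketch_report_bulk(const struct bpf_map *map, u32 index,
			       const struct net_device *dev_rx,
			       const struct net_device *to_dev,
			       int sent, int drops, int err)
{
	/* sent/drops describe one bulk; err is the negative NDO return, or 0 */
	trace_xdp_devmap_xmit(map, index, sent, drops, dev_rx, to_dev, err);
}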