diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2018-05-24 18:36:16 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2018-05-24 18:36:16 -0700 |
| commit | 10f678683e4026e43524b0492068a371d00fdeed (patch) | |
| tree | 069715fbcf7b0f4b73103861fd0a111e143b5705 /include | |
| parent | f80acbd233382619f597f785f8c238084dc62e21 (diff) | |
| parent | a570e48fee1bc26f47aba2e1493f96a03bed3c8f (diff) | |
| download | linux-10f678683e4026e43524b0492068a371d00fdeed.tar.gz linux-10f678683e4026e43524b0492068a371d00fdeed.tar.bz2 linux-10f678683e4026e43524b0492068a371d00fdeed.zip | |
Merge branch 'xdp_xmit-bulking'
Jesper Dangaard Brouer says:
====================
This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames.
When the kernel is compiled with CONFIG_RETPOLINE, every indirect function
pointer (branch) call hurts performance. For XDP this has a huge
negative performance impact.
This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but
also prepares for further optimizations. The DMA API's use of indirect
function pointer calls is the primary source of the regression. It is
left for a followup patchset to use bulking calls towards the DMA API
(via the scatter-gather calls).
The other advantage of this API change is that drivers can more easily
amortize the cost of any sync/locking scheme over the bulk of
packets. The assumption of the current API is that the driver
implementing the NDO will also allocate a dedicated XDP TX queue for
every CPU in the system, which is not always possible or practical to
configure. E.g. ixgbe cannot load an XDP program on a machine with
more than 96 CPUs, due to limited hardware TX queues. E.g. virtio_net
is hard to configure as it requires manually increasing the
queues. E.g. the tun driver chooses to use a per XDP frame producer lock
modulo smp_processor_id over avail queues.
I've considered adding 'flags' to ndo_xdp_xmit, but it's not part of
this patchset. This will be a followup patchset, once we know if this
will be needed (e.g. for non-map xdp_redirect flush-flag, and if
AF_XDP chooses to use ndo_xdp_xmit for TX).
---
V5: Fixed up issues spotted by Daniel and John
V4: Split out the patches from 4 to 8 patches. I cannot split the
driver changes from the NDO change, but I've tried to isolate the NDO
change together with the driver change as much as possible.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/bpf.h | 18 | ||||
| -rw-r--r-- | include/linux/netdevice.h | 14 | ||||
| -rw-r--r-- | include/net/page_pool.h | 5 | ||||
| -rw-r--r-- | include/net/xdp.h | 1 | ||||
| -rw-r--r-- | include/trace/events/xdp.h | 50 |
5 files changed, 77 insertions, 11 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1795eeee846c..bbe297436e5d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -487,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ -struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +struct xdp_buff; + +struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_flush(struct bpf_map *map); -struct xdp_buff; int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -573,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map) { } +struct xdp_buff; +struct bpf_dtab_netdev; + +static inline +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx) +{ + return 0; +} + static inline struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) { @@ -587,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map) { } -struct xdp_buff; static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03ed492c4e14..debdb6286170 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1185,9 +1185,13 @@ struct dev_ifalias { * This function is used to set or query state related to XDP on the * netdevice and manage BPF offload. See definition of * enum bpf_netdev_command for details. 
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp); - * This function is used to submit a XDP packet for transmit on a - * netdevice. + * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp); + * This function is used to submit @n XDP packets for transmit on a + * netdevice. Returns number of frames successfully transmitted, frames + * that got dropped are freed/returned via xdp_return_frame(). + * Returns negative number, means general error invoking ndo, meaning + * no frames were xmit'ed and core-caller will free all frames. + * TODO: Consider add flag to allow sending flush operation. * void (*ndo_xdp_flush)(struct net_device *dev); * This function is used to inform the driver to flush a particular * xdp tx queue. Must be called on same CPU as xdp_xmit. @@ -1375,8 +1379,8 @@ struct net_device_ops { int needed_headroom); int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); - int (*ndo_xdp_xmit)(struct net_device *dev, - struct xdp_frame *xdp); + int (*ndo_xdp_xmit)(struct net_device *dev, int n, + struct xdp_frame **xdp); void (*ndo_xdp_flush)(struct net_device *dev); }; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index c79087153148..694d055e01ef 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool); void __page_pool_put_page(struct page_pool *pool, struct page *page, bool allow_direct); -static inline void page_pool_put_page(struct page_pool *pool, struct page *page) +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. 
*/ #ifdef CONFIG_PAGE_POOL - __page_pool_put_page(pool, page, false); + __page_pool_put_page(pool, page, allow_direct); #endif } /* Very limited use-cases allow recycle direct */ diff --git a/include/net/xdp.h b/include/net/xdp.h index 0b689cf561c7..7ad779237ae8 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) } void xdp_return_frame(struct xdp_frame *xdpf); +void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 8989a92c571a..1ecf4c67fcf7 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, __entry->map_id, __entry->map_index) ); +#ifndef __DEVMAP_OBJ_TYPE +#define __DEVMAP_OBJ_TYPE +struct _bpf_dtab_netdev { + struct net_device *dev; +}; +#endif /* __DEVMAP_OBJ_TYPE */ + #define devmap_ifindex(fwd, map) \ (!fwd ? 0 : \ (!map ? 0 : \ ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? 
\ - ((struct net_device *)fwd)->ifindex : 0))) + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ @@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue, __entry->to_cpu) ); +TRACE_EVENT(xdp_devmap_xmit, + + TP_PROTO(const struct bpf_map *map, u32 map_index, + int sent, int drops, + const struct net_device *from_dev, + const struct net_device *to_dev, int err), + + TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err), + + TP_STRUCT__entry( + __field(int, map_id) + __field(u32, act) + __field(u32, map_index) + __field(int, drops) + __field(int, sent) + __field(int, from_ifindex) + __field(int, to_ifindex) + __field(int, err) + ), + + TP_fast_assign( + __entry->map_id = map->id; + __entry->act = XDP_REDIRECT; + __entry->map_index = map_index; + __entry->drops = drops; + __entry->sent = sent; + __entry->from_ifindex = from_dev->ifindex; + __entry->to_ifindex = to_dev->ifindex; + __entry->err = err; + ), + + TP_printk("ndo_xdp_xmit" + " map_id=%d map_index=%d action=%s" + " sent=%d drops=%d" + " from_ifindex=%d to_ifindex=%d err=%d", + __entry->map_id, __entry->map_index, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->sent, __entry->drops, + __entry->from_ifindex, __entry->to_ifindex, __entry->err) +); + #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h> |
