From 89527be8d8d672773eeaec910118a6e84fb597e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:33:56 -0700 Subject: net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes New netlink attributes IFLA_TSO_MAX_SIZE and IFLA_TSO_MAX_SEGS are used to report to user-space the device TSO limits. ip -d link sh dev eth1 ... tso_max_size 65536 tso_max_segs 65535 Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d1e600816b82..5f58dcfe2787 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -368,6 +368,8 @@ enum { IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, __IFLA_MAX }; -- cgit v1.2.3 From 7c4e983c4f3cf94fcd879730c6caa877e0768a4d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 13 May 2022 11:33:57 -0700 Subject: net: allow gso_max_size to exceed 65536 The code for gso_max_size was added originally to allow for debugging and workaround of buggy devices that couldn't support TSO with blocks 64K in size. The original reason for limiting it to 64K was because that was the existing limits of IPv4 and non-jumbogram IPv6 length fields. With the addition of Big TCP we can remove this limit and allow the value to potentially go up to UINT_MAX and instead be limited by the tso_max_size value. So in order to support this we need to go through and clean up the remaining users of the gso_max_size value so that the values will cap at 64K for non-TCPv6 flows. In addition we can clean up the GSO_MAX_SIZE value so that 64K becomes GSO_LEGACY_MAX_SIZE and UINT_MAX will now be the upper limit for GSO_MAX_SIZE. v6: (edumazet) fixed a compile error if CONFIG_IPV6=n, in a new sk_trim_gso_size() helper. netif_set_tso_max_size() caps the requested TSO size with GSO_MAX_SIZE. Signed-off-by: Alexander Duyck Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 536321691c72..ce780e352f43 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2272,7 +2272,9 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ -#define GSO_MAX_SIZE 65536 +#define GSO_LEGACY_MAX_SIZE 65536u +#define GSO_MAX_SIZE UINT_MAX + unsigned int gso_max_size; #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX -- cgit v1.2.3 From 34b92e8d19da1e44070dc0ecc2747871469ac534 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:33:58 -0700 Subject: net: limit GSO_MAX_SIZE to 524280 bytes Make sure we will not overflow shinfo->gso_segs Minimal TCP MSS size is 8 bytes, and shinfo->gso_segs is a 16bit field. TCP_MIN_GSO_SIZE is currently defined in include/net/tcp.h, it seems cleaner to not bring tcp details into include/linux/netdevice.h Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce780e352f43..fd38847d0dc7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2272,14 +2272,17 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ +#define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u -#define GSO_MAX_SIZE UINT_MAX +/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), + * and shinfo->gso_segs is a 16bit field. + */ +#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) unsigned int gso_max_size; #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX unsigned int tso_max_size; -#define GSO_MAX_SEGS 65535 u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; -- cgit v1.2.3 From 7c96d8ec96bb71aac54c9f872aaa65d7411ab864 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:34:00 -0700 Subject: ipv6: add struct hop_jumbo_hdr definition Following patches will need to add and remove local IPv6 jumbogram options to enable BIG TCP. Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/net/ipv6.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 213612f1680c..63d019953c47 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -151,6 +151,17 @@ struct frag_hdr { __be32 identification; }; +/* + * Jumbo payload option, as described in RFC 2675 2. + */ +struct hop_jumbo_hdr { + u8 nexthdr; + u8 hdrlen; + u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ + u8 tlv_len; /* 4 */ + __be32 jumbo_payload_len; +}; + #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 -- cgit v1.2.3 From 09f3d1a3a52c696208008618a67e2c7c3fb16d41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:34:01 -0700 Subject: ipv6/gso: remove temporary HBH/jumbo header ipv6 tcp and gro stacks will soon be able to build big TCP packets, with an added temporary Hop By Hop header. If GSO is involved for these large packets, we need to remove the temporary HBH header before segmentation happens. v2: perform HBH removal from ipv6_gso_segment() instead of skb_segment() (Alexander feedback) Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/net/ipv6.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 63d019953c47..b6df0314aa02 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -467,6 +467,39 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); +/* This helper is specialized for BIG TCP needs. + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. + * It assumes headers are already in skb->head. + * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. + */ +static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) +{ + const struct hop_jumbo_hdr *jhdr; + const struct ipv6hdr *nhdr; + + if (likely(skb->len <= GRO_MAX_SIZE)) + return 0; + + if (skb->protocol != htons(ETH_P_IPV6)) + return 0; + + if (skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) + return 0; + + nhdr = ipv6_hdr(skb); + + if (nhdr->nexthdr != NEXTHDR_HOP) + return 0; + + jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || + jhdr->nexthdr != IPPROTO_TCP) + return 0; + return jhdr->nexthdr; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special -- cgit v1.2.3 From 0fe79f28bfaf73b66b7b1562d2468f94aa03bd12 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 13 May 2022 11:34:03 -0700 Subject: net: allow gro_max_size to exceed 65536 Allow the gro_max_size to exceed a value larger than 65536. There weren't really any external limitations that prevented this other than the fact that IPv4 only supports a 16 bit length field. Since we have the option of adding a hop-by-hop header for IPv6 we can allow IPv6 to exceed this value and for IPv4 and non-TCP flows we can cap things at 65536 via a constant rather than relying on gro_max_size. [edumazet] limit GRO_MAX_SIZE to (8 * 65535) to avoid overflows. Signed-off-by: Alexander Duyck Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++++- include/net/ipv6.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd38847d0dc7..d57ce248004c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2161,7 +2161,11 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; -#define GRO_MAX_SIZE 65536 +#define GRO_LEGACY_MAX_SIZE 65536u +/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), + * and shinfo->gso_segs is a 16bit field. + */ +#define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b6df0314aa02..5b38bf1a586b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -477,7 +477,7 @@ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) const struct hop_jumbo_hdr *jhdr; const struct ipv6hdr *nhdr; - if (likely(skb->len <= GRO_MAX_SIZE)) + if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) return 0; if (skb->protocol != htons(ETH_P_IPV6)) -- cgit v1.2.3 From 80e425b613421911f89664663a7060216abcaed2 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Fri, 13 May 2022 11:34:04 -0700 Subject: ipv6: Add hop-by-hop header to jumbograms in ip6_output Instead of simply forcing a 0 payload_len in IPv6 header, implement RFC 2675 and insert a custom extension header. Note that only TCP stack is currently potentially generating jumbograms, and that this extension header is purely local, it wont be sent on a physical link. This is needed so that packet capture (tcpdump and friends) can properly dissect these large packets. Signed-off-by: Coco Li Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ec5ca392eaa3..38c8203d52cb 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -145,6 +145,7 @@ struct inet6_skb_parm { #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 +#define IP6SKB_FAKEJUMBO 512 }; #if defined(CONFIG_NET_L3_MASTER_DEV) -- cgit v1.2.3