summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2023-05-25 13:20:47 +0200
committerPaolo Abeni <pabeni@redhat.com>2023-05-25 13:20:48 +0200
commite8f8b3323039e4c60c626ce853eca7b36b0aaa8f (patch)
treed7f96fb0f8de6cadd9f3d429fe8425f9caf6987c
parentae4899bb486f73e6d3d55a82b40482b2c5529a98 (diff)
parent7016eb738651ed1dfeef2bbf266bc7dac734067d (diff)
downloadlinux-e8f8b3323039e4c60c626ce853eca7b36b0aaa8f.tar.gz
linux-e8f8b3323039e4c60c626ce853eca7b36b0aaa8f.tar.bz2
linux-e8f8b3323039e4c60c626ce853eca7b36b0aaa8f.zip
Merge branch 'net-tcp-make-txhash-use-consistent-for-ipv4'
Antoine Tenart says: ==================== net: tcp: make txhash use consistent for IPv4 Series is divided in two parts. First two commits make the txhash (used for the skb hash in TCP) to be consistent for all IPv4/TCP packets (IPv6 doesn't have the same issue). Last commit improve a hash-related doc. One example is when using OvS with dp_hash, which uses skb->hash, to select a path. We'd like packets from the same flow to be consistent, as well as the hash being stable over time when using net.core.txrehash=0. Same applies for kernel ECMP which also can use skb->hash. ==================== Link: https://lore.kernel.org/r/20230523161453.196094-1-atenart@kernel.org Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r--Documentation/admin-guide/sysctl/net.rst4
-rw-r--r--include/net/ip.h2
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv4/tcp_minisocks.c2
5 files changed, 16 insertions, 10 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 466c560b0c30..4877563241f3 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -386,8 +386,8 @@ Default : 0 (for compatibility reasons)
txrehash
--------
-Controls default hash rethink behaviour on listening socket when SO_TXREHASH
-option is set to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
+Controls default hash rethink behaviour on socket when SO_TXREHASH option is set
+to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
If set to 1 (default), hash rethink is performed on listening socket.
If set to 0, hash rethink is not performed.
diff --git a/include/net/ip.h b/include/net/ip.h
index 8a3860a916dc..2982dd13cf16 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -286,7 +286,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
- unsigned int len, u64 transmit_time);
+ unsigned int len, u64 transmit_time, u32 txhash);
#define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field)
#define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 553c740a6bfb..244fb9365d87 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1570,7 +1570,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
- unsigned int len, u64 transmit_time)
+ unsigned int len, u64 transmit_time, u32 txhash)
{
struct ip_options_data replyopts;
struct ipcm_cookie ipc;
@@ -1633,6 +1633,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
nskb->mono_delivery_time = !!transmit_time;
+ if (txhash)
+ skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
ip_push_pending_frames(sk, &fl4);
}
out:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index af043f063a73..a50bd782f91f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
u64 transmit_time = 0;
struct sock *ctl_sk;
struct net *net;
+ u32 txhash = 0;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
xfrm_sk_clone_policy(ctl_sk, sk);
+ txhash = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_txhash : sk->sk_txhash;
} else {
ctl_sk->sk_mark = 0;
ctl_sk->sk_priority = 0;
@@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
- transmit_time);
+ transmit_time, txhash);
xfrm_sk_free_policy(ctl_sk);
sock_net_set(ctl_sk, &init_net);
@@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
- int reply_flags, u8 tos)
+ int reply_flags, u8 tos, u32 txhash)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
- transmit_time);
+ transmit_time, txhash);
sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
- tw->tw_tos
+ tw->tw_tos,
+ tw->tw_txhash
);
inet_twsk_put(tw);
@@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
0,
tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
- ip_hdr(skb)->tos);
+ ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
}
/*
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dac0d62120e6..04fc328727e6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -303,6 +303,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
tcptw->tw_tx_delay = tp->tcp_tx_delay;
+ tw->tw_txhash = sk->sk_txhash;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -311,7 +312,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
- tw->tw_txhash = sk->sk_txhash;
tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif