diff options
author | Jiayuan Chen <mrpre@163.com> | 2025-01-22 18:09:14 +0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-02-27 04:10:50 -0800 |
commit | 05a571ee23c006b59a924c9977acb8c93ac8a3a1 (patch) | |
tree | 5a970307fd694495926507fa734e97d202f63d4e /include | |
parent | a26f95b6e317acae6f85e8d9cf48c9d87aaccc82 (diff) | |
download | linux-05a571ee23c006b59a924c9977acb8c93ac8a3a1.tar.gz linux-05a571ee23c006b59a924c9977acb8c93ac8a3a1.tar.bz2 linux-05a571ee23c006b59a924c9977acb8c93ac8a3a1.zip |
bpf: Fix wrong copied_seq calculation
[ Upstream commit 36b62df5683c315ba58c950f1a9c771c796c30ec ]
'sk->copied_seq' was updated in the tcp_eat_skb() function when the action
of a BPF program was SK_REDIRECT. For other actions, like SK_PASS, the
update logic for 'sk->copied_seq' was moved to tcp_bpf_recvmsg_parser()
to ensure the accuracy of the 'fionread' feature.
It works for a single stream_verdict scenario, as it also modified
sk_data_ready->sk_psock_verdict_data_ready->tcp_read_skb
to remove updating 'sk->copied_seq'.
However, for programs where both stream_parser and stream_verdict are
active (strparser purpose), tcp_read_sock() was used instead of
tcp_read_skb() (sk_data_ready->strp_data_ready->tcp_read_sock).
tcp_read_sock() now still updates 'sk->copied_seq', leading to duplicate
updates.
In summary, for strparser + SK_PASS, copied_seq is redundantly calculated
in both tcp_read_sock() and tcp_bpf_recvmsg_parser().
The issue causes incorrect copied_seq calculations, which prevent
correct data reads from the recv() interface in user-land.
We do not want to add new proto_ops to implement a new version of
tcp_read_sock, as this would introduce code complexity [1].
We could have added noack and copied_seq to desc, and then called
ops->read_sock. However, unfortunately, other modules didn’t fully
initialize desc to zero. So, for now, we are directly calling
tcp_read_sock_noack() in tcp_bpf.c.
[1]: https://lore.kernel.org/bpf/20241218053408.437295-1-mrpre@163.com
Fixes: e5c6de5fa025 ("bpf, sockmap: Incorrectly handling copied_seq")
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Jiayuan Chen <mrpre@163.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://patch.msgid.link/20250122100917.49845-3-mrpre@163.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/skmsg.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 8 |
2 files changed, 10 insertions, 0 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 6ccfd9236387..32bbebf5b71e 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -87,6 +87,8 @@ struct sk_psock { struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; + u32 copied_seq; + u32 ingress_bytes; #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; diff --git a/include/net/tcp.h b/include/net/tcp.h index 78c755414fa8..a6def0aab3ed 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -690,6 +690,9 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); void tcp_read_done(struct sock *sk, size_t len); @@ -2404,6 +2407,11 @@ struct sk_psock; #ifdef CONFIG_BPF_SYSCALL int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#ifdef CONFIG_BPF_STREAM_PARSER +struct strparser; +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); +#endif /* CONFIG_BPF_STREAM_PARSER */ #endif /* CONFIG_BPF_SYSCALL */ #ifdef CONFIG_INET |