summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Jiayuan Chen <mrpre@163.com> 2025-01-22 18:09:14 +0800
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2025-02-27 04:10:50 -0800
commit 05a571ee23c006b59a924c9977acb8c93ac8a3a1 (patch)
tree 5a970307fd694495926507fa734e97d202f63d4e /include
parent a26f95b6e317acae6f85e8d9cf48c9d87aaccc82 (diff)
download linux-05a571ee23c006b59a924c9977acb8c93ac8a3a1.tar.gz
linux-05a571ee23c006b59a924c9977acb8c93ac8a3a1.tar.bz2
linux-05a571ee23c006b59a924c9977acb8c93ac8a3a1.zip
bpf: Fix wrong copied_seq calculation
[ Upstream commit 36b62df5683c315ba58c950f1a9c771c796c30ec ]

'sk->copied_seq' was updated in the tcp_eat_skb() function when the action of a BPF program was SK_REDIRECT. For other actions, like SK_PASS, the update logic for 'sk->copied_seq' was moved to tcp_bpf_recvmsg_parser() to ensure the accuracy of the 'fionread' feature.

It works for a single stream_verdict scenario, as it also modified sk_data_ready->sk_psock_verdict_data_ready->tcp_read_skb to remove updating 'sk->copied_seq'.

However, for programs where both stream_parser and stream_verdict are active (strparser purpose), tcp_read_sock() was used instead of tcp_read_skb() (sk_data_ready->strp_data_ready->tcp_read_sock). tcp_read_sock() now still updates 'sk->copied_seq', leading to duplicate updates.

In summary, for strparser + SK_PASS, copied_seq is redundantly calculated in both tcp_read_sock() and tcp_bpf_recvmsg_parser().

The issue causes incorrect copied_seq calculations, which prevent correct data reads from the recv() interface in user-land.

We do not want to add new proto_ops to implement a new version of tcp_read_sock, as this would introduce code complexity [1].

We could have added noack and copied_seq to desc, and then called ops->read_sock. However, unfortunately, other modules didn’t fully initialize desc to zero. So, for now, we are directly calling tcp_read_sock_noack() in tcp_bpf.c.

[1]: https://lore.kernel.org/bpf/20241218053408.437295-1-mrpre@163.com

Fixes: e5c6de5fa025 ("bpf, sockmap: Incorrectly handling copied_seq")
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Jiayuan Chen <mrpre@163.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://patch.msgid.link/20250122100917.49845-3-mrpre@163.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/skmsg.h | 2
-rw-r--r-- include/net/tcp.h | 8
2 files changed, 10 insertions, 0 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 6ccfd9236387..32bbebf5b71e 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -87,6 +87,8 @@ struct sk_psock {
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
struct strparser strp;
+ u32 copied_seq;
+ u32 ingress_bytes;
#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 78c755414fa8..a6def0aab3ed 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -690,6 +690,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
void tcp_read_done(struct sock *sk, size_t len);
@@ -2404,6 +2407,11 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct strparser;
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
+#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET