diff options
author | Zijian Zhang <zijianzhang@bytedance.com> | 2024-12-10 01:20:39 +0000 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-01-02 10:30:48 +0100 |
commit | be848bde4a7ffa75a339bcead9265bc1b74b026a (patch) | |
tree | ac68b86f32f2ddbbdd72981936026877aa8b933c | |
parent | 2a572e308f67ecf8d0234ead4a2728c3ad176591 (diff) | |
download | linux-be848bde4a7ffa75a339bcead9265bc1b74b026a.tar.gz linux-be848bde4a7ffa75a339bcead9265bc1b74b026a.tar.bz2 linux-be848bde4a7ffa75a339bcead9265bc1b74b026a.zip |
tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress redirection
[ Upstream commit d888b7af7c149c115dd6ac772cc11c375da3e17c ]
When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will
be created out of the skb, and the rmem accounting of the sk_msg will be
handled by the skb.
For skmsgs in __SK_REDIRECT case of tcp_bpf_send_verdict, when redirecting
to the ingress of a socket, although we sk_rmem_schedule and add sk_msg to
the ingress_msg of sk_redir, we do not update sk_rmem_alloc. As a result,
except for the global memory limit, the rmem of sk_redir is nearly
unlimited. Thus, add sk_rmem_alloc related logic to limit the recv buffer.
Since the function sk_msg_recvmsg and __sk_psock_purge_ingress_msg are
used in these two paths. We use "msg->skb" to test whether the sk_msg is
skb backed up. If it's not, we shall do the memory accounting explicitly.
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r-- | include/linux/skmsg.h | 11 | ||||
-rw-r--r-- | net/core/skmsg.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_bpf.c | 4 |
3 files changed, 16 insertions, 5 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 062fe440f5d0..6ccfd9236387 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -308,17 +308,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } -static inline void sk_psock_queue_msg(struct sk_psock *psock, +static inline bool sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { + bool ret; + spin_lock_bh(&psock->ingress_lock); - if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { list_add_tail(&msg->list, &psock->ingress_msg); - else { + ret = true; + } else { sk_msg_free(psock->sk, msg); kfree(msg); + ret = false; } spin_unlock_bh(&psock->ingress_lock); + return ret; } static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 584516387fda..65764952bc68 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -444,8 +444,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (likely(!peek)) { sge->offset += copy; sge->length -= copy; - if (!msg_rx->skb) + if (!msg_rx->skb) { sk_mem_uncharge(sk, copy); + atomic_sub(copy, &sk->sk_rmem_alloc); + } msg_rx->sg.size -= copy; if (!sge->length) { @@ -771,6 +773,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) { list_del(&msg->list); + if (!msg->skb) + atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc); sk_msg_free(psock->sk, msg); kfree(msg); } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index cd999c55da99..a8db010e9e61 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -56,6 +56,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, } sk_mem_charge(sk, size); + atomic_add(size, &sk->sk_rmem_alloc); sk_msg_xfer(tmp, msg, i, size); copied += size; if (sge->length) @@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, if (!ret) { msg->sg.start = i; - sk_psock_queue_msg(psock, tmp); + if (!sk_psock_queue_msg(psock, tmp)) + atomic_sub(copied, &sk->sk_rmem_alloc); sk_psock_data_ready(sk, psock); } else { sk_msg_free(sk, tmp); |