From 0ead60804b64f5bd6999eec88e503c6a1a242d41 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Oct 2024 13:46:21 -0400 Subject: sctp: properly validate chunk size in sctp_sf_ootb() A size validation fix similar to that in Commit 50619dbf8db7 ("sctp: add size validation when walking chunks") is also required in sctp_sf_ootb() to address a crash reported by syzbot: BUG: KMSAN: uninit-value in sctp_sf_ootb+0x7f5/0xce0 net/sctp/sm_statefuns.c:3712 sctp_sf_ootb+0x7f5/0xce0 net/sctp/sm_statefuns.c:3712 sctp_do_sm+0x181/0x93d0 net/sctp/sm_sideeffect.c:1166 sctp_endpoint_bh_rcv+0xc38/0xf90 net/sctp/endpointola.c:407 sctp_inq_push+0x2ef/0x380 net/sctp/inqueue.c:88 sctp_rcv+0x3831/0x3b20 net/sctp/input.c:243 sctp4_rcv+0x42/0x50 net/sctp/protocol.c:1159 ip_protocol_deliver_rcu+0xb51/0x13d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x336/0x500 net/ipv4/ip_input.c:233 Reported-by: syzbot+f0cbb34d39392f2746ca@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Link: https://patch.msgid.link/a29ebb6d8b9f8affd0f9abb296faafafe10c17d8.1730223981.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7d315a18612b..a0524ba8d787 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3751,7 +3751,7 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, } ch = (struct sctp_chunkhdr *)ch_end; - } while (ch_end < skb_tail_pointer(skb)); + } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb)); if (ootb_shut_ack) return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands); -- cgit v1.2.3 From cfbbd4859882a5469f6f4945937a074ee78c4b46 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 4 Nov 2024 13:31:41 +0100 Subject: mptcp: no admin perm to list endpoints During the switch to YNL, the command to list all endpoints has been accidentally restricted to users with admin permissions. It looks like there are no reasons to have this restriction which makes it harder for a user to quickly check if the endpoint list has been correctly populated by an automated tool. Best to go back to the previous behaviour then. mptcp_pm_gen.c has been modified using ynl-gen-c.py: $ ./tools/net/ynl/ynl-gen-c.py --mode kernel \ --spec Documentation/netlink/specs/mptcp_pm.yaml --source \ -o net/mptcp/mptcp_pm_gen.c The header file doesn't need to be regenerated. Fixes: 1d0507f46843 ("net: mptcp: convert netlink from small_ops to ops") Cc: stable@vger.kernel.org Reviewed-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241104-net-mptcp-misc-6-12-v1-1-c13f2ff1656f@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/mptcp_pm_gen.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index c30a2a90a192..bfb37c5a88c4 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -112,7 +112,6 @@ const struct genl_ops mptcp_pm_nl_ops[11] = { .dumpit = mptcp_pm_nl_get_addr_dumpit, .policy = mptcp_pm_get_addr_nl_policy, .maxattr = MPTCP_PM_ATTR_TOKEN, - .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, -- cgit v1.2.3 From 99635c91fb8b860a6404b9bc8b769df7bdaa2ae3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 4 Nov 2024 13:31:42 +0100 Subject: mptcp: use sock_kfree_s instead of kfree The local address entries on userspace_pm_local_addr_list are allocated by sock_kmalloc(). It's then required to use sock_kfree_s() instead of kfree() to free these entries in order to adjust the allocated size on the sk side. Fixes: 24430f8bf516 ("mptcp: add address into userspace pm list") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241104-net-mptcp-misc-6-12-v1-2-c13f2ff1656f@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2cceded3a83a..56dfea9862b7 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -91,6 +91,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *addr) { struct mptcp_pm_addr_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { @@ -98,7 +99,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, * be used multiple times (e.g. fullmesh mode). */ list_del_rcu(&entry->list); - kfree(entry); + sock_kfree_s(sk, entry, sizeof(*entry)); msk->pm.local_addr_used--; return 0; } -- cgit v1.2.3 From c03d278fdf35e73dd0ec543b9b556876b9d9a8dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Nov 2024 12:07:22 +0100 Subject: netfilter: nf_tables: wait for rcu grace period on net_device removal 8c873e219970 ("netfilter: core: free hooks with call_rcu") removed synchronize_net() call when unregistering basechain hook, however, net_device removal event handler for the NFPROTO_NETDEV was not updated to wait for RCU grace period. Note that 835b803377f5 ("netfilter: nf_tables_netdev: unregister hooks on net_device removal") does not remove basechain rules on device removal, I was hinted to remove rules on net_device removal later, see 5ebe0b0eec9d ("netfilter: nf_tables: destroy basechain and rules on netdevice removal"). Although NETDEV_UNREGISTER event is guaranteed to be handled after synchronize_net() call, this path needs to wait for rcu grace period via rcu callback to release basechain hooks if netns is alive because an ongoing netlink dump could be in progress (sockets hold a reference on the netns). Note that nf_tables_pre_exit_net() unregisters and releases basechain hooks but it is possible to see NETDEV_UNREGISTER at a later stage in the netns exit path, eg. veth peer device in another netns: cleanup_net() default_device_exit_batch() unregister_netdevice_many_notify() notifier_call_chain() nf_tables_netdev_event() __nft_release_basechain() In this particular case, same rule of thumb applies: if netns is alive, then wait for rcu grace period because netlink dump in the other netns could be in progress. Otherwise, if the other netns is going away then no netlink dump can be in progress and basechain hooks can be released inmediately. While at it, turn WARN_ON() into WARN_ON_ONCE() for the basechain validation, which should not ever happen. Fixes: 835b803377f5 ("netfilter: nf_tables_netdev: unregister hooks on net_device removal") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a24fe62650a7..588a2757986c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1495,6 +1495,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); INIT_LIST_HEAD(&table->flowtables); + write_pnet(&table->net, net); table->family = family; table->flags = flags; table->handle = ++nft_net->table_handle; @@ -11430,22 +11431,48 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -int __nft_release_basechain(struct nft_ctx *ctx) +static void __nft_release_basechain_now(struct nft_ctx *ctx) { struct nft_rule *rule, *nr; - if (WARN_ON(!nft_is_base_chain(ctx->chain))) - return 0; - - nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } + nf_tables_chain_destroy(ctx->chain); +} + +static void nft_release_basechain_rcu(struct rcu_head *head) +{ + struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); + struct nft_ctx ctx = { + .family = chain->table->family, + .chain = chain, + .net = read_pnet(&chain->table->net), + }; + + __nft_release_basechain_now(&ctx); + put_net(ctx.net); +} + +int __nft_release_basechain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) + return 0; + + nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); + list_for_each_entry(rule, &ctx->chain->rules, list) + nft_use_dec(&ctx->chain->use); + nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx->chain); + + if (maybe_get_net(ctx->net)) + call_rcu(&ctx->chain->rcu_head, nft_release_basechain_rcu); + else + __nft_release_basechain_now(ctx); return 0; } -- cgit v1.2.3 From de88df01796b309903b70888fbdf2b89607e3a6a Mon Sep 17 00:00:00 2001 From: Wenjia Zhang Date: Wed, 6 Nov 2024 09:26:12 +0100 Subject: net/smc: Fix lookup of netdev by using ib_device_get_netdev() The SMC-R variant of the SMC protocol used direct call to function ib_device_ops.get_netdev() to lookup netdev. As we used mlx5 device driver to run SMC-R, it failed to find a device, because in mlx5_ib the internal net device management for retrieving net devices was replaced by a common interface ib_device_get_netdev() in commit 8d159eb2117b ("RDMA/mlx5: Use IB set_netdev and get_netdev functions"). Since such direct accesses to the internal net device management is not recommended at all, update the SMC-R code to use proper API ib_device_get_netdev(). Fixes: 54903572c23c ("net/smc: allow pnetid-less configuration") Reported-by: Aswin K Reviewed-by: Gerd Bayer Reviewed-by: Halil Pasic Reviewed-by: Simon Horman Reviewed-by: Dust Li Reviewed-by: Wen Gu Reviewed-by: Zhu Yanjun Reviewed-by: D. Wythe Signed-off-by: Wenjia Zhang Reviewed-by: Leon Romanovsky Link: https://patch.msgid.link/20241106082612.57803-1-wenjia@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/smc_ib.c | 8 ++------ net/smc/smc_pnet.c | 4 +--- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9297dc20bfe2..9c563cdbea90 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -899,9 +899,7 @@ static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port) struct ib_device *ibdev = smcibdev->ibdev; struct net_device *ndev; - if (!ibdev->ops.get_netdev) - return; - ndev = ibdev->ops.get_netdev(ibdev, port + 1); + ndev = ib_device_get_netdev(ibdev, port + 1); if (ndev) { smcibdev->ndev_ifidx[port] = ndev->ifindex; dev_put(ndev); @@ -921,9 +919,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) port_cnt = smcibdev->ibdev->phys_port_cnt; for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { libdev = smcibdev->ibdev; - if (!libdev->ops.get_netdev) - continue; - lndev = libdev->ops.get_netdev(libdev, i + 1); + lndev = ib_device_get_netdev(libdev, i + 1); dev_put(lndev); if (lndev != ndev) continue; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index a04aa0e882f8..716808f374a8 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1054,9 +1054,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; - if (!ibdev->ibdev->ops.get_netdev) - continue; - ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); + ndev = ib_device_get_netdev(ibdev->ibdev, i); if (!ndev) continue; dev_put(ndev); -- cgit v1.2.3 From fc9de52de38f656399d2ce40f7349a6b5f86e787 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Nov 2024 13:03:22 +0000 Subject: rxrpc: Fix missing locking causing hanging calls If a call gets aborted (e.g. because kafs saw a signal) between it being queued for connection and the I/O thread picking up the call, the abort will be prioritised over the connection and it will be removed from local->new_client_calls by rxrpc_disconnect_client_call() without a lock being held. This may cause other calls on the list to disappear if a race occurs. Fix this by taking the client_call_lock when removing a call from whatever list its ->wait_link happens to be on. Signed-off-by: David Howells cc: linux-afs@lists.infradead.org Reported-by: Marc Dionne Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread") Link: https://patch.msgid.link/726660.1730898202@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/conn_client.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index d25bf1cf3670..bb11e8289d6d 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -516,6 +516,7 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local) spin_lock(&local->client_call_lock); list_move_tail(&call->wait_link, &bundle->waiting_calls); + rxrpc_see_call(call, rxrpc_call_see_waiting_call); spin_unlock(&local->client_call_lock); if (rxrpc_bundle_has_space(bundle)) @@ -586,7 +587,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + /* May still be on ->new_client_calls. */ + spin_lock(&local->client_call_lock); list_del_init(&call->wait_link); + spin_unlock(&local->client_call_lock); return; } -- cgit v1.2.3 From d293958a8595ba566fb90b99da4d6263e14fee15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2024 22:19:22 +0000 Subject: net/smc: do not leave a dangling sk pointer in __smc_create() Thanks to commit 4bbd360a5084 ("socket: Print pf->create() when it does not clear sock->sk on failure."), syzbot found an issue with AF_SMC: smc_create must clear sock->sk on failure, family: 43, type: 1, protocol: 0 WARNING: CPU: 0 PID: 5827 at net/socket.c:1565 __sock_create+0x96f/0xa30 net/socket.c:1563 Modules linked in: CPU: 0 UID: 0 PID: 5827 Comm: syz-executor259 Not tainted 6.12.0-rc6-next-20241106-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__sock_create+0x96f/0xa30 net/socket.c:1563 Code: 03 00 74 08 4c 89 e7 e8 4f 3b 85 f8 49 8b 34 24 48 c7 c7 40 89 0c 8d 8b 54 24 04 8b 4c 24 0c 44 8b 44 24 08 e8 32 78 db f7 90 <0f> 0b 90 90 e9 d3 fd ff ff 89 e9 80 e1 07 fe c1 38 c1 0f 8c ee f7 RSP: 0018:ffffc90003e4fda0 EFLAGS: 00010246 RAX: 099c6f938c7f4700 RBX: 1ffffffff1a595fd RCX: ffff888034823c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 00000000ffffffe9 R08: ffffffff81567052 R09: 1ffff920007c9f50 R10: dffffc0000000000 R11: fffff520007c9f51 R12: ffffffff8d2cafe8 R13: 1ffffffff1a595fe R14: ffffffff9a789c40 R15: ffff8880764298c0 FS: 000055557b518380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa62ff43225 CR3: 0000000031628000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sock_create net/socket.c:1616 [inline] __sys_socket_create net/socket.c:1653 [inline] __sys_socket+0x150/0x3c0 net/socket.c:1700 __do_sys_socket net/socket.c:1714 [inline] __se_sys_socket net/socket.c:1712 [inline] For reference, see commit 2d859aff775d ("Merge branch 'do-not-leave-dangling-sk-pointers-in-pf-create-functions'") Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Signed-off-by: Eric Dumazet Cc: Ignat Korchagin Cc: D. Wythe Cc: Dust Li Reviewed-by: Kuniyuki Iwashima Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/20241106221922.1544045-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0316217b7687..9d76e902fd77 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3359,8 +3359,10 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, else rc = smc_create_clcsk(net, sk, family); - if (rc) + if (rc) { sk_common_release(sk); + sock->sk = NULL; + } out: return rc; } -- cgit v1.2.3