From 16d584d2fc8f4ea36203af45a76becd7093586f1 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Jun 2022 12:06:21 +0800 Subject: net/dsa/hirschmann: Add missing of_node_get() in hellcreek_led_setup() of_find_node_by_name() will decrease the refcount of its first arg and we need a of_node_get() to keep refcount balance. Fixes: 7d9ee2e8ff15 ("net: dsa: hellcreek: Add PTP status LEDs") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220622040621.4094304-1-windhl@126.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/hellcreek_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c index 2572c6087bb5..b28baab6d56a 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c @@ -300,6 +300,7 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek) const char *label, *state; int ret = -EINVAL; + of_node_get(hellcreek->dev->of_node); leds = of_find_node_by_name(hellcreek->dev->of_node, "leds"); if (!leds) { dev_err(hellcreek->dev, "No LEDs specified in device tree!\n"); -- cgit v1.2.3 From 7c97bc0128b2eecc703106112679a69d446d1a12 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 22 Jun 2022 20:02:04 -0700 Subject: net: dsa: bcm_sf2: force pause link settings The pause settings reported by the PHY should also be applied to the GMII port status override otherwise the switch will not generate pause frames towards the link partner despite the advertisement saying otherwise. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220623030204.1966851-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 87e81c636339..be0edfa093d0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -878,6 +878,11 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, if (duplex == DUPLEX_FULL) reg |= DUPLX_MODE; + if (tx_pause) + reg |= TXFLOW_CNTL; + if (rx_pause) + reg |= RXFLOW_CNTL; + core_writel(priv, reg, offset); } -- cgit v1.2.3 From ad887a507d7386de09a532c5d303ed659eba2cfb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 22 Jun 2022 13:54:16 +0200 Subject: net/ncsi: use proper "mellanox" DT vendor prefix "mlx" Devicetree vendor prefix is not documented and instead "mellanox" should be used. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220622115416.7400-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-manage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 78814417d753..80713febfac6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1803,7 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; - if (np && of_get_property(np, "mlx,multi-host", NULL)) + if (np && (of_get_property(np, "mellanox,multi-host", NULL) || + of_get_property(np, "mlx,multi-host", NULL))) ndp->mlx_multi_host = true; } -- cgit v1.2.3 From 1228b34c8d0ecf6de18c4c95d22f60cc8607c50a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jun 2022 15:02:20 +0000 Subject: net: clear msg_get_inq in __sys_recvfrom() and __copy_msghdr_from_user() syzbot reported uninit-value in tcp_recvmsg() [1] Issue here is that msg->msg_get_inq should have been cleared, otherwise tcp_recvmsg() might read garbage and perform more work than needed, or have undefined behavior. Given CONFIG_INIT_STACK_ALL_ZERO=y is probably going to be the default soon, I chose to change __sys_recvfrom() to clear all fields but msghdr.addr which might be not NULL. For __copy_msghdr_from_user(), I added an explicit clear of kmsg->msg_get_inq. [1] BUG: KMSAN: uninit-value in tcp_recvmsg+0x6cf/0xb60 net/ipv4/tcp.c:2557 tcp_recvmsg+0x6cf/0xb60 net/ipv4/tcp.c:2557 inet_recvmsg+0x13a/0x5a0 net/ipv4/af_inet.c:850 sock_recvmsg_nosec net/socket.c:995 [inline] sock_recvmsg net/socket.c:1013 [inline] __sys_recvfrom+0x696/0x900 net/socket.c:2176 __do_sys_recvfrom net/socket.c:2194 [inline] __se_sys_recvfrom net/socket.c:2190 [inline] __x64_sys_recvfrom+0x122/0x1c0 net/socket.c:2190 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Local variable msg created at: __sys_recvfrom+0x81/0x900 net/socket.c:2154 __do_sys_recvfrom net/socket.c:2194 [inline] __se_sys_recvfrom net/socket.c:2190 [inline] __x64_sys_recvfrom+0x122/0x1c0 net/socket.c:2190 CPU: 0 PID: 3493 Comm: syz-executor170 Not tainted 5.19.0-rc3-syzkaller-30868-g4b28366af7d9 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: f94fd25cb0aa ("tcp: pass back data left in socket after receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jens Axboe Tested-by: Alexander Potapenko Link: https://lore.kernel.org/r/20220622150220.1091182-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/socket.c b/net/socket.c index 2bc8773d9dc5..96300cdc0625 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2149,10 +2149,13 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len) { + struct sockaddr_storage address; + struct msghdr msg = { + /* Save some cycles and don't copy the address if not needed */ + .msg_name = addr ? (struct sockaddr *)&address : NULL, + }; struct socket *sock; struct iovec iov; - struct msghdr msg; - struct sockaddr_storage address; int err, err2; int fput_needed; @@ -2163,14 +2166,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, if (!sock) goto out; - msg.msg_control = NULL; - msg.msg_controllen = 0; - /* Save some cycles and don't copy the address if not needed */ - msg.msg_name = addr ? (struct sockaddr *)&address : NULL; - /* We assume all kernel code knows the size of sockaddr_storage */ - msg.msg_namelen = 0; - msg.msg_iocb = NULL; - msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); @@ -2375,6 +2370,7 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, return -EFAULT; kmsg->msg_control_is_user = true; + kmsg->msg_get_inq = 0; kmsg->msg_control_user = msg.msg_control; kmsg->msg_controllen = msg.msg_controllen; kmsg->msg_flags = msg.msg_flags; -- cgit v1.2.3 From b968080808f7f28b89aa495b7402ba48eb17ee93 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Wed, 22 Jun 2022 17:02:34 -0700 Subject: selftests/net: pass ipv6_args to udpgso_bench's IPv6 TCP test udpgso_bench.sh has been running its IPv6 TCP test with IPv4 arguments since its initial conmit. Looks like a typo. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Cc: willemb@google.com Signed-off-by: Dimitris Michailidis Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20220623000234.61774-1-dmichail@fungible.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e..dc932fd65363 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } -- cgit v1.2.3 From 935336c191040809bf5739654ea337c13fe8f9af Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 23 Jun 2022 11:12:31 +0200 Subject: selftests/bpf: Test sockmap update when socket has ULP Cover the scenario when we cannot insert a socket into the sockmap, because it has it is using ULP. Failed insert should not have any effect on the ULP state. This is a regression test. Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220623091231.417138-1-jakub@cloudflare.com Signed-off-by: Jakub Kicinski --- .../selftests/bpf/prog_tests/sockmap_ktls.c | 84 +++++++++++++++++++--- 1 file changed, 75 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index af293ea1542c..e172d89e92e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -4,6 +4,7 @@ * Tests for sockmap/sockhash holding kTLS sockets. */ +#include #include "test_progs.h" #define MAX_TEST_NAME 80 @@ -92,9 +93,78 @@ close_srv: close(srv); } +static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) +{ + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + struct sockaddr_in6 *v6; + struct sockaddr_in *v4; + int err, s, zero = 0; + + switch (family) { + case AF_INET: + v4 = (struct sockaddr_in *)&addr; + v4->sin_family = AF_INET; + break; + case AF_INET6: + v6 = (struct sockaddr_in6 *)&addr; + v6->sin6_family = AF_INET6; + break; + default: + PRINT_FAIL("unsupported socket family %d", family); + return; + } + + s = socket(family, SOCK_STREAM, 0); + if (!ASSERT_GE(s, 0, "socket")) + return; + + err = bind(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + goto close; + + err = getsockname(s, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + goto close; + + err = connect(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "connect")) + goto close; + + /* save sk->sk_prot and set it to tls_prots */ + err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto close; + + /* sockmap update should not affect saved sk_prot */ + err = bpf_map_update_elem(map, &zero, &s, BPF_ANY); + if (!ASSERT_ERR(err, "sockmap update elem")) + goto close; + + /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */ + err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + ASSERT_OK(err, "setsockopt(TCP_NODELAY)"); + +close: + close(s); +} + +static const char *fmt_test_name(const char *subtest_name, int family, + enum bpf_map_type map_type) +{ + const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"; + const char *family_str = AF_INET ? "IPv4" : "IPv6"; + static char test_name[MAX_TEST_NAME]; + + snprintf(test_name, MAX_TEST_NAME, + "sockmap_ktls %s %s %s", + subtest_name, family_str, map_type_str); + + return test_name; +} + static void run_tests(int family, enum bpf_map_type map_type) { - char test_name[MAX_TEST_NAME]; int map; map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); @@ -103,14 +173,10 @@ static void run_tests(int family, enum bpf_map_type map_type) return; } - snprintf(test_name, MAX_TEST_NAME, - "sockmap_ktls disconnect_after_delete %s %s", - family == AF_INET ? "IPv4" : "IPv6", - map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"); - if (!test__start_subtest(test_name)) - return; - - test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) + test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) + test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } -- cgit v1.2.3 From 6f0012e35160cd08a53e46e3b3bbf724b92dfe68 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 05:04:36 +0000 Subject: tcp: add a missing nf_reset_ct() in 3WHS handling When the third packet of 3WHS connection establishment contains payload, it is added into socket receive queue without the XFRM check and the drop of connection tracking context. This means that if the data is left unread in the socket receive queue, conntrack module can not be unloaded. As most applications usually reads the incoming data immediately after accept(), bug has been hiding for quite a long time. Commit 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists") exposed this bug because even if the application reads this data, the skb with nfct state could stay in a per-cpu cache for an arbitrary time, if said cpu no longer process RX softirqs. Many thanks to Ilya Maximets for reporting this issue, and for testing various patches: https://lore.kernel.org/netdev/20220619003919.394622-1-i.maximets@ovn.org/ Note that I also added a missing xfrm4_policy_check() call, although this is probably not a big issue, as the SYN packet should have been dropped earlier. Fixes: b59c270104f0 ("[NETFILTER]: Keep conntrack reference until IPsec policy checks are done") Reported-by: Ilya Maximets Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Pablo Neira Ayuso Cc: Steffen Klassert Tested-by: Ilya Maximets Reviewed-by: Ilya Maximets Link: https://lore.kernel.org/r/20220623050436.1290307-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe8f23b95d32..da5a3c44c4fb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1964,7 +1964,10 @@ process: struct sock *nsk; sk = req->rsk_listener; - drop_reason = tcp_inbound_md5_hash(sk, skb, + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + drop_reason = SKB_DROP_REASON_XFRM_POLICY; + else + drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, &iph->daddr, AF_INET, dif, sdif); if (unlikely(drop_reason)) { @@ -2016,6 +2019,7 @@ process: } goto discard_and_relse; } + nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); -- cgit v1.2.3 From 3b9bc84d311104906d2b4995a9a02d7b7ddab2db Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Jun 2022 21:20:39 -0700 Subject: net: tun: unlink NAPI from device on destruction Syzbot found a race between tun file and device destruction. NAPIs live in struct tun_file which can get destroyed before the netdev so we have to del them explicitly. The current code is missing deleting the NAPI if the queue was detached first. Fixes: 943170998b20 ("tun: enable NAPI for TUN/TAP driver") Reported-by: syzbot+b75c138e9286ac742647@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220623042039.2274708-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 87a635aac008..7fd0288c3789 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -727,6 +727,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); -- cgit v1.2.3 From c96614eeab663646f57f67aa591e015abd8bd0ba Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Thu, 23 Jun 2022 15:46:44 +0200 Subject: net: dp83822: disable false carrier interrupt When unplugging an Ethernet cable, false carrier events were produced by the PHY at a very high rate. Once the false carrier counter full, an interrupt was triggered every few clock cycles until the cable was replugged. This resulted in approximately 10k/s interrupts. Since the false carrier counter (FCSCR) is never used, we can safely disable this interrupt. In addition to improving performance, this also solved MDIO read timeouts I was randomly encountering with an i.MX8 fec MAC because of the interrupt flood. The interrupt count and MDIO timeout fix were tested on a v5.4.110 kernel. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index e6ad3a494d32..95ef507053a6 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -230,7 +230,6 @@ static int dp83822_config_intr(struct phy_device *phydev) return misr_status; misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_FALSE_CARRIER_HF_INT_EN | DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); -- cgit v1.2.3 From 0e597e2affb90d6ea48df6890d882924acf71e19 Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Thu, 23 Jun 2022 15:46:45 +0200 Subject: net: dp83822: disable rx error interrupt Some RX errors, notably when disconnecting the cable, increase the RCSR register. Once half full (0x7fff), an interrupt flood is generated. I measured ~3k/s interrupts even after the RX errors transfer was stopped. Since we don't read and clear the RCSR register, we should disable this interrupt. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 95ef507053a6..8549e0e356c9 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -229,8 +229,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (misr_status < 0) return misr_status; - misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_LINK_STAT_INT_EN | + misr_status |= (DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); -- cgit v1.2.3 From 3b89b511ea0c705cc418440e2abf9d692a556d84 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jun 2022 16:32:32 +0300 Subject: net: fix IFF_TX_SKB_NO_LINEAR definition The "1<<31" shift has a sign extension bug so IFF_TX_SKB_NO_LINEAR is 0xffffffff80000000 instead of 0x0000000080000000. Fixes: c2ff53d8049f ("net: Add priv_flags for allow tx skb without linear") Signed-off-by: Dan Carpenter Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/YrRrcGttfEVnf85Q@kili Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..2563d30736e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1671,7 +1671,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, - IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; -- cgit v1.2.3 From 853a7614880231747040cada91d2b8d2e995c51a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Jun 2022 15:30:20 +0000 Subject: tunnels: do not assume mac header is set in skb_tunnel_check_pmtu() Recently added debug in commit f9aefd6b2aa3 ("net: warn if mac header was not set") caught a bug in skb_tunnel_check_pmtu(), as shown in this syzbot report [1]. In ndo_start_xmit() paths, there is really no need to use skb->mac_header, because skb->data is supposed to point at it. [1] WARNING: CPU: 1 PID: 8604 at include/linux/skbuff.h:2784 skb_mac_header_len include/linux/skbuff.h:2784 [inline] WARNING: CPU: 1 PID: 8604 at include/linux/skbuff.h:2784 skb_tunnel_check_pmtu+0x5de/0x2f90 net/ipv4/ip_tunnel_core.c:413 Modules linked in: CPU: 1 PID: 8604 Comm: syz-executor.3 Not tainted 5.19.0-rc2-syzkaller-00443-g8720bd951b8e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_mac_header_len include/linux/skbuff.h:2784 [inline] RIP: 0010:skb_tunnel_check_pmtu+0x5de/0x2f90 net/ipv4/ip_tunnel_core.c:413 Code: 00 00 00 00 fc ff df 4c 89 fa 48 c1 ea 03 80 3c 02 00 0f 84 b9 fe ff ff 4c 89 ff e8 7c 0f d7 f9 e9 ac fe ff ff e8 c2 13 8a f9 <0f> 0b e9 28 fc ff ff e8 b6 13 8a f9 48 8b 54 24 70 48 b8 00 00 00 RSP: 0018:ffffc90002e4f520 EFLAGS: 00010212 RAX: 0000000000000324 RBX: ffff88804d5fd500 RCX: ffffc90005b52000 RDX: 0000000000040000 RSI: ffffffff87f05e3e RDI: 0000000000000003 RBP: ffffc90002e4f650 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000000 R12: 000000000000ffff R13: 0000000000000000 R14: 000000000000ffcd R15: 000000000000001f FS: 00007f3babba9700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 0000000075319000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: geneve_xmit_skb drivers/net/geneve.c:927 [inline] geneve_xmit+0xcf8/0x35d0 drivers/net/geneve.c:1107 __netdev_start_xmit include/linux/netdevice.h:4805 [inline] netdev_start_xmit include/linux/netdevice.h:4819 [inline] __dev_direct_xmit+0x500/0x730 net/core/dev.c:4309 dev_direct_xmit include/linux/netdevice.h:3007 [inline] packet_direct_xmit+0x1b8/0x2c0 net/packet/af_packet.c:282 packet_snd net/packet/af_packet.c:3073 [inline] packet_sendmsg+0x21f4/0x55d0 net/packet/af_packet.c:3104 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2489 ___sys_sendmsg+0xf3/0x170 net/socket.c:2543 __sys_sendmsg net/socket.c:2572 [inline] __do_sys_sendmsg net/socket.c:2581 [inline] __se_sys_sendmsg net/socket.c:2579 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3baaa89109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3babba9168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f3baab9bf60 RCX: 00007f3baaa89109 RDX: 0000000000000000 RSI: 0000000020000a00 RDI: 0000000000000003 RBP: 00007f3baaae305d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe74f2543f R14: 00007f3babba9300 R15: 0000000000022000 Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Stefano Brivio Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6b2dc7b2b612..cc1caab4a654 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || - (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu)) + (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); -- cgit v1.2.3 From cb8092d70a6f5f01ec1490fce4d35efed3ed996c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 24 Jun 2022 12:24:31 -0400 Subject: tipc: move bc link creation back to tipc_node_create Shuang Li reported a NULL pointer dereference crash: [] BUG: kernel NULL pointer dereference, address: 0000000000000068 [] RIP: 0010:tipc_link_is_up+0x5/0x10 [tipc] [] Call Trace: [] [] tipc_bcast_rcv+0xa2/0x190 [tipc] [] tipc_node_bc_rcv+0x8b/0x200 [tipc] [] tipc_rcv+0x3af/0x5b0 [tipc] [] tipc_udp_recv+0xc7/0x1e0 [tipc] It was caused by the 'l' passed into tipc_bcast_rcv() is NULL. When it creates a node in tipc_node_check_dest(), after inserting the new node into hashtable in tipc_node_create(), it creates the bc link. However, there is a gap between this insert and bc link creation, a bc packet may come in and get the node from the hashtable then try to dereference its bc link, which is NULL. This patch is to fix it by moving the bc link creation before inserting into the hashtable. Note that for a preliminary node becoming "real", the bc link creation should also be called before it's rehashed, as we don't create it for preliminary nodes. Fixes: 4cbf8ac2fe5a ("tipc: enable creating a "preliminary" node") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 6ef95ce565bd..b48d97cbbe29 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; - struct tipc_link *l; unsigned long intv; int bearer_id; int i; @@ -488,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, goto exit; /* A preliminary node becomes "real" now, refresh its data */ tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } n->preliminary = false; n->addr = addr; hlist_del_rcu(&n->hash); @@ -567,7 +577,16 @@ update: n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - n->bc_entry.link = NULL; + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n); + n = NULL; + goto exit; + } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ @@ -1155,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; - struct tipc_link *l, *snd_l; + struct tipc_link *l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; @@ -1175,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr, return; tipc_node_write_lock(n); - if (unlikely(!n->bc_entry.link)) { - snd_l = tipc_bc_sndlink(net); - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, peer_id, U16_MAX, - tipc_link_min_win(snd_l), - tipc_link_max_win(snd_l), - n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, snd_l, - &n->bc_entry.link)) { - pr_warn("Broadcast rcv link creation failed, no mem\n"); - tipc_node_write_unlock_fast(n); - tipc_node_put(n); - return; - } - } le = &n->links[b->identity]; -- cgit v1.2.3 From 05907f10e235680cc7fb196810e4ad3215d5e648 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jun 2022 14:01:41 +0200 Subject: netfilter: nft_dynset: restore set element counter when failing to update This patch fixes a race condition. nft_rhash_update() might fail for two reasons: - Element already exists in the hashtable. - Another packet won race to insert an entry in the hashtable. In both cases, new() has already bumped the counter via atomic_add_unless(), therefore, decrement the set element counter. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index df40314de21f..76de6c8d9865 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -152,6 +153,7 @@ out: err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } -- cgit v1.2.3 From e34b9ed96ce3b06c79bf884009b16961ca478f87 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 16:43:57 +0200 Subject: netfilter: nf_tables: avoid skb access on nf_stolen When verdict is NF_STOLEN, the skb might have been freed. When tracing is enabled, this can result in a use-after-free: 1. access to skb->nf_trace 2. access to skb->mark 3. computation of trace id 4. dump of packet payload To avoid 1, keep a cached copy of skb->nf_trace in the trace state struct. Refresh this copy whenever verdict is != STOLEN. Avoid 2 by skipping skb->mark access if verdict is STOLEN. 3 is avoided by precomputing the trace id. Only dump the packet when verdict is not "STOLEN". Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++------ net/netfilter/nf_tables_core.c | 24 ++++++++++++++++++--- net/netfilter/nf_tables_trace.c | 44 +++++++++++++++++++++------------------ 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 279ae0fff7ad..5c4e5a96a984 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1338,24 +1338,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 53f40e473855..3ddce24ac76d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, enum nft_trace_types type) { - const struct nft_pktinfo *pkt = info->pkt; - - if (!info->trace || !pkt->skb->nf_trace) + if (!info->trace || !info->nf_trace) return; info->chain = chain; @@ -42,11 +40,24 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + info->nf_trace = pkt->skb->nf_trace; info->rule = rule; __nft_trace_packet(info, chain, type); } } +static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info) +{ + if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + if (info->trace) + info->nf_trace = pkt->skb->nf_trace; + } +} + static void nft_bitwise_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { @@ -85,6 +96,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_regs *regs) { + const struct nft_pktinfo *pkt = info->pkt; enum nft_trace_types type; switch (regs->verdict.code) { @@ -92,8 +104,13 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, case NFT_RETURN: type = NFT_TRACETYPE_RETURN; break; + case NF_STOLEN: + type = NFT_TRACETYPE_RULE; + /* can't access skb->nf_trace; use copy */ + break; default: type = NFT_TRACETYPE_RULE; + info->nf_trace = pkt->skb->nf_trace; break; } @@ -254,6 +271,7 @@ next_rule: switch (regs.verdict.code) { case NFT_BREAK: regs.verdict.code = NFT_CONTINUE; + nft_trace_copy_nftrace(&info); continue; case NFT_CONTINUE: nft_trace_packet(&info, chain, rule, diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 5041725423c2..1163ba9c1401 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,22 +25,6 @@ DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); EXPORT_SYMBOL_GPL(nft_trace_enabled); -static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) -{ - __be32 id; - - /* using skb address as ID results in a limited number of - * values (and quick reuse). - * - * So we attempt to use as many skb members that will not - * change while skb is with netfilter. - */ - id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), - skb->skb_iif); - - return nla_put_be32(nlskb, NFTA_TRACE_ID, id); -} - static int trace_fill_header(struct sk_buff *nlskb, u16 type, const struct sk_buff *skb, int off, unsigned int len) @@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; + u32 mark = 0; u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) @@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) goto nla_put_failure; - if (trace_fill_id(skb, pkt->skb)) + if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure; if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) @@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info) case NFT_TRACETYPE_RULE: if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) goto nla_put_failure; + + /* pkt->skb undefined iff NF_STOLEN, disable dump */ + if (info->verdict->code == NF_STOLEN) + info->packet_dumped = true; + else + mark = pkt->skb->mark; + break; case NFT_TRACETYPE_POLICY: + mark = pkt->skb->mark; + if (nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(info->basechain->policy))) goto nla_put_failure; break; } - if (pkt->skb->mark && - nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) goto nla_put_failure; if (!info->packet_dumped) { @@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_chain *chain) { + static siphash_key_t trace_key __read_mostly; + struct sk_buff *skb = pkt->skb; + info->basechain = nft_base_chain(chain); info->trace = true; + info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; info->pkt = pkt; info->verdict = verdict; + + net_get_random_once(&trace_key, sizeof(trace_key)); + + info->skbid = (u32)siphash_3u32(hash32_ptr(skb), + skb_get_hash(skb), + skb->skb_iif, + &trace_key); } -- cgit v1.2.3 From c2577862eeb0be94f151f2f1fff662b028061b00 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jun 2022 18:26:03 +0200 Subject: netfilter: br_netfilter: do not skip all hooks with 0 priority When br_netfilter module is loaded, skbs may be diverted to the ipv4/ipv6 hooks, just like as if we were routing. Unfortunately, bridge filter hooks with priority 0 may be skipped in this case. Example: 1. an nftables bridge ruleset is loaded, with a prerouting hook that has priority 0. 2. interface is added to the bridge. 3. no tcp packet is ever seen by the bridge prerouting hook. 4. flush the ruleset 5. load the bridge ruleset again. 6. tcp packets are processed as expected. After 1) the only registered hook is the bridge prerouting hook, but its not called yet because the bridge hasn't been brought up yet. After 2), hook order is: 0 br_nf_pre_routing // br_netfilter internal hook 0 chain bridge f prerouting // nftables bridge ruleset The packet is diverted to br_nf_pre_routing. If call-iptables is off, the nftables bridge ruleset is called as expected. But if its enabled, br_nf_hook_thresh() will skip it because it assumes that all 0-priority hooks had been called previously in bridge context. To avoid this, check for the br_nf_pre_routing hook itself, we need to resume directly after it, even if this hook has a priority of 0. Unfortunately, this still results in different packet flow. With this fix, the eval order after in 3) is: 1. br_nf_pre_routing 2. ip(6)tables (if enabled) 3. nftables bridge but after 5 its the much saner: 1. nftables bridge 2. br_nf_pre_routing 3. ip(6)tables (if enabled) Unfortunately I don't see a solution here: It would be possible to move br_nf_pre_routing to a higher priority so that it will be called later in the pipeline, but this also impacts ebtables evaluation order, and would still result in this very ordering problem for all nftables-bridge hooks with the same priority as the br_nf_pre_routing one. Searching back through the git history I don't think this has ever behaved in any other way, hence, no fixes-tag. Reported-by: Radim Hrazdil Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 4fd882686b04..ff4779036649 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); -- cgit v1.2.3 From a8fc8cb5692aebb9c6f7afd4265366d25dcd1d01 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Jun 2022 21:21:05 -0700 Subject: net: tun: stop NAPI when detaching queues While looking at a syzbot report I noticed the NAPI only gets disabled before it's deleted. I think that user can detach the queue before destroying the device and the NAPI will never be stopped. Fixes: 943170998b20 ("tun: enable NAPI for TUN/TAP driver") Acked-by: Petar Penkov Link: https://lore.kernel.org/r/20220623042105.2274812-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7fd0288c3789..e2eb35887394 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -273,6 +273,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } +static void tun_napi_enable(struct tun_file *tfile) +{ + if (tfile->napi_enabled) + napi_enable(&tfile->napi); +} + static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) @@ -653,8 +659,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); - } else + } else { tun_disable_queue(tun, tfile); + tun_napi_disable(tfile); + } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); @@ -808,6 +816,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); + tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); -- cgit v1.2.3 From 8ee9d82cd0a45e7d050ade598c9f33032a0f2891 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 26 Jun 2022 21:33:48 -0700 Subject: epic100: fix use after free on rmmod epic_close() calls epic_rx() and uses dma buffer, but in epic_remove_one() we already freed the dma buffer. To fix this issue, reorder function calls like in the .probe function. BUG: KASAN: use-after-free in epic_rx+0xa6/0x7e0 [epic100] Call Trace: epic_rx+0xa6/0x7e0 [epic100] epic_close+0xec/0x2f0 [epic100] unregister_netdev+0x18/0x20 epic_remove_one+0xaa/0xf0 [epic100] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yilun Wu Signed-off-by: Tong Zhang Reviewed-by: Francois Romieu Link: https://lore.kernel.org/r/20220627043351.25615-1-ztong0001@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/epic100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index a0654e88444c..0329caf63279 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1515,14 +1515,14 @@ static void epic_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct epic_private *ep = netdev_priv(dev); + unregister_netdev(dev); dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); - unregister_netdev(dev); pci_iounmap(pdev, ep->ioaddr); - pci_release_regions(pdev); free_netdev(dev); + pci_release_regions(pdev); pci_disable_device(pdev); /* pci_power_off(pdev, -1); */ } -- cgit v1.2.3 From 76b39b94382f9e0a639e1c70c3253de248cc4c83 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 23 Jun 2022 11:07:41 -0300 Subject: net/sched: act_api: Notify user space if any actions were flushed before error If during an action flush operation one of the actions is still being referenced, the flush operation is aborted and the kernel returns to user space with an error. However, if the kernel was able to flush, for example, 3 actions and failed on the fourth, the kernel will not notify user space that it deleted 3 actions before failing. This patch fixes that behaviour by notifying user space of how many actions were deleted before flush failed and by setting extack with a message describing what happened. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index da9733da9868..817065aa2833 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -588,7 +588,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -604,20 +605,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -638,7 +644,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { -- cgit v1.2.3 From 88153e29c1e0f3ace8c831b06f6cea9503f16cec Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 23 Jun 2022 11:07:42 -0300 Subject: selftests: tc-testing: Add testcases to test new flush behaviour Add tdc test cases to verify new flush behaviour is correct, which do the following: - Try to flush only one action which is being referenced by a filter - Try to flush three actions where the last one (index 3) is being referenced by a filter Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/actions/gact.json | 77 ++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index b24494c6f546..c652e8c1157d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -609,5 +609,82 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "7f52", + "name": "Try to flush action which is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + "$TC actions add action pass index 1", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "1", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] + }, + { + "id": "ae1e", + "name": "Try to flush actions when last one is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + [ + "$TC actions add action pass index 1", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 2", + "$TC actions add action drop index 3", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] } ] -- cgit v1.2.3 From 4bbfed9112ca9da88ac83d5ffe62c988f7169e9f Mon Sep 17 00:00:00 2001 From: Shreenidhi Shedi Date: Sun, 26 Jun 2022 18:59:47 +0530 Subject: octeon_ep: use bitwise AND This should be bitwise operator not logical. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Shreenidhi Shedi Link: https://lore.kernel.org/r/20220626132947.3992423-1-sshedi@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index cc51149790ff..3d5d39a52fe6 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -52,7 +52,7 @@ #define CN93_SDP_EPF_RINFO_SRN(val) ((val) & 0xFF) #define CN93_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) -#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) && 0xFF) +#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0xFF) /* SDP Function select */ #define CN93_SDP_FUNC_SEL_EPF_BIT_POS 8 -- cgit v1.2.3 From 6b9f1d46fdada756294d2d5f04df613478386d34 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 26 Jun 2022 22:00:39 +0200 Subject: MAINTAINERS: nfc: drop Charles Gorand from NXP-NCI Mails to Charles get an auto reply, that he is no longer working at Eff'Innov technologies. Drop the entry and mark the driver as orphaned. Signed-off-by: Michael Walle Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220626200039.4062784-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6cc825857722..6a288d59ff79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14350,9 +14350,8 @@ F: Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml F: sound/soc/codecs/tfa989x.c NXP-NCI NFC DRIVER -R: Charles Gorand L: linux-nfc@lists.01.org (subscribers-only) -S: Supported +S: Orphan F: Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml F: drivers/nfc/nxp-nci -- cgit v1.2.3 From 805206e66fab4ba1e0ebd19402006d62cd1d4902 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Jun 2022 09:51:38 +0200 Subject: net: asix: fix "can't send until first packet is send" issue If cable is attached after probe sequence, the usbnet framework would not automatically start processing RX packets except at least one packet was transmitted. On systems with any kind of address auto configuration this issue was not detected, because some packets are send immediately after link state is changed to "running". With this patch we will notify usbnet about link status change provided by the PHYlib. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Signed-off-by: Oleksij Rempel Tested-by: Anton Lundin Link: https://lore.kernel.org/r/20220624075139.3139300-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 632fa6c1d5e3..b4a1b7abcfc9 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -431,6 +431,7 @@ void asix_adjust_link(struct net_device *netdev) asix_write_medium_mode(dev, mode, 0); phy_print_status(phydev); + usbnet_link_change(dev, phydev->link, 0); } int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) -- cgit v1.2.3 From ce95ab775f8d8e89a038c0e5611a7381a2ef8e43 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Jun 2022 09:51:39 +0200 Subject: net: usb: asix: do not force pause frames support We should respect link partner capabilities and not force flow control support on every link. Even more, in current state the MAC driver do not advertises pause support so we should not keep flow control enabled at all. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Signed-off-by: Oleksij Rempel Tested-by: Anton Lundin Link: https://lore.kernel.org/r/20220624075139.3139300-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 2c81236c6c7c..45d3cc5cc355 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -126,8 +126,7 @@ AX_MEDIUM_RE) #define AX88772_MEDIUM_DEFAULT \ - (AX_MEDIUM_FD | AX_MEDIUM_RFC | \ - AX_MEDIUM_TFC | AX_MEDIUM_PS | \ + (AX_MEDIUM_FD | AX_MEDIUM_PS | \ AX_MEDIUM_AC | AX_MEDIUM_RE) /* AX88772 & AX88178 RX_CTL values */ -- cgit v1.2.3 From 3b0dc529f56b5f2328244130683210be98f16f7f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Jun 2022 14:00:15 +0200 Subject: ipv6: take care of disable_policy when restoring routes When routes corresponding to addresses are restored by fixup_permanent_addr(), the dst_nopolicy parameter was not set. The typical use case is a user that configures an address on a down interface and then put this interface up. Let's take care of this flag in addrconf_f6i_alloc(), so that every callers benefit ont it. CC: stable@kernel.org CC: David Forster Fixes: df789fe75206 ("ipv6: Provide ipv6 version of "disable_policy" sysctl") Reported-by: Siwar Zitouni Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220623120015.32640-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 4 ---- net/ipv6/route.c | 9 ++++++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b1932502e9e..5864cbc30db6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1109,10 +1109,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - if (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy) - f6i->dst_nopolicy = true; - neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d25dc83bac62..828355710c57 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4569,8 +4569,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (!IS_ERR(f6i)) + if (!IS_ERR(f6i)) { f6i->dst_nocount = true; + + if (!anycast && + (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy)) + f6i->dst_nopolicy = true; + } + return f6i; } -- cgit v1.2.3 From ab84db251c04d38b8dc7ee86e13d4050bedb1c88 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2022 10:28:13 +0000 Subject: net: bonding: fix possible NULL deref in rlb code syzbot has two reports involving the same root cause. bond_alb_initialize() must not set bond->alb_info.rlb_enabled if a memory allocation error is detected. Report 1: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 0 PID: 12276 Comm: kworker/u4:10 Not tainted 5.19.0-rc3-syzkaller-00132-g3b89b511ea0c #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:rlb_clear_slave+0x10e/0x690 drivers/net/bonding/bond_alb.c:393 Code: 8e fc 83 fb ff 0f 84 74 02 00 00 e8 cc 2a 8e fc 48 8b 44 24 08 89 dd 48 c1 e5 06 4c 8d 34 28 49 8d 7e 14 48 89 f8 48 c1 e8 03 <42> 0f b6 14 20 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RSP: 0018:ffffc90018a8f678 EFLAGS: 00010203 RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88803375bb00 RSI: ffffffff84ec4ac4 RDI: 0000000000000014 RBP: 0000000000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffff8880ac889000 R14: 0000000000000000 R15: ffff88815a668c80 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005597077e10b0 CR3: 0000000026668000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bond_alb_deinit_slave+0x43c/0x6b0 drivers/net/bonding/bond_alb.c:1663 __bond_release_one.cold+0x383/0xd53 drivers/net/bonding/bond_main.c:2370 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3778 [inline] bond_netdev_event+0x993/0xad0 drivers/net/bonding/bond_main.c:3889 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] unregister_netdevice_many+0x948/0x18b0 net/core/dev.c:10839 default_device_exit_batch+0x449/0x590 net/core/dev.c:11333 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 Report 2: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 PID: 5206 Comm: syz-executor.1 Not tainted 5.18.0-syzkaller-12108-g58f9d52ff689 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:rlb_req_update_slave_clients+0x109/0x2f0 drivers/net/bonding/bond_alb.c:502 Code: 5d 18 8f fc 41 80 3e 00 0f 85 a5 01 00 00 89 d8 48 c1 e0 06 49 03 84 24 68 01 00 00 48 8d 78 30 49 89 c7 48 89 fa 48 c1 ea 03 <80> 3c 2a 00 0f 85 98 01 00 00 4d 39 6f 30 75 83 e8 22 18 8f fc 49 RSP: 0018:ffffc9000300ee80 EFLAGS: 00010206 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90016c11000 RDX: 0000000000000006 RSI: ffffffff84eb6bf3 RDI: 0000000000000030 RBP: dffffc0000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: ffff888027c80c80 R13: ffff88807d7ff800 R14: ffffed1004f901bd R15: 0000000000000000 FS: 00007f6f46c58700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 00000000516cc000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: alb_fasten_mac_swap+0x886/0xa80 drivers/net/bonding/bond_alb.c:1070 bond_alb_handle_active_change+0x624/0x1050 drivers/net/bonding/bond_alb.c:1765 bond_change_active_slave+0xfa1/0x29b0 drivers/net/bonding/bond_main.c:1173 bond_select_active_slave+0x23f/0xa50 drivers/net/bonding/bond_main.c:1253 bond_enslave+0x3b34/0x53b0 drivers/net/bonding/bond_main.c:2159 do_set_master+0x1c8/0x220 net/core/rtnetlink.c:2577 rtnl_newlink_create net/core/rtnetlink.c:3380 [inline] __rtnl_newlink+0x13ac/0x17e0 net/core/rtnetlink.c:3580 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x43a/0xc90 net/core/rtnetlink.c:6089 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f6f45a89109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6f46c58168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f6f45b9c030 RCX: 00007f6f45a89109 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000006 RBP: 00007f6f45ae308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffed99029af R14: 00007f6f46c58300 R15: 0000000000022000 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-b