From 0e63208915a8d7590d0a6218dadb2a6a00ac705a Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 4 Mar 2019 23:26:10 +0100 Subject: tipc: fix RDM/DGRAM connect() regression Fix regression bug introduced in commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Only signal -EDESTADDRREQ for RDM/DGRAM if we don't have a cached sockaddr. Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e482b342bfa8..3274ef625dba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1333,7 +1333,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(!dest)) { dest = &tsk->peer; - if (!syn || dest->family != AF_TIPC) + if (!syn && dest->family != AF_TIPC) return -EDESTADDRREQ; } -- cgit v1.2.3 From 4177c5d94264b57f426ef5c45a788808d1a1e536 Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 5 Mar 2019 08:29:28 +0800 Subject: net/sched: act_tunnel_key: Fix double free dst_cache dst_cache_destroy will be called in dst_release dst_release-->dst_destroy_rcu-->dst_destroy-->metadata_dst_free -->dst_cache_destroy It should not call dst_cache_destroy before dst_release Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support") Signed-off-by: wenxu Signed-off-by: David S. 
Miller --- net/sched/act_tunnel_key.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 3beb4717d3b7..7c6591b991d5 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -201,14 +201,9 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) { if (!p) return; - if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { -#ifdef CONFIG_DST_CACHE - struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info; - - dst_cache_destroy(&info->dst_cache); -#endif + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) dst_release(&p->tcft_enc_metadata->dst); - } + kfree_rcu(p, rcu); } @@ -338,7 +333,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, &metadata->u.tun_info, opts_len, extack); if (ret < 0) - goto release_dst_cache; + goto release_tun_meta; } metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; @@ -354,14 +349,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, &act_tunnel_key_ops, bind, true); if (ret) { NL_SET_ERR_MSG(extack, "Cannot create TC IDR"); - goto release_dst_cache; + goto release_tun_meta; } ret = ACT_P_CREATED; } else if (!ovr) { NL_SET_ERR_MSG(extack, "TC IDR already exists"); ret = -EEXIST; - goto release_dst_cache; + goto release_tun_meta; } t = to_tunnel_key(*a); @@ -371,7 +366,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; - goto release_dst_cache; + goto release_tun_meta; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; @@ -388,12 +383,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return ret; -release_dst_cache: -#ifdef CONFIG_DST_CACHE - if (metadata) - dst_cache_destroy(&metadata->u.tun_info.dst_cache); release_tun_meta: -#endif if (metadata) dst_release(&metadata->dst); -- cgit v1.2.3 From 
22c74764aa2943ecdf9f07c900d8a9c8ba6c9265 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 6 Mar 2019 10:42:53 +0100 Subject: ipv4/route: fail early when inet dev is missing If a non local multicast packet reaches ip_route_input_rcu() while the ingress device IPv4 private data (in_dev) is NULL, we end up doing a NULL pointer dereference in IN_DEV_MFORWARD(). Since the later call to ip_route_input_mc() is going to fail if !in_dev, we can fail early in such scenario and avoid the dangerous code path. v1 -> v2: - clarified the commit message, no code changes Reported-by: Tianhao Zhao Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 738ff0a1a048..8ca3642f0d9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2149,12 +2149,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, int our = 0; int err = -EINVAL; - if (in_dev) - our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + if (!in_dev) + return err; + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); /* check l3 master if no match yet */ - if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + if (!our && netif_is_l3_slave(dev)) { struct in_device *l3_in_dev; l3_in_dev = __in_dev_get_rcu(skb->dev); -- cgit v1.2.3 From f4b3ec4e6aa1a2ca437905a519ae08e8cf6af754 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Mar 2019 10:25:42 +0000 Subject: iptunnel: NULL pointer deref for ip_md_tunnel_xmit Naresh Kamboju noted the following oops during execution of selftest tools/testing/selftests/bpf/test_tunnel.sh on x86_64: [ 274.120445] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 274.128285] #PF error: [INSTR] [ 274.131351] PGD 8000000414a0e067 P4D 
8000000414a0e067 PUD 3b6334067 PMD 0 [ 274.138241] Oops: 0010 [#1] SMP PTI [ 274.141734] CPU: 1 PID: 11464 Comm: ping Not tainted 5.0.0-rc4-next-20190129 #1 [ 274.149046] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS 2.0b 07/27/2017 [ 274.156526] RIP: 0010: (null) [ 274.160280] Code: Bad RIP value. [ 274.163509] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286 [ 274.168726] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000 [ 274.175851] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0 [ 274.182974] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c [ 274.190098] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000 [ 274.197222] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400 [ 274.204346] FS: 00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000) knlGS:0000000000000000 [ 274.212424] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 274.218162] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0 [ 274.225292] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 274.232416] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 274.239541] Call Trace: [ 274.241988] ? tnl_update_pmtu+0x296/0x3b0 [ 274.246085] ip_md_tunnel_xmit+0x1bc/0x520 [ 274.250176] gre_fb_xmit+0x330/0x390 [ 274.253754] gre_tap_xmit+0x128/0x180 [ 274.257414] dev_hard_start_xmit+0xb7/0x300 [ 274.261598] sch_direct_xmit+0xf6/0x290 [ 274.265430] __qdisc_run+0x15d/0x5e0 [ 274.269007] __dev_queue_xmit+0x2c5/0xc00 [ 274.273011] ? dev_queue_xmit+0x10/0x20 [ 274.276842] ? eth_header+0x2b/0xc0 [ 274.280326] dev_queue_xmit+0x10/0x20 [ 274.283984] ? dev_queue_xmit+0x10/0x20 [ 274.287813] arp_xmit+0x1a/0xf0 [ 274.290952] arp_send_dst.part.19+0x46/0x60 [ 274.295138] arp_solicit+0x177/0x6b0 [ 274.298708] ? 
mod_timer+0x18e/0x440 [ 274.302281] neigh_probe+0x57/0x70 [ 274.305684] __neigh_event_send+0x197/0x2d0 [ 274.309862] neigh_resolve_output+0x18c/0x210 [ 274.314212] ip_finish_output2+0x257/0x690 [ 274.318304] ip_finish_output+0x219/0x340 [ 274.322314] ? ip_finish_output+0x219/0x340 [ 274.326493] ip_output+0x76/0x240 [ 274.329805] ? ip_fragment.constprop.53+0x80/0x80 [ 274.334510] ip_local_out+0x3f/0x70 [ 274.337992] ip_send_skb+0x19/0x40 [ 274.341391] ip_push_pending_frames+0x33/0x40 [ 274.345740] raw_sendmsg+0xc15/0x11d0 [ 274.349403] ? __might_fault+0x85/0x90 [ 274.353151] ? _copy_from_user+0x6b/0xa0 [ 274.357070] ? rw_copy_check_uvector+0x54/0x130 [ 274.361604] inet_sendmsg+0x42/0x1c0 [ 274.365179] ? inet_sendmsg+0x42/0x1c0 [ 274.368937] sock_sendmsg+0x3e/0x50 [ 274.372460] ___sys_sendmsg+0x26f/0x2d0 [ 274.376293] ? lock_acquire+0x95/0x190 [ 274.380043] ? __handle_mm_fault+0x7ce/0xb70 [ 274.384307] ? lock_acquire+0x95/0x190 [ 274.388053] ? __audit_syscall_entry+0xdd/0x130 [ 274.392586] ? ktime_get_coarse_real_ts64+0x64/0xc0 [ 274.397461] ? __audit_syscall_entry+0xdd/0x130 [ 274.401989] ? trace_hardirqs_on+0x4c/0x100 [ 274.406173] __sys_sendmsg+0x63/0xa0 [ 274.409744] ? 
__sys_sendmsg+0x63/0xa0 [ 274.413488] __x64_sys_sendmsg+0x1f/0x30 [ 274.417405] do_syscall_64+0x55/0x190 [ 274.421064] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 274.426113] RIP: 0033:0x7ff4ae0e6e87 [ 274.429686] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 ca d9 2b 00 48 63 d2 48 63 ff 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 53 48 89 f3 48 83 ec 10 48 89 7c 24 08 [ 274.448422] RSP: 002b:00007ffcd9b76db8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 274.455978] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007ff4ae0e6e87 [ 274.463104] RDX: 0000000000000000 RSI: 00000000006092e0 RDI: 0000000000000003 [ 274.470228] RBP: 0000000000000000 R08: 00007ffcd9bc40a0 R09: 00007ffcd9bc4080 [ 274.477349] R10: 000000000000060a R11: 0000000000000246 R12: 0000000000000003 [ 274.484475] R13: 0000000000000016 R14: 00007ffcd9b77fa0 R15: 00007ffcd9b78da4 [ 274.491602] Modules linked in: cls_bpf sch_ingress iptable_filter ip_tables algif_hash af_alg x86_pkg_temp_thermal fuse [last unloaded: test_bpf] [ 274.504634] CR2: 0000000000000000 [ 274.507976] ---[ end trace 196d18386545eae1 ]--- [ 274.512588] RIP: 0010: (null) [ 274.516334] Code: Bad RIP value. 
[ 274.519557] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286 [ 274.524775] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000 [ 274.531921] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0 [ 274.539082] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c [ 274.546205] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000 [ 274.553329] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400 [ 274.560456] FS: 00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000) knlGS:0000000000000000 [ 274.568541] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 274.574277] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0 [ 274.581403] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 274.588535] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 274.595658] Kernel panic - not syncing: Fatal exception in interrupt [ 274.602046] Kernel Offset: 0x14400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 274.612827] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- [ 274.620387] ------------[ cut here ]------------ I'm also seeing the same failure on x86_64, and it reproduces consistently. >From poking around it looks like the skb's dst entry is being used to calculate the mtu in: mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; ...but because that dst_entry has an "ops" value set to md_dst_ops, the various ops (including mtu) are not set: crash> struct sk_buff._skb_refdst ffff928f87447700 -x _skb_refdst = 0xffffcd6fbf5ea590 crash> struct dst_entry.ops 0xffffcd6fbf5ea590 ops = 0xffffffffa0193800 crash> struct dst_ops.mtu 0xffffffffa0193800 mtu = 0x0 crash> I confirmed that the dst entry also has dst->input set to dst_md_discard, so it looks like it's an entry that's been initialized via __metadata_dst_init alright. 
I think the fix here is to use skb_valid_dst(skb) - it checks for DST_METADATA also, and with that fix in place, the problem - which was previously 100% reproducible - disappears. The below patch resolves the panic and all bpf tunnel tests pass without incident. Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit") Reported-by: Naresh Kamboju Signed-off-by: Alan Maguire Acked-by: Alexei Starovoitov Tested-by: Anders Roxell Reported-by: Nicolas Dichtel Tested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2756fb725bf0..a5d8cad18ead 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -515,9 +515,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - tunnel_hlen; else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - skb_dst_update_pmtu(skb, mtu); + if (skb_valid_dst(skb)) + skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && @@ -530,9 +531,11 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt6; __be32 daddr; + rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : + NULL; daddr = md ? 
dst : tunnel->parms.iph.daddr; if (rt6 && mtu < dst_mtu(skb_dst(skb)) && -- cgit v1.2.3 From a10674bf2406afc2554f9c7d31b2dc65d6a27fd9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 6 Mar 2019 14:10:22 +0300 Subject: tcp: detecting the misuse of .sendpage for Slab objects sendpage was not designed for processing of the Slab pages, in some situations it can trigger BUG_ON on receiving side. Signed-off-by: Vasily Averin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ad07dd71063d..dbb08140cdc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -943,6 +943,10 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + if (IS_ENABLED(CONFIG_DEBUG_VM) && + WARN_ONCE(PageSlab(page), "page must not be a Slab one")) + return -EINVAL; + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. -- cgit v1.2.3 From ecb3dea400d3beaf611ce76ac7a51d4230492cf2 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 6 Mar 2019 16:22:12 +0200 Subject: net: sched: flower: insert new filter to idr after setting its mask When adding new filter to flower classifier, fl_change() inserts it to handle_idr before initializing filter extensions and assigning it a mask. Normally this ordering doesn't matter because all flower classifier ops callbacks assume rtnl lock protection. However, when filter has an action that doesn't have its kernel module loaded, rtnl lock is released before call to request_module(). During this time the filter can be accessed by a concurrent task before its initialization is completed, which can lead to a crash.
Example case of NULL pointer dereference in concurrent dump: Task 1 Task 2 tc_new_tfilter() fl_change() idr_alloc_u32(fnew) fl_set_parms() tcf_exts_validate() tcf_action_init() tcf_action_init_1() rtnl_unlock() request_module() ... rtnl_lock() tc_dump_tfilter() tcf_chain_dump() fl_walk() idr_get_next_ul() tcf_node_dump() tcf_fill_node() fl_dump() mask = &f->mask->key; <- NULL ptr rtnl_lock() Extension initialization and mask assignment don't depend on fnew->handle that is allocated by idr_alloc_u32(). Move idr allocation code after action creation and mask assignment in fl_change() to prevent concurrent access to not fully initialized filter when rtnl lock is released to load action module. Fixes: 01683a146999 ("net: sched: refactor flower walk to iterate over idr") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 27300a3e76c7..c04247b403ed 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1348,46 +1348,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - if (!handle) { - handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - INT_MAX, GFP_KERNEL); - } else if (!fold) { - /* user specifies a handle and it doesn't exist */ - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - handle, GFP_KERNEL); - } - if (err) - goto errout; - fnew->handle = handle; - if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { err = -EINVAL; - goto errout_idr; + goto errout; } } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, tp->chain->tmplt_priv, extack); if (err) - goto errout_idr; + goto errout; err = fl_check_assign_mask(head, fnew, fold, mask); if (err) - goto errout_idr; + goto 
errout; + + if (!handle) { + handle = 1; + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + INT_MAX, GFP_KERNEL); + } else if (!fold) { + /* user specifies a handle and it doesn't exist */ + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + handle, GFP_KERNEL); + } + if (err) + goto errout_mask; + fnew->handle = handle; if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) { err = -EEXIST; - goto errout_mask; + goto errout_idr; } err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, fnew->mask->filter_ht_params); if (err) - goto errout_mask; + goto errout_idr; if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); @@ -1426,12 +1426,13 @@ errout_mask_ht: rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, fnew->mask->filter_ht_params); -errout_mask: - fl_mask_put(head, fnew->mask, false); - errout_idr: if (!fold) idr_remove(&head->handle_idr, fnew->handle); + +errout_mask: + fl_mask_put(head, fnew->mask, false); + errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); -- cgit v1.2.3 From 6caabe7f197d3466d238f70915d65301f1716626 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Wed, 6 Mar 2019 22:45:01 +0800 Subject: net: hsr: fix memory leak in hsr_dev_finalize() If hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER) failed to add port, it directly returns res and forgets to free the node that was allocated in hsr_create_self_node(), and forgets to delete the node->mac_list linked in hsr->self_node_db. BUG: memory leak unreferenced object 0xffff8881cfa0c780 (size 64): comm "syz-executor.0", pid 2077, jiffies 4294717969 (age 2415.377s) hex dump (first 32 bytes): e0 c7 a0 cf 81 88 ff ff 00 02 00 00 00 00 ad de ................ 00 e6 49 cd 81 88 ff ff c0 9b 87 d0 81 88 ff ff ..I.............
backtrace: [<00000000e2ff5070>] hsr_dev_finalize+0x736/0x960 [hsr] [<000000003ed2e597>] hsr_newlink+0x2b2/0x3e0 [hsr] [<000000003fa8c6b6>] __rtnl_newlink+0xf1f/0x1600 net/core/rtnetlink.c:3182 [<000000001247a7ad>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3240 [<00000000e7d1b61d>] rtnetlink_rcv_msg+0x54e/0xb90 net/core/rtnetlink.c:5130 [<000000005556bd3a>] netlink_rcv_skb+0x129/0x340 net/netlink/af_netlink.c:2477 [<00000000741d5ee6>] netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] [<00000000741d5ee6>] netlink_unicast+0x49a/0x650 net/netlink/af_netlink.c:1336 [<000000009d56f9b7>] netlink_sendmsg+0x88b/0xdf0 net/netlink/af_netlink.c:1917 [<0000000046b35c59>] sock_sendmsg_nosec net/socket.c:621 [inline] [<0000000046b35c59>] sock_sendmsg+0xc3/0x100 net/socket.c:631 [<00000000d208adc9>] __sys_sendto+0x33e/0x560 net/socket.c:1786 [<00000000b582837a>] __do_sys_sendto net/socket.c:1798 [inline] [<00000000b582837a>] __se_sys_sendto net/socket.c:1794 [inline] [<00000000b582837a>] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:1794 [<00000000c866801d>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 [<00000000fea382d9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e01dacb3>] 0xffffffffffffffff Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. 
Miller --- net/hsr/hsr_device.c | 4 +++- net/hsr/hsr_framereg.c | 12 ++++++++++++ net/hsr/hsr_framereg.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b8cd43c9ed5b..c4676bacb8db 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -486,7 +486,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - return res; + goto err_add_port; res = register_netdevice(hsr_dev); if (res) @@ -506,6 +506,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], fail: hsr_for_each_port(hsr, port) hsr_del_port(port); +err_add_port: + hsr_del_node(&hsr->self_node_db); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 286ceb41ac0c..9af16cb68f76 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } +void hsr_del_node(struct list_head *self_node_db) +{ + struct hsr_node *node; + + rcu_read_lock(); + node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + rcu_read_unlock(); + if (node) { + list_del_rcu(&node->mac_list); + kfree(node); + } +} /* Allocate an hsr_node and add it to node_db. 
'addr' is the node's AddressA; * seq_out is used to initialize filtering of outgoing duplicate frames diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 370b45998121..531fd3dfcac1 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -16,6 +16,7 @@ struct hsr_node; +void hsr_del_node(struct list_head *self_node_db); struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], u16 seq_out); struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, -- cgit v1.2.3 From 6466e715651f9f358e60c5ea4880e4731325827f Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 6 Mar 2019 13:01:36 -0500 Subject: tcp: do not report TCP_CM_INQ of 0 for closed connections Returning 0 as inq to userspace indicates there is no more data to read, and the application needs to wait for EPOLLIN. For a connection that has received FIN from the remote peer, however, the application must continue reading until getting EOF (return value of 0 from tcp_recvmsg) or an error, if edge-triggered epoll (EPOLLET) is being used. Otherwise, the application will never receive a new EPOLLIN, since there is no epoll edge after the FIN. Return 1 when there is no data left on the queue but the connection has received FIN, so that the applications continue reading. Fixes: b75eba76d3d72 (tcp: send in-queue bytes in cmsg upon read) Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dbb08140cdc9..6baa6dc1b13b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1937,6 +1937,11 @@ static int tcp_inq_hint(struct sock *sk) inq = tp->rcv_nxt - tp->copied_seq; release_sock(sk); } + /* After receiving a FIN, tell the user-space to continue reading + * by returning a non-zero inq. 
+ */ + if (inq == 0 && sock_flag(sk, SOCK_DONE)) + inq = 1; return inq; } -- cgit v1.2.3 From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Mar 2019 14:35:15 -0500 Subject: bpf: only test gso type on gso packets BPF can adjust gso only for tcp bytestreams. Fail on other gso types. But only on gso packets. It does not touch this field if !gso_size. Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols") Signed-off-by: Willem de Bruijn Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 5ceba98069d4..f274620945ff 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); -- cgit v1.2.3 From 915905f8b1d452e70ee6d8637c3f0fb55a39691d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Mar 2019 
09:31:26 -0800 Subject: xsk: fix potential crash in xsk_diag_put_umem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes two typos in xsk_diag_put_umem() syzbot reported the following crash : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 7641 Comm: syz-executor946 Not tainted 5.0.0-rc7+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline] RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline] RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143 Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203 RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014 RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174 R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000 R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40 FS: 00000000011ea880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 000000008fa13000 CR4: 00000000001406e0 Call Trace: netlink_dump+0x55d/0xfb0 net/netlink/af_netlink.c:2252 __netlink_dump_start+0x5b4/0x7e0 net/netlink/af_netlink.c:2360 netlink_dump_start include/linux/netlink.h:226 [inline] xsk_diag_handler_dump+0x1b2/0x250 net/xdp/xsk_diag.c:170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x322/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 sock_diag_rcv+0x2b/0x40 net/core/sock_diag.c:274 netlink_unicast_kernel net/netlink/af_netlink.c:1310 
[inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:632 sock_write_iter+0x27c/0x3e0 net/socket.c:923 call_write_iter include/linux/fs.h:1863 [inline] do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680 do_iter_write fs/read_write.c:956 [inline] do_iter_write+0x184/0x610 fs/read_write.c:937 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001 do_writev+0xf6/0x290 fs/read_write.c:1036 __do_sys_writev fs/read_write.c:1109 [inline] __se_sys_writev fs/read_write.c:1106 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440139 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffcc966cc18 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440139 RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 0000000000000004 R11: 0000000000000246 R12: 00000000004019c0 R13: 0000000000401a50 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace 460a3c24d0a656c9 ]--- RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline] RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline] RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143 Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203 RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014 RBP: 
ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174 R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000 R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40 FS: 00000000011ea880(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000001d22000 CR3: 000000008fa13000 CR4: 00000000001406f0 Fixes: a36b38aa2af6 ("xsk: add sock_diag interface for AF_XDP") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Björn Töpel Cc: Daniel Borkmann Cc: Magnus Karlsson Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 661d007c3b28..d5e06c8e0cbf 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -68,9 +68,9 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); if (!err && umem->fq) - err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); + err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb); if (!err && umem->cq) { - err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, + err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING, nlskb); } return err; -- cgit v1.2.3 From ea0371f7879987cff70e21d808e3e9fea624c051 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Mon, 4 Mar 2019 16:27:08 -0800 Subject: net: fix GSO in bpf_lwt_push_ip_encap GSO needs inner headers and inner protocol set properly to work. skb->inner_mac_header: skb_reset_inner_headers() assigns the current mac header value to inner_mac_header; but it is not set at the point, so we need to call skb_reset_inner_mac_header, otherwise gre_gso_segment fails: it does int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); ... if (unlikely(!pskb_may_pull(skb, tnl_hlen))) ... skb->inner_protocol should also be correctly set. 
Fixes: ca78801a81e0 ("bpf: handle GSO in bpf_lwt_push_encap") Signed-off-by: Peter Oskolkov Reviewed-by: David Ahern Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- net/core/lwt_bpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index cf2f8897ca19..126d31ff5ee3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) /* push the encap headers and fix pointers */ skb_reset_inner_headers(skb); + skb_reset_inner_mac_header(skb); /* mac header is not yet set */ + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_push(skb, len); if (ingress) -- cgit v1.2.3 From e8e3437762ad938880dd48a3c52d702e7cf3c124 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 7 Mar 2019 11:35:43 +0100 Subject: bpf: Stop the psock parser before canceling its work We might have never enabled (started) the psock's parser, in which case it will not get stopped when destroying the psock. 
This leads to a warning when trying to cancel parser's work from psock's deferred destructor: [ 405.325769] WARNING: CPU: 1 PID: 3216 at net/strparser/strparser.c:526 strp_done+0x3c/0x40 [ 405.326712] Modules linked in: [last unloaded: test_bpf] [ 405.327359] CPU: 1 PID: 3216 Comm: kworker/1:164 Tainted: G W 5.0.0 #42 [ 405.328294] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 [ 405.329712] Workqueue: events sk_psock_destroy_deferred [ 405.330254] RIP: 0010:strp_done+0x3c/0x40 [ 405.330706] Code: 28 e8 b8 d5 6b ff 48 8d bb 80 00 00 00 e8 9c d5 6b ff 48 8b 7b 18 48 85 ff 74 0d e8 1e a5 e8 ff 48 c7 43 18 00 00 00 00 5b c3 <0f> 0b eb cf 66 66 66 66 90 55 89 f5 53 48 89 fb 48 83 c7 28 e8 0b [ 405.332862] RSP: 0018:ffffc900026bbe50 EFLAGS: 00010246 [ 405.333482] RAX: ffffffff819323e0 RBX: ffff88812cb83640 RCX: ffff88812cb829e8 [ 405.334228] RDX: 0000000000000001 RSI: ffff88812cb837e8 RDI: ffff88812cb83640 [ 405.335366] RBP: ffff88813fd22680 R08: 0000000000000000 R09: 000073746e657665 [ 405.336472] R10: 8080808080808080 R11: 0000000000000001 R12: ffff88812cb83600 [ 405.337760] R13: 0000000000000000 R14: ffff88811f401780 R15: ffff88812cb837e8 [ 405.338777] FS: 0000000000000000(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000 [ 405.339903] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 405.340821] CR2: 00007fb11489a6b8 CR3: 000000012d4d6000 CR4: 00000000000406e0 [ 405.341981] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 405.343131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 405.344415] Call Trace: [ 405.344821] sk_psock_destroy_deferred+0x23/0x1b0 [ 405.345585] process_one_work+0x1ae/0x3e0 [ 405.346110] worker_thread+0x3c/0x3b0 [ 405.346576] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 405.347187] kthread+0x11d/0x140 [ 405.347601] ? 
__kthread_parkme+0x80/0x80 [ 405.348108] ret_from_fork+0x35/0x40 [ 405.348566] ---[ end trace a4a3af4026a327d4 ]--- Stop psock's parser just before canceling its work. Fixes: 1d79895aef18 ("sk_msg: Always cancel strp work before freeing the psock") Reported-by: kernel test robot Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ae6f06e45737..cc94d921476c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ + strp_stop(&psock->parser.strp); strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); -- cgit v1.2.3 From 3499e87ea0413ee5b2cc028f4c8ed4d424bc7f98 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 16:58:35 +0100 Subject: ethtool: reduce stack usage with clang clang inlines the dev_ethtool() more aggressively than gcc does, leading to a larger amount of used stack space: net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=] Marking the sub-functions that require the most stack space as noinline_for_stack gives us reasonable behavior on all compilers. Signed-off-by: Arnd Bergmann Reviewed-by: Michal Kubecek Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d4918ffddda8..b1eb32419732 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int ret; @@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } -static int ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int i, ret = 0; @@ -2405,7 +2407,7 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, +static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; -- cgit v1.2.3 From f9d19a7494e5341a7f256823e32788ae560ca22f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 09:57:42 -0700 Subject: net: atm: Use IS_ENABLED in atm_dev_ioctl When building with -Wsometimes-uninitialized, Clang warns: net/atm/resources.c:256:6: warning: variable 'number' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] net/atm/resources.c:212:7: warning: variable 'iobuf_len' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] Clang won't realize that compat is 0 when CONFIG_COMPAT is not set until the constant 
folding stage, which happens after this semantic analysis. Use IS_ENABLED instead so that the zero is present at the semantic analysis stage, which eliminates this warning. Link: https://github.com/ClangBuiltLinux/linux/issues/386 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- net/atm/resources.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/atm/resources.c b/net/atm/resources.c index bada395ecdb1..3e9f6391319e 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -203,13 +203,9 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) int __user *sioc_len; int __user *iobuf_len; -#ifndef CONFIG_COMPAT - compat = 0; /* Just so the compiler _knows_ */ -#endif - switch (cmd) { case ATM_GETNAMES: - if (compat) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT struct compat_atm_iobuf __user *ciobuf = arg; compat_uptr_t cbuf; @@ -253,7 +249,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) break; } - if (compat) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT struct compat_atmif_sioc __user *csioc = arg; compat_uptr_t carg; -- cgit v1.2.3 From 0805a4b894a8daaf76ad99934563d8ecfc6e7aed Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 11:11:26 -0700 Subject: net: atm: Add another IS_ENABLED(CONFIG_COMPAT) in atm_dev_ioctl I removed compat's universal assignment to 0, which allows this if statement to fall through when compat is passed with a value other than 0. Fixes: f9d19a7494e5 ("net: atm: Use IS_ENABLED in atm_dev_ioctl") Signed-off-by: Nathan Chancellor Signed-off-by: David S. 
Miller --- net/atm/resources.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/resources.c b/net/atm/resources.c index 3e9f6391319e..889349c6d90d 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -413,7 +413,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) } /* fall through */ default: - if (compat) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT if (!dev->ops->compat_ioctl) { error = -EINVAL; -- cgit v1.2.3 From 1e027960edfaa6a43f9ca31081729b716598112b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2019 09:36:33 -0800 Subject: net/hsr: fix possible crash in add_timer() syzbot found another add_timer() issue, this time in net/hsr [1] Let's use mod_timer() which is safe. [1] kernel BUG at kernel/time/timer.c:1136! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15909 Comm: syz-executor.3 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 kobject: 'loop2' (00000000f5629718): kobject_uevent_env RIP: 0010:add_timer kernel/time/timer.c:1136 [inline] RIP: 0010:add_timer+0x654/0xbe0 kernel/time/timer.c:1134 Code: 0f 94 c5 31 ff 44 89 ee e8 09 61 0f 00 45 84 ed 0f 84 77 fd ff ff e8 bb 5f 0f 00 e8 07 10 a0 ff e9 68 fd ff ff e8 ac 5f 0f 00 <0f> 0b e8 a5 5f 0f 00 0f 0b e8 9e 5f 0f 00 4c 89 b5 58 ff ff ff e9 RSP: 0018:ffff8880656eeca0 EFLAGS: 00010246 kobject: 'loop2' (00000000f5629718): fill_kobj_path: path = '/devices/virtual/block/loop2' RAX: 0000000000040000 RBX: 1ffff1100caddd9a RCX: ffffc9000c436000 RDX: 0000000000040000 RSI: ffffffff816056c4 RDI: ffff88806a2f6cc8 RBP: ffff8880656eed58 R08: ffff888067f4a300 R09: ffff888067f4abc8 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88806a2f6cc0 R13: dffffc0000000000 R14: 0000000000000001 R15: ffff8880656eed30 FS: 00007fc2019bf700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 
0000000000738000 CR3: 0000000067e8e000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_check_announce net/hsr/hsr_device.c:99 [inline] hsr_check_carrier_and_operstate+0x567/0x6f0 net/hsr/hsr_device.c:120 hsr_netdev_notify+0x297/0xa00 net/hsr/hsr_main.c:51 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] dev_open net/core/dev.c:1436 [inline] dev_open+0x143/0x160 net/core/dev.c:1424 team_port_add drivers/net/team/team.c:1203 [inline] team_add_slave+0xa07/0x15d0 drivers/net/team/team.c:1933 do_set_master net/core/rtnetlink.c:2358 [inline] do_set_master+0x1d4/0x230 net/core/rtnetlink.c:2332 do_setlink+0x966/0x3510 net/core/rtnetlink.c:2493 rtnl_setlink+0x271/0x3b0 net/core/rtnetlink.c:2747 rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:632 sock_write_iter+0x27c/0x3e0 net/socket.c:923 call_write_iter include/linux/fs.h:1869 [inline] do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680 do_iter_write fs/read_write.c:956 [inline] do_iter_write+0x184/0x610 fs/read_write.c:937 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001 do_writev+0xf6/0x290 fs/read_write.c:1036 __do_sys_writev fs/read_write.c:1109 [inline] __se_sys_writev fs/read_write.c:1106 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106 
do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457f29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc2019bec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457f29 RDX: 0000000000000001 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc2019bf6d4 R13: 00000000004c4a60 R14: 00000000004dd218 R15: 00000000ffffffff Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c4676bacb8db..a97bf326b231 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, + jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) @@ -332,6 +331,7 @@ static void hsr_announce(struct timer_list *t) { struct hsr_priv *hsr; struct hsr_port *master; + unsigned long interval; hsr = from_timer(hsr, t, announce_timer); @@ -343,18 +343,16 @@ static void hsr_announce(struct timer_list *t) hsr->protVersion); hsr->announce_count++; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + 
interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); } else { send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, hsr->protVersion); - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); } if (is_admin_up(master->dev)) - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } -- cgit v1.2.3 From ee60ad219f5c7c4fb2f047f88037770063ef785f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Mar 2019 14:50:54 +0800 Subject: route: set the deleted fnhe fnhe_daddr to 0 in ip_del_fnhe to fix a race The race occurs in __mkroute_output() when 2 threads lookup a dst: CPU A CPU B find_exception() find_exception() [fnhe expires] ip_del_fnhe() [fnhe is deleted] rt_bind_exception() In rt_bind_exception() it will bind a deleted fnhe with the new dst, and this dst will get no chance to be freed. It causes a dev refcnt leak and consecutive dmesg warnings: unregister_netdevice: waiting for ethX to become free. Usage count = 1 Special thanks to Jon for identifying the issue. This patch fixes it by setting fnhe_daddr to 0 in ip_del_fnhe() to stop binding the deleted fnhe with a new dst when checking fnhe's fnhe_daddr and daddr in rt_bind_exception(). It works as both ip_del_fnhe() and rt_bind_exception() are protected by fnhe_lock and the fnhe is freed by kfree_rcu(). Fixes: deed49df7390 ("route: check and remove route cache when we get route") Signed-off-by: Jon Maxwell Signed-off-by: Xin Long Reviewed-by: David Ahern Signed-off-by: David S.
Miller --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8ca3642f0d9b..a5da63e5faa2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1303,6 +1303,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + /* set fnhe_daddr to 0 to ensure it won't bind with + * new dsts in rt_bind_exception(). + */ + fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; -- cgit v1.2.3 From 2e990dfd13974d9eae493006f42ffb48707970ef Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Mar 2019 15:49:16 +0800 Subject: sctp: remove sched init from sctp_stream_init syzbot reported a NULL-ptr deref caused by sched->init() in sctp_stream_init() setting stream->rr_next = NULL. kasan: GPF could be caused by NULL-ptr deref or user memory access RIP: 0010:sctp_sched_rr_dequeue+0xd3/0x170 net/sctp/stream_sched_rr.c:141 Call Trace: sctp_outq_dequeue_data net/sctp/outqueue.c:90 [inline] sctp_outq_flush_data net/sctp/outqueue.c:1079 [inline] sctp_outq_flush+0xba2/0x2790 net/sctp/outqueue.c:1205 All sched info is saved in sout->ext now, and in sctp_stream_init() sctp_stream_alloc_out() will not change it, so there's no need to call sched->init() again, since sctp_outq_init() has already done it. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: syzbot+4c9934f20522c0efd657@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S.
Miller --- net/sctp/stream.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 2936ed17bf9e..3b47457862cc 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -230,8 +230,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; - sched->init(stream); - in: sctp_stream_interleave_init(stream); if (!incnt) -- cgit v1.2.3 From 930c9f9125c85b5134b3e711bc252ecc094708e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 8 Mar 2019 12:48:39 +0000 Subject: rxrpc: Fix client call connect/disconnect race rxrpc_disconnect_client_call() reads the call's connection ID protocol value (call->cid) as part of that function's variable declarations. This is bad because it's not inside the locked section and so may race with someone granting use of the channel to the call. This manifests as an assertion failure (see below) where the call in the presumed channel (0 because call->cid wasn't set when we read it) doesn't match the call attached to the channel we were actually granted (if 1, 2 or 3). Fix this by moving the read and dependent calculations inside of the channel_lock section. Also, only set the channel number and pointer variables if cid is not zero (ie. unset). This problem can be induced by injecting an occasional error in rxrpc_wait_for_channel() before the call to schedule(). Make two further changes also: (1) Add a trace for wait failure in rxrpc_connect_call(). (2) Drop channel_lock before BUG'ing in the case of the assertion failure. The failure causes a trace akin to the following: rxrpc: Assertion failed - 18446612685268945920(0xffff8880beab8c00) == 18446612685268621312(0xffff8880bea69800) is false ------------[ cut here ]------------ kernel BUG at net/rxrpc/conn_client.c:824! ... RIP: 0010:rxrpc_disconnect_client_call+0x2bf/0x99d ... Call Trace: rxrpc_connect_call+0x902/0x9b3 ? 
wake_up_q+0x54/0x54 rxrpc_new_client_call+0x3a0/0x751 ? rxrpc_kernel_begin_call+0x141/0x1bc ? afs_alloc_call+0x1b5/0x1b5 rxrpc_kernel_begin_call+0x141/0x1bc afs_make_call+0x20c/0x525 ? afs_alloc_call+0x1b5/0x1b5 ? __lock_is_held+0x40/0x71 ? lockdep_init_map+0xaf/0x193 ? lockdep_init_map+0xaf/0x193 ? __lock_is_held+0x40/0x71 ? yfs_fs_fetch_data+0x33b/0x34a yfs_fs_fetch_data+0x33b/0x34a afs_fetch_data+0xdc/0x3b7 afs_read_dir+0x52d/0x97f afs_dir_iterate+0xa0/0x661 ? iterate_dir+0x63/0x141 iterate_dir+0xa2/0x141 ksys_getdents64+0x9f/0x11b ? filldir+0x111/0x111 ? do_syscall_64+0x3e/0x1a0 __x64_sys_getdents64+0x16/0x19 do_syscall_64+0x7d/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/conn_client.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index b2adfa825363..f307a05076e1 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, ret = rxrpc_wait_for_channel(call, gfp); if (ret < 0) { + trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); rxrpc_disconnect_client_call(call); goto out; } @@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) */ void rxrpc_disconnect_client_call(struct rxrpc_call *call) { - unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_channel *chan = NULL; struct rxrpc_net *rxnet = conn->params.local->rxnet; + unsigned int channel = -1; + u32 cid; + spin_lock(&conn->channel_lock); + + cid = call->cid; + if (cid) { + channel = cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + } trace_rxrpc_client(conn, 
channel, rxrpc_client_chan_disconnect); call->conn = NULL; - spin_lock(&conn->channel_lock); - /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow * immediately unless someone else grabs it in the meantime. @@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out; } - ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + if (rcu_access_pointer(chan->call) != call) { + spin_unlock(&conn->channel_lock); + BUG(); + } /* If a client call was exposed to the world, we save the result for * retransmission. -- cgit v1.2.3 From 89664c623617b1d34447a927ac7871ddf3db29d3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 17:54:53 +0800 Subject: sctp: sctp_sock_migrate() returns error if sctp_bind_addr_dup() fails It should fail to create the new sk if sctp_bind_addr_dup() fails when accepting or peeloff an association. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 533207dbeae9..44f2acb3d433 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -102,9 +102,9 @@ static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk); static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); -static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, - struct sctp_association *assoc, - enum sctp_socket_type type); +static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, + struct sctp_association *assoc, + enum sctp_socket_type type); static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; @@ -4891,7 +4891,11 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) /* Populate the fields of the newsk from the oldsk 
and migrate the * asoc to the newsk. */ - sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); + error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); + if (error) { + sk_common_release(newsk); + newsk = NULL; + } out: release_sock(sk); @@ -5639,7 +5643,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. */ - sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + err = sctp_sock_migrate(sk, sock->sk, asoc, + SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + if (err) { + sock_release(sock); + sock = NULL; + } *sockp = sock; @@ -9171,9 +9180,9 @@ static inline void sctp_copy_descendant(struct sock *sk_to, /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ -static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, - struct sctp_association *assoc, - enum sctp_socket_type type) +static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, + struct sctp_association *assoc, + enum sctp_socket_type type) { struct sctp_sock *oldsp = sctp_sk(oldsk); struct sctp_sock *newsp = sctp_sk(newsk); @@ -9182,6 +9191,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; + int err; /* Migrate socket buffer sizes and all the socket level options to the * new socket. 
@@ -9210,8 +9220,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly */ - sctp_bind_addr_dup(&newsp->ep->base.bind_addr, - &oldsp->ep->base.bind_addr, GFP_KERNEL); + err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr, + &oldsp->ep->base.bind_addr, GFP_KERNEL); + if (err) + return err; /* Move any messages in the old socket's receive queue that are for the * peeled off association to the new socket's receive queue. @@ -9296,6 +9308,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } release_sock(newsk); + + return 0; } -- cgit v1.2.3 From 60208f79139af0e2f84747d04a2f3321f174a398 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 17:54:54 +0800 Subject: sctp: move up sctp_auth_init_hmacs() in sctp_endpoint_init() sctp_auth_init_hmacs() is called only when ep->auth_enable is set. It is better to move up sctp_auth_init_hmacs(), remove the auth_enable check in it, and check auth_enable only once in sctp_endpoint_init(). Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S.
Miller --- net/sctp/auth.c | 6 ------ net/sctp/endpointola.c | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 5b537613946f..39d72e58b8e5 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -471,12 +471,6 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) struct crypto_shash *tfm = NULL; __u16 id; - /* If AUTH extension is disabled, we are done */ - if (!ep->auth_enable) { - ep->auth_hmacs = NULL; - return 0; - } - /* If the transforms are already allocated, we are done */ if (ep->auth_hmacs) return 0; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 40c7eb941bc9..0448b68fce74 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -107,6 +107,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, auth_chunks->param_hdr.length = htons(sizeof(struct sctp_paramhdr) + 2); } + + /* Allocate and initialize transorms arrays for supported + * HMACs. + */ + err = sctp_auth_init_hmacs(ep, gfp); + if (err) + goto nomem; } /* Initialize the base structure. */ @@ -150,15 +157,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, INIT_LIST_HEAD(&ep->endpoint_shared_keys); null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) - goto nomem; + goto nomem_shkey; list_add(&null_key->key_list, &ep->endpoint_shared_keys); - /* Allocate and initialize transorms arrays for supported HMACs. */ - err = sctp_auth_init_hmacs(ep, gfp); - if (err) - goto nomem_hmacs; - /* Add the null key to the endpoint shared keys list and * set the hmcas and chunks pointers. 
*/ @@ -169,8 +171,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, return ep; -nomem_hmacs: - sctp_auth_destroy_keys(&ep->endpoint_shared_keys); +nomem_shkey: + sctp_auth_destroy_hmacs(ep->auth_hmacs); nomem: /* Free all allocations */ kfree(auth_hmacs); -- cgit v1.2.3 From c6f33e05225696fee3c901b7526b80f31848454e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 17:54:55 +0800 Subject: sctp: call sctp_auth_init_hmacs() in sctp_sock_migrate() New ep's auth_hmacs should be set if old ep's is set, in case that net->sctp.auth_enable has been changed to 0 by users and new ep's auth_hmacs couldn't be set in sctp_endpoint_init(). It can even crash kernel by doing: 1. on server: sysctl -w net.sctp.auth_enable=1, sysctl -w net.sctp.addip_enable=1, sysctl -w net.sctp.addip_noauth_enable=0, listen() on server, sysctl -w net.sctp.auth_enable=0. 2. on client: connect() to server. 3. on server: accept() the asoc, sysctl -w net.sctp.auth_enable=1. 4. on client: send() asconf packet to server. The call trace: [ 245.280251] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 245.286872] RIP: 0010:sctp_auth_calculate_hmac+0xa3/0x140 [sctp] [ 245.304572] Call Trace: [ 245.305091] [ 245.311287] sctp_sf_authenticate+0x110/0x160 [sctp] [ 245.312311] sctp_sf_eat_auth+0xf2/0x230 [sctp] [ 245.313249] sctp_do_sm+0x9a/0x2d0 [sctp] [ 245.321483] sctp_assoc_bh_rcv+0xed/0x1a0 [sctp] [ 245.322495] sctp_rcv+0xa66/0xc70 [sctp] It's because the old ep->auth_hmacs wasn't copied to the new ep while ep->auth_hmacs is used in sctp_auth_calculate_hmac() when processing the incoming