From 0e63208915a8d7590d0a6218dadb2a6a00ac705a Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@gmail.com>
Date: Mon, 4 Mar 2019 23:26:10 +0100
Subject: tipc: fix RDM/DGRAM connect() regression

Fix regression bug introduced in
commit 365ad353c256 ("tipc: reduce risk of user starvation during link
congestion")

Only signal -EDESTADDRREQ for RDM/DGRAM if we don't have a cached
sockaddr.

Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion")
Signed-off-by: Erik Hugne <erik.hugne@gmail.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e482b342bfa8..3274ef625dba 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1333,7 +1333,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 
 	if (unlikely(!dest)) {
 		dest = &tsk->peer;
-		if (!syn || dest->family != AF_TIPC)
+		if (!syn && dest->family != AF_TIPC)
 			return -EDESTADDRREQ;
 	}
 
-- 
cgit v1.2.3


From 4177c5d94264b57f426ef5c45a788808d1a1e536 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Tue, 5 Mar 2019 08:29:28 +0800
Subject: net/sched: act_tunnel_key: Fix double free dst_cache

dst_cache_destroy will be called in dst_release

dst_release-->dst_destroy_rcu-->dst_destroy-->metadata_dst_free
-->dst_cache_destroy

It should not call dst_cache_destroy before dst_release

Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support")
Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3beb4717d3b7..7c6591b991d5 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,14 +201,9 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 {
 	if (!p)
 		return;
-	if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-#ifdef CONFIG_DST_CACHE
-		struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
-
-		dst_cache_destroy(&info->dst_cache);
-#endif
+	if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
 		dst_release(&p->tcft_enc_metadata->dst);
-	}
+
 	kfree_rcu(p, rcu);
 }
 
@@ -338,7 +333,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 						  &metadata->u.tun_info,
 						  opts_len, extack);
 			if (ret < 0)
-				goto release_dst_cache;
+				goto release_tun_meta;
 		}
 
 		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
@@ -354,14 +349,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 				     &act_tunnel_key_ops, bind, true);
 		if (ret) {
 			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
-			goto release_dst_cache;
+			goto release_tun_meta;
 		}
 
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
 		NL_SET_ERR_MSG(extack, "TC IDR already exists");
 		ret = -EEXIST;
-		goto release_dst_cache;
+		goto release_tun_meta;
 	}
 
 	t = to_tunnel_key(*a);
@@ -371,7 +366,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
 		ret = -ENOMEM;
 		exists = true;
-		goto release_dst_cache;
+		goto release_tun_meta;
 	}
 	params_new->tcft_action = parm->t_action;
 	params_new->tcft_enc_metadata = metadata;
@@ -388,12 +383,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 
 	return ret;
 
-release_dst_cache:
-#ifdef CONFIG_DST_CACHE
-	if (metadata)
-		dst_cache_destroy(&metadata->u.tun_info.dst_cache);
 release_tun_meta:
-#endif
 	if (metadata)
 		dst_release(&metadata->dst);
 
-- 
cgit v1.2.3


From 22c74764aa2943ecdf9f07c900d8a9c8ba6c9265 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 6 Mar 2019 10:42:53 +0100
Subject: ipv4/route: fail early when inet dev is missing

If a non local multicast packet reaches ip_route_input_rcu() while
the ingress device IPv4 private data (in_dev) is NULL, we end up
doing a NULL pointer dereference in IN_DEV_MFORWARD().

Since the later call to ip_route_input_mc() is going to fail if
!in_dev, we can fail early in such scenario and avoid the dangerous
code path.

v1 -> v2:
 - clarified the commit message, no code changes

Reported-by: Tianhao Zhao <tizhao@redhat.com>
Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 738ff0a1a048..8ca3642f0d9b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2149,12 +2149,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		int our = 0;
 		int err = -EINVAL;
 
-		if (in_dev)
-			our = ip_check_mc_rcu(in_dev, daddr, saddr,
-					      ip_hdr(skb)->protocol);
+		if (!in_dev)
+			return err;
+		our = ip_check_mc_rcu(in_dev, daddr, saddr,
+				      ip_hdr(skb)->protocol);
 
 		/* check l3 master if no match yet */
-		if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+		if (!our && netif_is_l3_slave(dev)) {
 			struct in_device *l3_in_dev;
 
 			l3_in_dev = __in_dev_get_rcu(skb->dev);
-- 
cgit v1.2.3


From f4b3ec4e6aa1a2ca437905a519ae08e8cf6af754 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 6 Mar 2019 10:25:42 +0000
Subject: iptunnel: NULL pointer deref for ip_md_tunnel_xmit

Naresh Kamboju noted the following oops during execution of selftest
tools/testing/selftests/bpf/test_tunnel.sh on x86_64:

[  274.120445] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000000
[  274.128285] #PF error: [INSTR]
[  274.131351] PGD 8000000414a0e067 P4D 8000000414a0e067 PUD 3b6334067 PMD 0
[  274.138241] Oops: 0010 [#1] SMP PTI
[  274.141734] CPU: 1 PID: 11464 Comm: ping Not tainted
5.0.0-rc4-next-20190129 #1
[  274.149046] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[  274.156526] RIP: 0010:          (null)
[  274.160280] Code: Bad RIP value.
[  274.163509] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286
[  274.168726] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000
[  274.175851] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0
[  274.182974] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c
[  274.190098] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000
[  274.197222] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400
[  274.204346] FS:  00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000)
knlGS:0000000000000000
[  274.212424] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  274.218162] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0
[  274.225292] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  274.232416] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  274.239541] Call Trace:
[  274.241988]  ? tnl_update_pmtu+0x296/0x3b0
[  274.246085]  ip_md_tunnel_xmit+0x1bc/0x520
[  274.250176]  gre_fb_xmit+0x330/0x390
[  274.253754]  gre_tap_xmit+0x128/0x180
[  274.257414]  dev_hard_start_xmit+0xb7/0x300
[  274.261598]  sch_direct_xmit+0xf6/0x290
[  274.265430]  __qdisc_run+0x15d/0x5e0
[  274.269007]  __dev_queue_xmit+0x2c5/0xc00
[  274.273011]  ? dev_queue_xmit+0x10/0x20
[  274.276842]  ? eth_header+0x2b/0xc0
[  274.280326]  dev_queue_xmit+0x10/0x20
[  274.283984]  ? dev_queue_xmit+0x10/0x20
[  274.287813]  arp_xmit+0x1a/0xf0
[  274.290952]  arp_send_dst.part.19+0x46/0x60
[  274.295138]  arp_solicit+0x177/0x6b0
[  274.298708]  ? mod_timer+0x18e/0x440
[  274.302281]  neigh_probe+0x57/0x70
[  274.305684]  __neigh_event_send+0x197/0x2d0
[  274.309862]  neigh_resolve_output+0x18c/0x210
[  274.314212]  ip_finish_output2+0x257/0x690
[  274.318304]  ip_finish_output+0x219/0x340
[  274.322314]  ? ip_finish_output+0x219/0x340
[  274.326493]  ip_output+0x76/0x240
[  274.329805]  ? ip_fragment.constprop.53+0x80/0x80
[  274.334510]  ip_local_out+0x3f/0x70
[  274.337992]  ip_send_skb+0x19/0x40
[  274.341391]  ip_push_pending_frames+0x33/0x40
[  274.345740]  raw_sendmsg+0xc15/0x11d0
[  274.349403]  ? __might_fault+0x85/0x90
[  274.353151]  ? _copy_from_user+0x6b/0xa0
[  274.357070]  ? rw_copy_check_uvector+0x54/0x130
[  274.361604]  inet_sendmsg+0x42/0x1c0
[  274.365179]  ? inet_sendmsg+0x42/0x1c0
[  274.368937]  sock_sendmsg+0x3e/0x50
[  274.372460]  ___sys_sendmsg+0x26f/0x2d0
[  274.376293]  ? lock_acquire+0x95/0x190
[  274.380043]  ? __handle_mm_fault+0x7ce/0xb70
[  274.384307]  ? lock_acquire+0x95/0x190
[  274.388053]  ? __audit_syscall_entry+0xdd/0x130
[  274.392586]  ? ktime_get_coarse_real_ts64+0x64/0xc0
[  274.397461]  ? __audit_syscall_entry+0xdd/0x130
[  274.401989]  ? trace_hardirqs_on+0x4c/0x100
[  274.406173]  __sys_sendmsg+0x63/0xa0
[  274.409744]  ? __sys_sendmsg+0x63/0xa0
[  274.413488]  __x64_sys_sendmsg+0x1f/0x30
[  274.417405]  do_syscall_64+0x55/0x190
[  274.421064]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  274.426113] RIP: 0033:0x7ff4ae0e6e87
[  274.429686] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00
00 00 00 8b 05 ca d9 2b 00 48 63 d2 48 63 ff 85 c0 75 10 b8 2e 00 00
00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 53 48 89 f3 48 83 ec 10 48 89 7c
24 08
[  274.448422] RSP: 002b:00007ffcd9b76db8 EFLAGS: 00000246 ORIG_RAX:
000000000000002e
[  274.455978] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007ff4ae0e6e87
[  274.463104] RDX: 0000000000000000 RSI: 00000000006092e0 RDI: 0000000000000003
[  274.470228] RBP: 0000000000000000 R08: 00007ffcd9bc40a0 R09: 00007ffcd9bc4080
[  274.477349] R10: 000000000000060a R11: 0000000000000246 R12: 0000000000000003
[  274.484475] R13: 0000000000000016 R14: 00007ffcd9b77fa0 R15: 00007ffcd9b78da4
[  274.491602] Modules linked in: cls_bpf sch_ingress iptable_filter
ip_tables algif_hash af_alg x86_pkg_temp_thermal fuse [last unloaded:
test_bpf]
[  274.504634] CR2: 0000000000000000
[  274.507976] ---[ end trace 196d18386545eae1 ]---
[  274.512588] RIP: 0010:          (null)
[  274.516334] Code: Bad RIP value.
[  274.519557] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286
[  274.524775] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000
[  274.531921] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0
[  274.539082] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c
[  274.546205] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000
[  274.553329] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400
[  274.560456] FS:  00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000)
knlGS:0000000000000000
[  274.568541] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  274.574277] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0
[  274.581403] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  274.588535] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  274.595658] Kernel panic - not syncing: Fatal exception in interrupt
[  274.602046] Kernel Offset: 0x14400000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[  274.612827] ---[ end Kernel panic - not syncing: Fatal exception in
interrupt ]---
[  274.620387] ------------[ cut here ]------------

I'm also seeing the same failure on x86_64, and it reproduces
consistently.

>From poking around it looks like the skb's dst entry is being used
to calculate the mtu in:

mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

...but because that dst_entry  has an "ops" value set to md_dst_ops,
the various ops (including mtu) are not set:

crash> struct sk_buff._skb_refdst ffff928f87447700 -x
      _skb_refdst = 0xffffcd6fbf5ea590
crash> struct dst_entry.ops 0xffffcd6fbf5ea590
  ops = 0xffffffffa0193800
crash> struct dst_ops.mtu 0xffffffffa0193800
  mtu = 0x0
crash>

I confirmed that the dst entry also has dst->input set to
dst_md_discard, so it looks like it's an entry that's been
initialized via __metadata_dst_init alright.

I think the fix here is to use skb_valid_dst(skb) - it checks
for  DST_METADATA also, and with that fix in place, the
problem - which was previously 100% reproducible - disappears.

The below patch resolves the panic and all bpf tunnel tests pass
without incident.

Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2756fb725bf0..a5d8cad18ead 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -515,9 +515,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 					- sizeof(struct iphdr) - tunnel_hlen;
 	else
-		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	skb_dst_update_pmtu(skb, mtu);
+	if (skb_valid_dst(skb))
+		skb_dst_update_pmtu(skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		if (!skb_is_gso(skb) &&
@@ -530,9 +531,11 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+		struct rt6_info *rt6;
 		__be32 daddr;
 
+		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+					   NULL;
 		daddr = md ? dst : tunnel->parms.iph.daddr;
 
 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
-- 
cgit v1.2.3


From a10674bf2406afc2554f9c7d31b2dc65d6a27fd9 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 6 Mar 2019 14:10:22 +0300
Subject: tcp: detecting the misuse of .sendpage for Slab objects

sendpage was not designed for processing of the Slab pages,
in some situations it can trigger BUG_ON on receiving side.

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ad07dd71063d..dbb08140cdc9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -943,6 +943,10 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
+	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+	    WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+		return -EINVAL;
+
 	/* Wait for a connection to finish. One exception is TCP Fast Open
 	 * (passive side) where data is allowed to be sent before a connection
 	 * is fully established.
-- 
cgit v1.2.3


From ecb3dea400d3beaf611ce76ac7a51d4230492cf2 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Wed, 6 Mar 2019 16:22:12 +0200
Subject: net: sched: flower: insert new filter to idr after setting its mask

When adding new filter to flower classifier, fl_change() inserts it to
handle_idr before initializing filter extensions and assigning it a mask.
Normally this ordering doesn't matter because all flower classifier ops
callbacks assume rtnl lock protection. However, when filter has an action
that doesn't have its kernel module loaded, rtnl lock is released before
call to request_module(). During this time the filter can be accessed bu
concurrent task before its initialization is completed, which can lead to a
crash.

Example case of NULL pointer dereference in concurrent dump:

Task 1                           Task 2

tc_new_tfilter()
 fl_change()
  idr_alloc_u32(fnew)
  fl_set_parms()
   tcf_exts_validate()
    tcf_action_init()
     tcf_action_init_1()
      rtnl_unlock()
      request_module()
      ...                        rtnl_lock()
      				 tc_dump_tfilter()
      				  tcf_chain_dump()
				   fl_walk()
				    idr_get_next_ul()
				    tcf_node_dump()
				     tcf_fill_node()
				      fl_dump()
				       mask = &f->mask->key; <- NULL ptr
      rtnl_lock()

Extension initialization and mask assignment don't depend on fnew->handle
that is allocated by idr_alloc_u32(). Move idr allocation code after action
creation and mask assignment in fl_change() to prevent concurrent access
to not fully initialized filter when rtnl lock is released to load action
module.

Fixes: 01683a146999 ("net: sched: refactor flower walk to iterate over idr")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 27300a3e76c7..c04247b403ed 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1348,46 +1348,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout;
 
-	if (!handle) {
-		handle = 1;
-		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
-				    INT_MAX, GFP_KERNEL);
-	} else if (!fold) {
-		/* user specifies a handle and it doesn't exist */
-		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
-				    handle, GFP_KERNEL);
-	}
-	if (err)
-		goto errout;
-	fnew->handle = handle;
-
 	if (tb[TCA_FLOWER_FLAGS]) {
 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
 
 		if (!tc_flags_valid(fnew->flags)) {
 			err = -EINVAL;
-			goto errout_idr;
+			goto errout;
 		}
 	}
 
 	err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
 			   tp->chain->tmplt_priv, extack);
 	if (err)
-		goto errout_idr;
+		goto errout;
 
 	err = fl_check_assign_mask(head, fnew, fold, mask);
 	if (err)
-		goto errout_idr;
+		goto errout;
+
+	if (!handle) {
+		handle = 1;
+		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+				    INT_MAX, GFP_KERNEL);
+	} else if (!fold) {
+		/* user specifies a handle and it doesn't exist */
+		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+				    handle, GFP_KERNEL);
+	}
+	if (err)
+		goto errout_mask;
+	fnew->handle = handle;
 
 	if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) {
 		err = -EEXIST;
-		goto errout_mask;
+		goto errout_idr;
 	}
 
 	err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
 				     fnew->mask->filter_ht_params);
 	if (err)
-		goto errout_mask;
+		goto errout_idr;
 
 	if (!tc_skip_hw(fnew->flags)) {
 		err = fl_hw_replace_filter(tp, fnew, extack);
@@ -1426,12 +1426,13 @@ errout_mask_ht:
 	rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
 			       fnew->mask->filter_ht_params);
 
-errout_mask:
-	fl_mask_put(head, fnew->mask, false);
-
 errout_idr:
 	if (!fold)
 		idr_remove(&head->handle_idr, fnew->handle);
+
+errout_mask:
+	fl_mask_put(head, fnew->mask, false);
+
 errout:
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
-- 
cgit v1.2.3


From 6caabe7f197d3466d238f70915d65301f1716626 Mon Sep 17 00:00:00 2001
From: Mao Wenan <maowenan@huawei.com>
Date: Wed, 6 Mar 2019 22:45:01 +0800
Subject: net: hsr: fix memory leak in hsr_dev_finalize()

If hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER) failed to
add port, it directly returns res and forgets to free the node
that allocated in hsr_create_self_node(), and forgets to delete
the node->mac_list linked in hsr->self_node_db.

BUG: memory leak
unreferenced object 0xffff8881cfa0c780 (size 64):
  comm "syz-executor.0", pid 2077, jiffies 4294717969 (age 2415.377s)
  hex dump (first 32 bytes):
    e0 c7 a0 cf 81 88 ff ff 00 02 00 00 00 00 ad de  ................
    00 e6 49 cd 81 88 ff ff c0 9b 87 d0 81 88 ff ff  ..I.............
  backtrace:
    [<00000000e2ff5070>] hsr_dev_finalize+0x736/0x960 [hsr]
    [<000000003ed2e597>] hsr_newlink+0x2b2/0x3e0 [hsr]
    [<000000003fa8c6b6>] __rtnl_newlink+0xf1f/0x1600 net/core/rtnetlink.c:3182
    [<000000001247a7ad>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3240
    [<00000000e7d1b61d>] rtnetlink_rcv_msg+0x54e/0xb90 net/core/rtnetlink.c:5130
    [<000000005556bd3a>] netlink_rcv_skb+0x129/0x340 net/netlink/af_netlink.c:2477
    [<00000000741d5ee6>] netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
    [<00000000741d5ee6>] netlink_unicast+0x49a/0x650 net/netlink/af_netlink.c:1336
    [<000000009d56f9b7>] netlink_sendmsg+0x88b/0xdf0 net/netlink/af_netlink.c:1917
    [<0000000046b35c59>] sock_sendmsg_nosec net/socket.c:621 [inline]
    [<0000000046b35c59>] sock_sendmsg+0xc3/0x100 net/socket.c:631
    [<00000000d208adc9>] __sys_sendto+0x33e/0x560 net/socket.c:1786
    [<00000000b582837a>] __do_sys_sendto net/socket.c:1798 [inline]
    [<00000000b582837a>] __se_sys_sendto net/socket.c:1794 [inline]
    [<00000000b582837a>] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:1794
    [<00000000c866801d>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
    [<00000000fea382d9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
    [<00000000e01dacb3>] 0xffffffffffffffff

Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c   |  4 +++-
 net/hsr/hsr_framereg.c | 12 ++++++++++++
 net/hsr/hsr_framereg.h |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b8cd43c9ed5b..c4676bacb8db 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -486,7 +486,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 
 	res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
 	if (res)
-		return res;
+		goto err_add_port;
 
 	res = register_netdevice(hsr_dev);
 	if (res)
@@ -506,6 +506,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 fail:
 	hsr_for_each_port(hsr, port)
 		hsr_del_port(port);
+err_add_port:
+	hsr_del_node(&hsr->self_node_db);
 
 	return res;
 }
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 286ceb41ac0c..9af16cb68f76 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db,
 	return 0;
 }
 
+void hsr_del_node(struct list_head *self_node_db)
+{
+	struct hsr_node *node;
+
+	rcu_read_lock();
+	node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+	rcu_read_unlock();
+	if (node) {
+		list_del_rcu(&node->mac_list);
+		kfree(node);
+	}
+}
 
 /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
  * seq_out is used to initialize filtering of outgoing duplicate frames
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 370b45998121..531fd3dfcac1 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -16,6 +16,7 @@
 
 struct hsr_node;
 
+void hsr_del_node(struct list_head *self_node_db);
 struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 			      u16 seq_out);
 struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
-- 
cgit v1.2.3


From 6466e715651f9f358e60c5ea4880e4731325827f Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Wed, 6 Mar 2019 13:01:36 -0500
Subject: tcp: do not report TCP_CM_INQ of 0 for closed connections

Returning 0 as inq to userspace indicates there is no more data to
read, and the application needs to wait for EPOLLIN. For a connection
that has received FIN from the remote peer, however, the application
must continue reading until getting EOF (return value of 0
from tcp_recvmsg) or an error, if edge-triggered epoll (EPOLLET) is
being used. Otherwise, the application will never receive a new
EPOLLIN, since there is no epoll edge after the FIN.

Return 1 when there is no data left on the queue but the
connection has received FIN, so that the applications continue
reading.

Fixes: b75eba76d3d72 (tcp: send in-queue bytes in cmsg upon read)
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dbb08140cdc9..6baa6dc1b13b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1937,6 +1937,11 @@ static int tcp_inq_hint(struct sock *sk)
 		inq = tp->rcv_nxt - tp->copied_seq;
 		release_sock(sk);
 	}
+	/* After receiving a FIN, tell the user-space to continue reading
+	 * by returning a non-zero inq.
+	 */
+	if (inq == 0 && sock_flag(sk, SOCK_DONE))
+		inq = 1;
 	return inq;
 }
 
-- 
cgit v1.2.3


From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 6 Mar 2019 14:35:15 -0500
Subject: bpf: only test gso type on gso packets

BPF can adjust gso only for tcp bytestreams. Fail on other gso types.

But only on gso packets. It does not touch this field if !gso_size.

Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 5ceba98069d4..f274620945ff 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
-- 
cgit v1.2.3


From 915905f8b1d452e70ee6d8637c3f0fb55a39691d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Mar 2019 09:31:26 -0800
Subject: xsk: fix potential crash in xsk_diag_put_umem()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes two typos in xsk_diag_put_umem()

syzbot reported the following crash :

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 7641 Comm: syz-executor946 Not tainted 5.0.0-rc7+ #95
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline]
RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline]
RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143
Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85
RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203
RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c
RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014
RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174
R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000
R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40
FS:  00000000011ea880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000080 CR3: 000000008fa13000 CR4: 00000000001406e0
Call Trace:
 netlink_dump+0x55d/0xfb0 net/netlink/af_netlink.c:2252
 __netlink_dump_start+0x5b4/0x7e0 net/netlink/af_netlink.c:2360
 netlink_dump_start include/linux/netlink.h:226 [inline]
 xsk_diag_handler_dump+0x1b2/0x250 net/xdp/xsk_diag.c:170
 __sock_diag_cmd net/core/sock_diag.c:232 [inline]
 sock_diag_rcv_msg+0x322/0x410 net/core/sock_diag.c:263
 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485
 sock_diag_rcv+0x2b/0x40 net/core/sock_diag.c:274
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:632
 sock_write_iter+0x27c/0x3e0 net/socket.c:923
 call_write_iter include/linux/fs.h:1863 [inline]
 do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680
 do_iter_write fs/read_write.c:956 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:937
 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001
 do_writev+0xf6/0x290 fs/read_write.c:1036
 __do_sys_writev fs/read_write.c:1109 [inline]
 __se_sys_writev fs/read_write.c:1106 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x440139
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffcc966cc18 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440139
RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 0000000000000004 R11: 0000000000000246 R12: 00000000004019c0
R13: 0000000000401a50 R14: 0000000000000000 R15: 0000000000000000
Modules linked in:
---[ end trace 460a3c24d0a656c9 ]---
RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline]
RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline]
RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143
Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85
RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203
RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c
RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014
RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174
R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000
R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40
FS:  00000000011ea880(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000001d22000 CR3: 000000008fa13000 CR4: 00000000001406f0

Fixes: a36b38aa2af6 ("xsk: add sock_diag interface for AF_XDP")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Björn Töpel <bjorn.topel@intel.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk_diag.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 661d007c3b28..d5e06c8e0cbf 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -68,9 +68,9 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
 	err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
 
 	if (!err && umem->fq)
-		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+		err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb);
 	if (!err && umem->cq) {
-		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+		err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING,
 					nlskb);
 	}
 	return err;
-- 
cgit v1.2.3


From ea0371f7879987cff70e21d808e3e9fea624c051 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Mon, 4 Mar 2019 16:27:08 -0800
Subject: net: fix GSO in bpf_lwt_push_ip_encap

GSO needs inner headers and inner protocol set properly to work.

skb->inner_mac_header: skb_reset_inner_headers() assigns the current
mac header value to inner_mac_header; but it is not set at the point,
so we need to call skb_reset_inner_mac_header, otherwise gre_gso_segment
fails: it does

    int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
    ...
    if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
    ...

skb->inner_protocol should also be correctly set.

Fixes: ca78801a81e0 ("bpf: handle GSO in bpf_lwt_push_encap")
Signed-off-by: Peter Oskolkov <posk@google.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/lwt_bpf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index cf2f8897ca19..126d31ff5ee3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
 
 	/* push the encap headers and fix pointers */
 	skb_reset_inner_headers(skb);
+	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
+	skb_set_inner_protocol(skb, skb->protocol);
 	skb->encapsulation = 1;
 	skb_push(skb, len);
 	if (ingress)
-- 
cgit v1.2.3


From e8e3437762ad938880dd48a3c52d702e7cf3c124 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 7 Mar 2019 11:35:43 +0100
Subject: bpf: Stop the psock parser before canceling its work

We might have never enabled (started) the psock's parser, in which case it
will not get stopped when destroying the psock. This leads to a warning
when trying to cancel parser's work from psock's deferred destructor:

[  405.325769] WARNING: CPU: 1 PID: 3216 at net/strparser/strparser.c:526 strp_done+0x3c/0x40
[  405.326712] Modules linked in: [last unloaded: test_bpf]
[  405.327359] CPU: 1 PID: 3216 Comm: kworker/1:164 Tainted: G        W         5.0.0 #42
[  405.328294] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014
[  405.329712] Workqueue: events sk_psock_destroy_deferred
[  405.330254] RIP: 0010:strp_done+0x3c/0x40
[  405.330706] Code: 28 e8 b8 d5 6b ff 48 8d bb 80 00 00 00 e8 9c d5 6b ff 48 8b 7b 18 48 85 ff 74 0d e8 1e a5 e8 ff 48 c7 43 18 00 00 00 00 5b c3 <0f> 0b eb cf 66 66 66 66 90 55 89 f5 53 48 89 fb 48 83 c7 28 e8 0b
[  405.332862] RSP: 0018:ffffc900026bbe50 EFLAGS: 00010246
[  405.333482] RAX: ffffffff819323e0 RBX: ffff88812cb83640 RCX: ffff88812cb829e8
[  405.334228] RDX: 0000000000000001 RSI: ffff88812cb837e8 RDI: ffff88812cb83640
[  405.335366] RBP: ffff88813fd22680 R08: 0000000000000000 R09: 000073746e657665
[  405.336472] R10: 8080808080808080 R11: 0000000000000001 R12: ffff88812cb83600
[  405.337760] R13: 0000000000000000 R14: ffff88811f401780 R15: ffff88812cb837e8
[  405.338777] FS:  0000000000000000(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000
[  405.339903] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  405.340821] CR2: 00007fb11489a6b8 CR3: 000000012d4d6000 CR4: 00000000000406e0
[  405.341981] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  405.343131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  405.344415] Call Trace:
[  405.344821]  sk_psock_destroy_deferred+0x23/0x1b0
[  405.345585]  process_one_work+0x1ae/0x3e0
[  405.346110]  worker_thread+0x3c/0x3b0
[  405.346576]  ? pwq_unbound_release_workfn+0xd0/0xd0
[  405.347187]  kthread+0x11d/0x140
[  405.347601]  ? __kthread_parkme+0x80/0x80
[  405.348108]  ret_from_fork+0x35/0x40
[  405.348566] ---[ end trace a4a3af4026a327d4 ]---

Stop psock's parser just before canceling its work.

Fixes: 1d79895aef18 ("sk_msg: Always cancel strp work before freeing the psock")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/skmsg.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ae6f06e45737..cc94d921476c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
 	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
 
 	/* No sk_callback_lock since already detached. */
+	strp_stop(&psock->parser.strp);
 	strp_done(&psock->parser.strp);
 
 	cancel_work_sync(&psock->work);
-- 
cgit v1.2.3


From 3499e87ea0413ee5b2cc028f4c8ed4d424bc7f98 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 7 Mar 2019 16:58:35 +0100
Subject: ethtool: reduce stack usage with clang

clang inlines the dev_ethtool() more aggressively than gcc does, leading
to a larger amount of used stack space:

net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=]

Marking the sub-functions that require the most stack space as
noinline_for_stack gives us reasonable behavior on all compilers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d4918ffddda8..b1eb32419732 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-static int ethtool_get_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_get_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int ret;
@@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev,
 	return 0;
 }
 
-static int ethtool_set_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_set_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int i, ret = 0;
@@ -2405,7 +2407,7 @@ roll_back:
 	return ret;
 }
 
-static int ethtool_set_per_queue(struct net_device *dev,
+static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,
 				 void __user *useraddr, u32 sub_cmd)
 {
 	struct ethtool_per_queue_op per_queue_opt;
-- 
cgit v1.2.3


From f9d19a7494e5341a7f256823e32788ae560ca22f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 09:57:42 -0700
Subject: net: atm: Use IS_ENABLED in atm_dev_ioctl

When building with -Wsometimes-uninitialized, Clang warns:

net/atm/resources.c:256:6: warning: variable 'number' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
net/atm/resources.c:212:7: warning: variable 'iobuf_len' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]

Clang won't realize that compat is 0 when CONFIG_COMPAT is not set until
the constant folding stage, which happens after this semantic analysis.
Use IS_ENABLED instead so that the zero is present at the semantic
analysis stage, which eliminates this warning.

Link: https://github.com/ClangBuiltLinux/linux/issues/386
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/resources.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/atm/resources.c b/net/atm/resources.c
index bada395ecdb1..3e9f6391319e 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -203,13 +203,9 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 	int __user *sioc_len;
 	int __user *iobuf_len;
 
-#ifndef CONFIG_COMPAT
-	compat = 0; /* Just so the compiler _knows_ */
-#endif
-
 	switch (cmd) {
 	case ATM_GETNAMES:
-		if (compat) {
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 			struct compat_atm_iobuf __user *ciobuf = arg;
 			compat_uptr_t cbuf;
@@ -253,7 +249,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 		break;
 	}
 
-	if (compat) {
+	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 		struct compat_atmif_sioc __user *csioc = arg;
 		compat_uptr_t carg;
-- 
cgit v1.2.3


From 0805a4b894a8daaf76ad99934563d8ecfc6e7aed Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 11:11:26 -0700
Subject: net: atm: Add another IS_ENABLED(CONFIG_COMPAT) in atm_dev_ioctl

I removed compat's universal assignment to 0, which allows this if
statement to fall through when compat is passed with a value other
than 0.

Fixes: f9d19a7494e5 ("net: atm: Use IS_ENABLED in atm_dev_ioctl")
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/resources.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/atm/resources.c b/net/atm/resources.c
index 3e9f6391319e..889349c6d90d 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -413,7 +413,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 		}
 		/* fall through */
 	default:
-		if (compat) {
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 			if (!dev->ops->compat_ioctl) {
 				error = -EINVAL;
-- 
cgit v1.2.3


From 1e027960edfaa6a43f9ca31081729b716598112b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 Mar 2019 09:36:33 -0800
Subject: net/hsr: fix possible crash in add_timer()

syzbot found another add_timer() issue, this time in net/hsr [1]

Let's use mod_timer() which is safe.

[1]
kernel BUG at kernel/time/timer.c:1136!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 15909 Comm: syz-executor.3 Not tainted 5.0.0+ #97
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
kobject: 'loop2' (00000000f5629718): kobject_uevent_env
RIP: 0010:add_timer kernel/time/timer.c:1136 [inline]
RIP: 0010:add_timer+0x654/0xbe0 kernel/time/timer.c:1134
Code: 0f 94 c5 31 ff 44 89 ee e8 09 61 0f 00 45 84 ed 0f 84 77 fd ff ff e8 bb 5f 0f 00 e8 07 10 a0 ff e9 68 fd ff ff e8 ac 5f 0f 00 <0f> 0b e8 a5 5f 0f 00 0f 0b e8 9e 5f 0f 00 4c 89 b5 58 ff ff ff e9
RSP: 0018:ffff8880656eeca0 EFLAGS: 00010246
kobject: 'loop2' (00000000f5629718): fill_kobj_path: path = '/devices/virtual/block/loop2'
RAX: 0000000000040000 RBX: 1ffff1100caddd9a RCX: ffffc9000c436000
RDX: 0000000000040000 RSI: ffffffff816056c4 RDI: ffff88806a2f6cc8
RBP: ffff8880656eed58 R08: ffff888067f4a300 R09: ffff888067f4abc8
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88806a2f6cc0
R13: dffffc0000000000 R14: 0000000000000001 R15: ffff8880656eed30
FS:  00007fc2019bf700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000738000 CR3: 0000000067e8e000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 hsr_check_announce net/hsr/hsr_device.c:99 [inline]
 hsr_check_carrier_and_operstate+0x567/0x6f0 net/hsr/hsr_device.c:120
 hsr_netdev_notify+0x297/0xa00 net/hsr/hsr_main.c:51
 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
 call_netdevice_notifiers net/core/dev.c:1765 [inline]
 dev_open net/core/dev.c:1436 [inline]
 dev_open+0x143/0x160 net/core/dev.c:1424
 team_port_add drivers/net/team/team.c:1203 [inline]
 team_add_slave+0xa07/0x15d0 drivers/net/team/team.c:1933
 do_set_master net/core/rtnetlink.c:2358 [inline]
 do_set_master+0x1d4/0x230 net/core/rtnetlink.c:2332
 do_setlink+0x966/0x3510 net/core/rtnetlink.c:2493
 rtnl_setlink+0x271/0x3b0 net/core/rtnetlink.c:2747
 rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192
 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485
 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:632
 sock_write_iter+0x27c/0x3e0 net/socket.c:923
 call_write_iter include/linux/fs.h:1869 [inline]
 do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680
 do_iter_write fs/read_write.c:956 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:937
 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001
 do_writev+0xf6/0x290 fs/read_write.c:1036
 __do_sys_writev fs/read_write.c:1109 [inline]
 __se_sys_writev fs/read_write.c:1106 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457f29
Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fc2019bec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457f29
RDX: 0000000000000001 RSI: 00000000200000c0 RDI: 0000000000000003
RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc2019bf6d4
R13: 00000000004c4a60 R14: 00000000004dd218 R15: 00000000ffffffff

Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Arvid Brodin <arvid.brodin@alten.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c4676bacb8db..a97bf326b231 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev,
 			&& (old_operstate != IF_OPER_UP)) {
 		/* Went up */
 		hsr->announce_count = 0;
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
-		add_timer(&hsr->announce_timer);
+		mod_timer(&hsr->announce_timer,
+			  jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
 	}
 
 	if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
@@ -332,6 +331,7 @@ static void hsr_announce(struct timer_list *t)
 {
 	struct hsr_priv *hsr;
 	struct hsr_port *master;
+	unsigned long interval;
 
 	hsr = from_timer(hsr, t, announce_timer);
 
@@ -343,18 +343,16 @@ static void hsr_announce(struct timer_list *t)
 				hsr->protVersion);
 		hsr->announce_count++;
 
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+		interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
 	} else {
 		send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
 				hsr->protVersion);
 
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+		interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
 	}
 
 	if (is_admin_up(master->dev))
-		add_timer(&hsr->announce_timer);
+		mod_timer(&hsr->announce_timer, jiffies + interval);
 
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From ee60ad219f5c7c4fb2f047f88037770063ef785f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 8 Mar 2019 14:50:54 +0800
Subject: route: set the deleted fnhe fnhe_daddr to 0 in ip_del_fnhe to fix a
 race

The race occurs in __mkroute_output() when 2 threads lookup a dst:

  CPU A                 CPU B
  find_exception()
                        find_exception() [fnhe expires]
                        ip_del_fnhe() [fnhe is deleted]
  rt_bind_exception()

In rt_bind_exception() it will bind a deleted fnhe with the new dst, and
this dst will get no chance to be freed. It causes a dev defcnt leak and
consecutive dmesg warnings:

  unregister_netdevice: waiting for ethX to become free. Usage count = 1

Especially thanks Jon to identify the issue.

This patch fixes it by setting fnhe_daddr to 0 in ip_del_fnhe() to stop
binding the deleted fnhe with a new dst when checking fnhe's fnhe_daddr
and daddr in rt_bind_exception().

It works as both ip_del_fnhe() and rt_bind_exception() are protected by
fnhe_lock and the fhne is freed by kfree_rcu().

Fixes: deed49df7390 ("route: check and remove route cache when we get route")
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8ca3642f0d9b..a5da63e5faa2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1303,6 +1303,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
 		if (fnhe->fnhe_daddr == daddr) {
 			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
 				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			/* set fnhe_daddr to 0 to ensure it won't bind with
+			 * new dsts in rt_bind_exception().
+			 */
+			fnhe->fnhe_daddr = 0;
 			fnhe_flush_routes(fnhe);
 			kfree_rcu(fnhe, rcu);
 			break;
-- 
cgit v1.2.3


From 2e990dfd13974d9eae493006f42ffb48707970ef Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 8 Mar 2019 15:49:16 +0800
Subject: sctp: remove sched init from sctp_stream_init

syzbot reported a NULL-ptr deref caused by that sched->init() in
sctp_stream_init() set stream->rr_next = NULL.

  kasan: GPF could be caused by NULL-ptr deref or user memory access
  RIP: 0010:sctp_sched_rr_dequeue+0xd3/0x170 net/sctp/stream_sched_rr.c:141
  Call Trace:
    sctp_outq_dequeue_data net/sctp/outqueue.c:90 [inline]
    sctp_outq_flush_data net/sctp/outqueue.c:1079 [inline]
    sctp_outq_flush+0xba2/0x2790 net/sctp/outqueue.c:1205

All sched info is saved in sout->ext now, in sctp_stream_init()
sctp_stream_alloc_out() will not change it, there's no need to
call sched->init() again, since sctp_outq_init() has already
done it.

Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
Reported-by: syzbot+4c9934f20522c0efd657@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/stream.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 2936ed17bf9e..3b47457862cc 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -230,8 +230,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	for (i = 0; i < stream->outcnt; i++)
 		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
-	sched->init(stream);
-
 in:
 	sctp_stream_interleave_init(stream);
 	if (!incnt)
-- 
cgit v1.2.3


From 930c9f9125c85b5134b3e711bc252ecc094708e3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Mar 2019 12:48:39 +0000
Subject: rxrpc: Fix client call connect/disconnect race

rxrpc_disconnect_client_call() reads the call's connection ID protocol
value (call->cid) as part of that function's variable declarations.  This
is bad because it's not inside the locked section and so may race with
someone granting use of the channel to the call.

This manifests as an assertion failure (see below) where the call in the
presumed channel (0 because call->cid wasn't set when we read it) doesn't
match the call attached to the channel we were actually granted (if 1, 2 or
3).

Fix this by moving the read and dependent calculations inside of the
channel_lock section.  Also, only set the channel number and pointer
variables if cid is not zero (ie. unset).

This problem can be induced by injecting an occasional error in
rxrpc_wait_for_channel() before the call to schedule().

Make two further changes also:

 (1) Add a trace for wait failure in rxrpc_connect_call().

 (2) Drop channel_lock before BUG'ing in the case of the assertion failure.

The failure causes a trace akin to the following:

rxrpc: Assertion failed - 18446612685268945920(0xffff8880beab8c00) == 18446612685268621312(0xffff8880bea69800) is false
------------[ cut here ]------------
kernel BUG at net/rxrpc/conn_client.c:824!
...
RIP: 0010:rxrpc_disconnect_client_call+0x2bf/0x99d
...
Call Trace:
 rxrpc_connect_call+0x902/0x9b3
 ? wake_up_q+0x54/0x54
 rxrpc_new_client_call+0x3a0/0x751
 ? rxrpc_kernel_begin_call+0x141/0x1bc
 ? afs_alloc_call+0x1b5/0x1b5
 rxrpc_kernel_begin_call+0x141/0x1bc
 afs_make_call+0x20c/0x525
 ? afs_alloc_call+0x1b5/0x1b5
 ? __lock_is_held+0x40/0x71
 ? lockdep_init_map+0xaf/0x193
 ? lockdep_init_map+0xaf/0x193
 ? __lock_is_held+0x40/0x71
 ? yfs_fs_fetch_data+0x33b/0x34a
 yfs_fs_fetch_data+0x33b/0x34a
 afs_fetch_data+0xdc/0x3b7
 afs_read_dir+0x52d/0x97f
 afs_dir_iterate+0xa0/0x661
 ? iterate_dir+0x63/0x141
 iterate_dir+0xa2/0x141
 ksys_getdents64+0x9f/0x11b
 ? filldir+0x111/0x111
 ? do_syscall_64+0x3e/0x1a0
 __x64_sys_getdents64+0x16/0x19
 do_syscall_64+0x7d/0x1a0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/conn_client.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index b2adfa825363..f307a05076e1 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 
 	ret = rxrpc_wait_for_channel(call, gfp);
 	if (ret < 0) {
+		trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
 		rxrpc_disconnect_client_call(call);
 		goto out;
 	}
@@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
  */
 void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 {
-	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_channel *chan = NULL;
 	struct rxrpc_net *rxnet = conn->params.local->rxnet;
+	unsigned int channel = -1;
+	u32 cid;
 
+	spin_lock(&conn->channel_lock);
+
+	cid = call->cid;
+	if (cid) {
+		channel = cid & RXRPC_CHANNELMASK;
+		chan = &conn->channels[channel];
+	}
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
 	call->conn = NULL;
 
-	spin_lock(&conn->channel_lock);
-
 	/* Calls that have never actually been assigned a channel can simply be
 	 * discarded.  If the conn didn't get used either, it will follow
 	 * immediately unless someone else grabs it in the meantime.
@@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 		goto out;
 	}
 
-	ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+	if (rcu_access_pointer(chan->call) != call) {
+		spin_unlock(&conn->channel_lock);
+		BUG();
+	}
 
 	/* If a client call was exposed to the world, we save the result for
 	 * retransmission.
-- 
cgit v1.2.3


From 89664c623617b1d34447a927ac7871ddf3db29d3 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 3 Mar 2019 17:54:53 +0800
Subject: sctp: sctp_sock_migrate() returns error if sctp_bind_addr_dup() fails

It should fail to create the new sk if sctp_bind_addr_dup() fails
when accepting or peeloff an association.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 533207dbeae9..44f2acb3d433 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -102,9 +102,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk);
 static int sctp_do_bind(struct sock *, union sctp_addr *, int);
 static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
-			      struct sctp_association *assoc,
-			      enum sctp_socket_type type);
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			     struct sctp_association *assoc,
+			     enum sctp_socket_type type);
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
@@ -4891,7 +4891,11 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
 	 */
-	sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+	error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+	if (error) {
+		sk_common_release(newsk);
+		newsk = NULL;
+	}
 
 out:
 	release_sock(sk);
@@ -5639,7 +5643,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
 	 */
-	sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	err = sctp_sock_migrate(sk, sock->sk, asoc,
+				SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	if (err) {
+		sock_release(sock);
+		sock = NULL;
+	}
 
 	*sockp = sock;
 
@@ -9171,9 +9180,9 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
 /* Populate the fields of the newsk from the oldsk and migrate the assoc
  * and its messages to the newsk.
  */
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
-			      struct sctp_association *assoc,
-			      enum sctp_socket_type type)
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			     struct sctp_association *assoc,
+			     enum sctp_socket_type type)
 {
 	struct sctp_sock *oldsp = sctp_sk(oldsk);
 	struct sctp_sock *newsp = sctp_sk(newsk);
@@ -9182,6 +9191,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	struct sk_buff *skb, *tmp;
 	struct sctp_ulpevent *event;
 	struct sctp_bind_hashbucket *head;
+	int err;
 
 	/* Migrate socket buffer sizes and all the socket level options to the
 	 * new socket.
@@ -9210,8 +9220,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	/* Copy the bind_addr list from the original endpoint to the new
 	 * endpoint so that we can handle restarts properly
 	 */
-	sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
-				&oldsp->ep->base.bind_addr, GFP_KERNEL);
+	err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
+				 &oldsp->ep->base.bind_addr, GFP_KERNEL);
+	if (err)
+		return err;
 
 	/* Move any messages in the old socket's receive queue that are for the
 	 * peeled off association to the new socket's receive queue.
@@ -9296,6 +9308,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	}
 
 	release_sock(newsk);
+
+	return 0;
 }
 
 
-- 
cgit v1.2.3


From 60208f79139af0e2f84747d04a2f3321f174a398 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 3 Mar 2019 17:54:54 +0800
Subject: sctp: move up sctp_auth_init_hmacs() in sctp_endpoint_init()

sctp_auth_init_hmacs() is called only when ep->auth_enable is set.
It better to move up sctp_auth_init_hmacs() and remove auth_enable
check in it and check auth_enable only once in sctp_endpoint_init().

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c        |  6 ------
 net/sctp/endpointola.c | 18 ++++++++++--------
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 5b537613946f..39d72e58b8e5 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -471,12 +471,6 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
 	struct crypto_shash *tfm = NULL;
 	__u16   id;
 
-	/* If AUTH extension is disabled, we are done */
-	if (!ep->auth_enable) {
-		ep->auth_hmacs = NULL;
-		return 0;
-	}
-
 	/* If the transforms are already allocated, we are done */
 	if (ep->auth_hmacs)
 		return 0;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 40c7eb941bc9..0448b68fce74 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -107,6 +107,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 			auth_chunks->param_hdr.length =
 					htons(sizeof(struct sctp_paramhdr) + 2);
 		}
+
+		/* Allocate and initialize transorms arrays for supported
+		 * HMACs.
+		 */
+		err = sctp_auth_init_hmacs(ep, gfp);
+		if (err)
+			goto nomem;
 	}
 
 	/* Initialize the base structure. */
@@ -150,15 +157,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	INIT_LIST_HEAD(&ep->endpoint_shared_keys);
 	null_key = sctp_auth_shkey_create(0, gfp);
 	if (!null_key)
-		goto nomem;
+		goto nomem_shkey;
 
 	list_add(&null_key->key_list, &ep->endpoint_shared_keys);
 
-	/* Allocate and initialize transorms arrays for supported HMACs. */
-	err = sctp_auth_init_hmacs(ep, gfp);
-	if (err)
-		goto nomem_hmacs;
-
 	/* Add the null key to the endpoint shared keys list and
 	 * set the hmcas and chunks pointers.
 	 */
@@ -169,8 +171,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 
 	return ep;
 
-nomem_hmacs:
-	sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
+nomem_shkey:
+	sctp_auth_destroy_hmacs(ep->auth_hmacs);
 nomem:
 	/* Free all allocations */
 	kfree(auth_hmacs);
-- 
cgit v1.2.3


From c6f33e05225696fee3c901b7526b80f31848454e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 3 Mar 2019 17:54:55 +0800
Subject: sctp: call sctp_auth_init_hmacs() in sctp_sock_migrate()

New ep's auth_hmacs should be set if old ep's is set, in case that
net->sctp.auth_enable has been changed to 0 by users and new ep's
auth_hmacs couldn't be set in sctp_endpoint_init().

It can even crash kernel by doing:

  1. on server: sysctl -w net.sctp.auth_enable=1,
                sysctl -w net.sctp.addip_enable=1,
                sysctl -w net.sctp.addip_noauth_enable=0,
                listen() on server,
                sysctl -w net.sctp.auth_enable=0.
  2. on client: connect() to server.
  3. on server: accept() the asoc,
                sysctl -w net.sctp.auth_enable=1.
  4. on client: send() asconf packet to server.

The call trace:

  [  245.280251] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
  [  245.286872] RIP: 0010:sctp_auth_calculate_hmac+0xa3/0x140 [sctp]
  [  245.304572] Call Trace:
  [  245.305091]  <IRQ>
  [  245.311287]  sctp_sf_authenticate+0x110/0x160 [sctp]
  [  245.312311]  sctp_sf_eat_auth+0xf2/0x230 [sctp]
  [  245.313249]  sctp_do_sm+0x9a/0x2d0 [sctp]
  [  245.321483]  sctp_assoc_bh_rcv+0xed/0x1a0 [sctp]
  [  245.322495]  sctp_rcv+0xa66/0xc70 [sctp]

It's because the old ep->auth_hmacs wasn't copied to the new ep while
ep->auth_hmacs is used in sctp_auth_calculate_hmac() when processing
the incoming