From a8d23cbbf6c9f515ed678204ad2962be7c336344 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 3 Oct 2019 17:02:01 +0200 Subject: batman-adv: Avoid free/alloc race when handling OGM2 buffer A B.A.T.M.A.N. V virtual interface has an OGM2 packet buffer which is initialized using data from the netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified data is sent out or the functions modifying the OGM2 buffer try to access already freed memory regions. Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_ogm.c | 41 +++++++++++++++++++++++++++++++++-------- net/batman-adv/types.h | 4 ++++ 2 files changed, 37 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index dc4f7430cb5a..8033f24f506c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -256,14 +257,12 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send() - periodic worker broadcasting the own OGM - * @work: work queue item + * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the soft interface information */ -static void batadv_v_ogm_send(struct work_struct *work) +static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; - struct batadv_priv_bat_v *bat_v; - struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; @@ -271,8 +270,7 @@ static void batadv_v_ogm_send(struct work_struct *work) u16 tvlv_len = 0; int ret; - bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); - bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -363,6 +361,23 @@ out: return; } +/** + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM + * @work: work queue item + */ +static void batadv_v_ogm_send(struct work_struct *work) +{ + struct batadv_priv_bat_v *bat_v; + struct batadv_priv *bat_priv; + + bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); + bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + batadv_v_ogm_send_softif(bat_priv); + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); +} + /** * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface * @work: work queue item @@ -424,11 +439,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) - return; + goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** @@ -1050,6 +1069,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); + mutex_init(&bat_priv->bat_v.ogm_buff_mutex); + return 0; } @@ -1061,7 +1082,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; + + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index be7c02aa91e2..a9fb7b17f557 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include /* for linux/wait.h */ @@ -1539,6 +1540,9 @@ struct batadv_priv_bat_v { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ + struct mutex ogm_buff_mutex; + /** @ogm_wq: workqueue used to schedule OGM transmissions */ struct delayed_work ogm_wq; }; -- cgit v1.2.3 From 40e220b4218bb3d278e5e8cc04ccdfd1c7ff8307 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 3 Oct 2019 17:02:01 +0200 Subject: batman-adv: Avoid free/alloc race when handling OGM buffer Each slave interface of an B.A.T.M.A.N. IV virtual interface has an OGM packet buffer which is initialized using data from netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified/freed data is sent out or functions modifying the OGM buffer try to access already freed memory regions. Reported-by: syzbot+0cc629f19ccb8534935b@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 61 +++++++++++++++++++++++++++++++++++------ net/batman-adv/hard-interface.c | 2 ++ net/batman-adv/types.h | 3 ++ 3 files changed, 57 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index d78938e3e008..5b0b20e6da95 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -193,14 +195,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff; u32 random_seqno; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); - if (!ogm_buff) + if (!ogm_buff) { + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; + } hard_iface->bat_iv.ogm_buff = ogm_buff; @@ -212,35 +218,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; + + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ @@ -742,7 +772,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) } } -static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +/** + * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer + * @hard_iface: interface whose ogm buffer should be transmitted + */ +static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; @@ -753,9 +787,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || - hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) - return; + lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in @@ -823,6 +855,17 @@ out: batadv_hardif_put(primary_if); } +static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +{ + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) + return; + + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + batadv_iv_ogm_schedule_buff(hard_iface); + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); +} + /** * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over iterface * @orig_node: originator which reproadcasted the OGMs directly diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c90e47342bb0..afb52282d5bd 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -929,6 +930,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); + mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a9fb7b17f557..4d7f1baee7b7 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -82,6 +82,9 @@ struct batadv_hard_iface_bat_iv { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ + struct mutex ogm_buff_mutex; }; /** -- cgit v1.2.3 From 9e8acd9c44a0dd52b2922eeb82398c04e356c058 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 9 Oct 2019 10:31:24 +0200 Subject: bpf: lwtunnel: Fix reroute supplying invalid dst The dst in bpf_input() has lwtstate field set. As it is of the LWTUNNEL_ENCAP_BPF type, lwtstate->data is struct bpf_lwt. When the bpf program returns BPF_LWT_REROUTE, ip_route_input_noref is directly called on this skb. This causes invalid memory access, as ip_route_input_slow calls skb_tunnel_info(skb) that expects the dst->lwstate->data to be struct ip_tunnel_info. This results to struct bpf_lwt being accessed as struct ip_tunnel_info. Drop the dst before calling the IP route input functions (both for IPv4 and IPv6). Reported by KASAN. Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c") Signed-off-by: Jiri Benc Signed-off-by: Alexei Starovoitov Acked-by: Peter Oskolkov Link: https://lore.kernel.org/bpf/111664d58fe4e9dd9c8014bb3d0b2dab93086a9e.1570609794.git.jbenc@redhat.com --- net/core/lwt_bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index f93785e5833c..74cfb8b5ab33 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -88,11 +88,16 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) int err = -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { + struct net_device *dev = skb_dst(skb)->dev; struct iphdr *iph = ip_hdr(skb); + dev_hold(dev); + skb_dst_drop(skb); err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb_dst(skb)->dev); + iph->tos, dev); + dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { + skb_dst_drop(skb); err = ipv6_stub->ipv6_route_input(skb); } else { err = -EAFNOSUPPORT; -- cgit v1.2.3 From e7a409c3f46cb0dbc7bfd4f6f9421d53e92614a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Oct 2019 09:26:37 -0700 Subject: ipv4: fix IPSKB_FRAG_PMTU handling with fragmentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch removes the iph field from the state structure, which is not properly initialized. Instead, add a new field to make the "do we want to set DF" be the state bit and move the code to set the DF flag from ip_frag_next(). Joint work with Pablo and Linus. Fixes: 19c3401a917b ("net: ipv4: place control buffer handling away from fragmentation iterators") Reported-by: Patrick Schönthaler Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Linus Torvalds Signed-off-by: David S. Miller --- net/bridge/netfilter/nf_conntrack_bridge.c | 2 +- net/ipv4/ip_output.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 506d6141e44e..809673222382 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -95,7 +95,7 @@ slow_path: * This may also be a clone skbuff, we could preserve the geometry for * the copies but probably not worth the effort. */ - ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state); + ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); while (state.left > 0) { struct sk_buff *skb2; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 814b9b8882a0..3d8baaaf7086 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -645,11 +645,12 @@ void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter) EXPORT_SYMBOL(ip_fraglist_prepare); void ip_frag_init(struct sk_buff *skb, unsigned int hlen, - unsigned int ll_rs, unsigned int mtu, + unsigned int ll_rs, unsigned int mtu, bool DF, struct ip_frag_state *state) { struct iphdr *iph = ip_hdr(skb); + state->DF = DF; state->hlen = hlen; state->ll_rs = ll_rs; state->mtu = mtu; @@ -668,9 +669,6 @@ static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to, /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; - if (IPCB(from)->flags & IPSKB_FRAG_PMTU) - state->iph->frag_off |= htons(IP_DF); - /* ANK: dirty, but effective trick. Upgrade options only if * the segment to be fragmented was THE FIRST (otherwise, * options are already fixed) and make it ONCE @@ -738,6 +736,8 @@ struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state) */ iph = ip_hdr(skb2); iph->frag_off = htons((state->offset >> 3)); + if (state->DF) + iph->frag_off |= htons(IP_DF); /* * Added AC : If we are fragmenting a fragment that's not the @@ -883,7 +883,8 @@ slow_path: * Fragment the datagram. */ - ip_frag_init(skb, hlen, ll_rs, mtu, &state); + ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU, + &state); /* * Keep copying data until we run out. -- cgit v1.2.3 From d665c1281bc89ac85b8b0c058c22a3f94640a1d6 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 22 Oct 2019 07:57:42 +0800 Subject: net: sched: taprio: fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get one warnings when build kernel W=1: net/sched/sch_taprio.c:1155:6: warning: no previous prototype for ‘taprio_offload_config_changed’ [-Wmissing-prototypes] Make the function static to fix this. Fixes: 9c66d1564676 ("taprio: Add support for hardware offloading") Signed-off-by: Yi Wang Acked-by: Vinicius Costa Gomes Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 6719a65169d4..2121187229cd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1152,7 +1152,7 @@ EXPORT_SYMBOL_GPL(taprio_offload_free); * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). * This is left as TODO. */ -void taprio_offload_config_changed(struct taprio_sched *q) +static void taprio_offload_config_changed(struct taprio_sched *q) { struct sched_gate_list *oper, *admin; -- cgit v1.2.3 From 9464cc37f3671ee69cb1c00662b5e1f113a96b23 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Mon, 21 Oct 2019 12:01:57 +0200 Subject: net: openvswitch: free vport unless register_netdevice() succeeds syzbot found the following crash on: HEAD commit: 1e78030e Merge tag 'mmc-v5.3-rc1' of git://git.kernel.org/.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=148d3d1a600000 kernel config: https://syzkaller.appspot.com/x/.config?x=30cef20daf3e9977 dashboard link: https://syzkaller.appspot.com/bug?extid=13210896153522fe1ee5 compiler: gcc (GCC) 9.0.0 20181231 (experimental) syz repro: https://syzkaller.appspot.com/x/repro.syz?x=136aa8c4600000 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=109ba792600000 ===================================================================== BUG: memory leak unreferenced object 0xffff8881207e4100 (size 128): comm "syz-executor032", pid 7014, jiffies 4294944027 (age 13.830s) hex dump (first 32 bytes): 00 70 16 18 81 88 ff ff 80 af 8c 22 81 88 ff ff .p.........".... 00 b6 23 17 81 88 ff ff 00 00 00 00 00 00 00 00 ..#............. backtrace: [<000000000eb78212>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb78212>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000000eb78212>] slab_alloc mm/slab.c:3319 [inline] [<000000000eb78212>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000006ea6c6>] kmalloc include/linux/slab.h:552 [inline] [<00000000006ea6c6>] kzalloc include/linux/slab.h:748 [inline] [<00000000006ea6c6>] ovs_vport_alloc+0x37/0xf0 net/openvswitch/vport.c:130 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 [<00000000c10abb2d>] __do_sys_sendmsg net/socket.c:2365 [inline] [<00000000c10abb2d>] __se_sys_sendmsg net/socket.c:2363 [inline] [<00000000c10abb2d>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2363 BUG: memory leak unreferenced object 0xffff88811723b600 (size 64): comm "syz-executor032", pid 7014, jiffies 4294944027 (age 13.830s) hex dump (first 32 bytes): 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 02 00 00 00 05 35 82 c1 .............5.. backtrace: [<00000000352f46d8>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000352f46d8>] slab_post_alloc_hook mm/slab.h:522 [inline] [<00000000352f46d8>] slab_alloc mm/slab.c:3319 [inline] [<00000000352f46d8>] __do_kmalloc mm/slab.c:3653 [inline] [<00000000352f46d8>] __kmalloc+0x169/0x300 mm/slab.c:3664 [<000000008e48f3d1>] kmalloc include/linux/slab.h:557 [inline] [<000000008e48f3d1>] ovs_vport_set_upcall_portids+0x54/0xd0 net/openvswitch/vport.c:343 [<00000000541e4f4a>] ovs_vport_alloc+0x7f/0xf0 net/openvswitch/vport.c:139 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 BUG: memory leak unreferenced object 0xffff8881228ca500 (size 128): comm "syz-executor032", pid 7015, jiffies 4294944622 (age 7.880s) hex dump (first 32 bytes): 00 f0 27 18 81 88 ff ff 80 ac 8c 22 81 88 ff ff ..'........".... 40 b7 23 17 81 88 ff ff 00 00 00 00 00 00 00 00 @.#............. backtrace: [<000000000eb78212>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb78212>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000000eb78212>] slab_alloc mm/slab.c:3319 [inline] [<000000000eb78212>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000006ea6c6>] kmalloc include/linux/slab.h:552 [inline] [<00000000006ea6c6>] kzalloc include/linux/slab.h:748 [inline] [<00000000006ea6c6>] ovs_vport_alloc+0x37/0xf0 net/openvswitch/vport.c:130 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 [<00000000c10abb2d>] __do_sys_sendmsg net/socket.c:2365 [inline] [<00000000c10abb2d>] __se_sys_sendmsg net/socket.c:2363 [inline] [<00000000c10abb2d>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2363 ===================================================================== The function in net core, register_netdevice(), may fail with vport's destruction callback either invoked or not. After commit 309b66970ee2 ("net: openvswitch: do not free vport if register_netdevice() is failed."), the duty to destroy vport is offloaded from the driver OTOH, which ends up in the memory leak reported. It is fixed by releasing vport unless device is registered successfully. To do that, the callback assignment is defered until device is registered. Reported-by: syzbot+13210896153522fe1ee5@syzkaller.appspotmail.com Fixes: 309b66970ee2 ("net: openvswitch: do not free vport if register_netdevice() is failed.") Cc: Taehee Yoo Cc: Greg Rose Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Ying Xue Cc: Andrey Konovalov Signed-off-by: Hillf Danton Acked-by: Pravin B Shelar [sbrivio: this was sent to dev@openvswitch.org and never made its way to netdev -- resending original patch] Signed-off-by: Stefano Brivio Reviewed-by: Greg Rose Signed-off-by: Jakub Kicinski --- net/openvswitch/vport-internal_dev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 21c90d3a7ebf..58a7b8312c28 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_NO_QUEUE; netdev->needs_free_netdev = true; - netdev->priv_destructor = internal_dev_destructor; + netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; @@ -159,7 +159,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) struct internal_dev *internal_dev; struct net_device *dev; int err; - bool free_vport = true; vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms); if (IS_ERR(vport)) { @@ -190,10 +189,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); - if (err) { - free_vport = false; + if (err) goto error_unlock; - } + vport->dev->priv_destructor = internal_dev_destructor; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -207,8 +205,7 @@ error_unlock: error_free_netdev: free_netdev(dev); error_free_vport: - if (free_vport) - ovs_vport_free(vport); + ovs_vport_free(vport); error: return ERR_PTR(err); } -- cgit v1.2.3 From 6c5d9c2a6bedbb3c3c14253776320c0ee564f064 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 15:44:40 +0100 Subject: ipv6: include for missing declarations Include for the missing declarations of various functions. Fixes the following sparse warnings: net/ipv6/addrconf_core.c:94:5: warning: symbol 'register_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:100:5: warning: symbol 'unregister_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:106:5: warning: symbol 'inet6addr_notifier_call_chain' was not declared. Should it be static? net/ipv6/addrconf_core.c:112:5: warning: symbol 'register_inet6addr_validator_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:118:5: warning: symbol 'unregister_inet6addr_validator_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:125:5: warning: symbol 'inet6addr_validator_notifier_call_chain' was not declared. Should it be static? net/ipv6/addrconf_core.c:237:6: warning: symbol 'in6_dev_finish_destroy' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 783f3c1466da..2fc079284ca4 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -7,6 +7,7 @@ #include #include #include +#include #include /* if ipv6 module registers this function is used by xfrm to force all -- cgit v1.2.3 From daf61b026f4686250e6afa619e6d7b49edc61df7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2019 11:03:15 +0200 Subject: netfilter: nf_flow_table: set timeout before insertion into hashes Other garbage collector might remove an entry not fully set up yet. [570953.958293] RIP: 0010:memcmp+0x9/0x50 [...] [570953.958567] flow_offload_hash_cmp+0x1e/0x30 [nf_flow_table] [570953.958585] flow_offload_lookup+0x8c/0x110 [nf_flow_table] [570953.958606] nf_flow_offload_ip_hook+0x135/0xb30 [nf_flow_table] [570953.958624] nf_flow_offload_inet_hook+0x35/0x37 [nf_flow_table_inet] [570953.958646] nf_hook_slow+0x3c/0xb0 [570953.958664] __netif_receive_skb_core+0x90f/0xb10 [570953.958678] ? ip_rcv_finish+0x82/0xa0 [570953.958692] __netif_receive_skb_one_core+0x3b/0x80 [570953.958711] __netif_receive_skb+0x18/0x60 [570953.958727] netif_receive_skb_internal+0x45/0xf0 [570953.958741] napi_gro_receive+0xcd/0xf0 [570953.958764] ixgbe_clean_rx_irq+0x432/0xe00 [ixgbe] [570953.958782] ixgbe_poll+0x27b/0x700 [ixgbe] [570953.958796] net_rx_action+0x284/0x3c0 [570953.958817] __do_softirq+0xcc/0x27c [570953.959464] irq_exit+0xe8/0x100 [570953.960097] do_IRQ+0x59/0xe0 [570953.960734] common_interrupt+0xf/0xf Fixes: 43c8f131184f ("netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 132f5228b431..128245efe84a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -202,6 +202,8 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { int err; + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, nf_flow_offload_rhash_params); @@ -218,7 +220,6 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) return err; } - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); -- cgit v1.2.3 From 085461c8976e6cb4d5b608a7b7062f394c51a253 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 18 Oct 2019 14:10:31 +0200 Subject: netfilter: nf_tables_offload: restore basechain deletion Unbind callbacks on chain deletion. Fixes: 8fc618c52d16 ("netfilter: nf_tables_offload: refactor the nft_flow_offload_chain function") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index e546f759b7a7..ad783f4840ef 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -347,7 +347,7 @@ int nft_flow_rule_offload_commit(struct net *net) policy = nft_trans_chain_policy(trans); err = nft_flow_offload_chain(trans->ctx.chain, &policy, - FLOW_BLOCK_BIND); + FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) -- cgit v1.2.3 From 55667441c84fa5e0911a0aac44fb059c15ba6da2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Oct 2019 07:57:46 -0700 Subject: net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet Reported-by: Jonathan Berger Reported-by: Amit Klein Reported-by: Benny Pinkas Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 38 ++++++++++++++++---------------------- net/sched/sch_hhf.c | 8 ++++---- net/sched/sch_sfb.c | 13 +++++++------ net/sched/sch_sfq.c | 14 ++++++++------ 4 files changed, 35 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7c09d87d3269..68eda10d0680 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1350,30 +1350,21 @@ out_bad: } EXPORT_SYMBOL(__skb_flow_dissect); -static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) -{ - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); - BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != sizeof(*flow) - sizeof(flow->addrs)); @@ -1388,7 +1379,7 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) diff -= sizeof(flow->addrs.tipckey); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return sizeof(*flow) - diff; } __be32 flow_get_u32_src(const struct flow_keys *flow) @@ -1454,14 +1445,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) } } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); - hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -1471,12 +1463,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -1524,7 +1517,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); @@ -1544,13 +1537,14 @@ void __skb_get_hash(struct sk_buff *skb) __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 23cd1c873a2c..be35f03b657b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -5,11 +5,11 @@ * Copyright (C) 2013 Nandita Dukkipati */ -#include #include #include #include #include +#include #include #include @@ -126,7 +126,7 @@ struct wdrr_bucket { struct hhf_sched_data { struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; - u32 perturbation; /* hash perturbation */ + siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_overlimit; /* number of times max qdisc packet * limit was hit @@ -264,7 +264,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_get_hash_perturb(skb, q->perturbation); + hash = skb_get_hash_perturb(skb, &q->perturbation); /* Check if this packet belongs to an already established HH flow. */ flow_pos = hash & HHF_BIT_MASK; @@ -582,7 +582,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt, sch->limit = 1000; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_buckets); INIT_LIST_HEAD(&q->old_buckets); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index d448fe3068e5..4074c50ac3d7 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -45,7 +45,7 @@ struct sfb_bucket { * (Section 4.4 of SFB reference : moving hash functions) */ struct sfb_bins { - u32 perturbation; /* jhash perturbation */ + siphash_key_t perturbation; /* siphash key */ struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; }; @@ -217,7 +217,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) { - q->bins[slot].perturbation = prandom_u32(); + get_random_bytes(&q->bins[slot].perturbation, + sizeof(q->bins[slot].perturbation)); } static void sfb_swap_slot(struct sfb_sched_data *q) @@ -314,9 +315,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation); } else { - sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation); } @@ -352,7 +353,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Inelastic flow */ if (q->double_buffering) { sfbhash = skb_get_hash_perturb(skb, - q->bins[slot].perturbation); + &q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 68404a9d2ce4..c787d4d46017 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -117,7 +117,7 @@ struct sfq_sched_data { u8 headdrop; u8 maxdepth; /* limit of packets per flow */ - u32 perturbation; + siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ @@ -157,7 +157,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); + return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -607,9 +607,11 @@ static void sfq_perturbation(struct timer_list *t) struct sfq_sched_data *q = from_timer(q, t, perturb_timer); struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; + get_random_bytes(&nkey, sizeof(nkey)); spin_lock(root_lock); - q->perturbation = prandom_u32(); + q->perturbation = nkey; if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); @@ -688,7 +690,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); } sch_tree_unlock(sch); kfree(p); @@ -745,7 +747,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt, q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { int err = sfq_change(sch, opt); -- cgit v1.2.3 From 2afd23f78f39da84937006ecd24aa664a4ab052b Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 21 Oct 2019 10:16:58 +0200 Subject: xsk: Fix registration of Rx-only sockets Having Rx-only AF_XDP sockets can potentially lead to a crash in the system by a NULL pointer dereference in xsk_umem_consume_tx(). This function iterates through a list of all sockets tied to a umem and checks if there are any packets to send on the Tx ring. Rx-only sockets do not have a Tx ring, so this will cause a NULL pointer dereference. This will happen if you have registered one or more Rx-only sockets to a umem and the driver is checking the Tx ring even on Rx, or if the XDP_SHARED_UMEM mode is used and there is a mix of Rx-only and other sockets tied to the same umem. Fixed by only putting sockets with a Tx component on the list that xsk_umem_consume_tx() iterates over. Fixes: ac98d8aab61b ("xsk: wire upp Tx zero-copy functions") Reported-by: Kal Cutter Conley Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Acked-by: Jonathan Lemon Link: https://lore.kernel.org/bpf/1571645818-16244-1-git-send-email-magnus.karlsson@intel.com --- net/xdp/xdp_umem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 16d5f353163a..3049af269fbf 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -27,6 +27,9 @@ void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; + if (!xs->tx) + return; + spin_lock_irqsave(&umem->xsk_list_lock, flags); list_add_rcu(&xs->list, &umem->xsk_list); spin_unlock_irqrestore(&umem->xsk_list_lock, flags); @@ -36,6 +39,9 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; + if (!xs->tx) + return; + spin_lock_irqsave(&umem->xsk_list_lock, flags); list_del_rcu(&xs->list); spin_unlock_irqrestore(&umem->xsk_list_lock, flags); -- cgit v1.2.3 From 62931f59ce9cbabb934a431f48f2f1f441c605ac Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 19 Oct 2019 17:34:35 +0200 Subject: ipvs: don't ignore errors in case refcounting ip_vs module fails if the IPVS module is removed while the sync daemon is starting, there is a small gap where try_module_get() might fail getting the refcount inside ip_vs_use_count_inc(). Then, the refcounts of IPVS module are unbalanced, and the subsequent call to stop_sync_thread() causes the following splat: WARNING: CPU: 0 PID: 4013 at kernel/module.c:1146 module_put.part.44+0x15b/0x290 Modules linked in: ip_vs(-) nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 veth ip6table_filter ip6_tables iptable_filter binfmt_misc intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul ext4 mbcache jbd2 ghash_clmulni_intel snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_nhlt snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev pcspkr snd_timer virtio_balloon snd soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk failover virtio_console qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ata_piix ttm crc32c_intel serio_raw drm virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: nf_defrag_ipv6] CPU: 0 PID: 4013 Comm: modprobe Tainted: G W 5.4.0-rc1.upstream+ #741 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:module_put.part.44+0x15b/0x290 Code: 04 25 28 00 00 00 0f 85 18 01 00 00 48 83 c4 68 5b 5d 41 5c 41 5d 41 5e 41 5f c3 89 44 24 28 83 e8 01 89 c5 0f 89 57 ff ff ff <0f> 0b e9 78 ff ff ff 65 8b 1d 67 83 26 4a 89 db be 08 00 00 00 48 RSP: 0018:ffff888050607c78 EFLAGS: 00010297 RAX: 0000000000000003 RBX: ffffffffc1420590 RCX: ffffffffb5db0ef9 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffffc1420590 RBP: 00000000ffffffff R08: fffffbfff82840b3 R09: fffffbfff82840b3 R10: 0000000000000001 R11: fffffbfff82840b2 R12: 1ffff1100a0c0f90 R13: ffffffffc1420200 R14: ffff88804f533300 R15: ffff88804f533ca0 FS: 00007f8ea9720740(0000) GS:ffff888053800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3245abe000 CR3: 000000004c28a006 CR4: 00000000001606f0 Call Trace: stop_sync_thread+0x3a3/0x7c0 [ip_vs] ip_vs_sync_net_cleanup+0x13/0x50 [ip_vs] ops_exit_list.isra.5+0x94/0x140 unregister_pernet_operations+0x29d/0x460 unregister_pernet_device+0x26/0x60 ip_vs_cleanup+0x11/0x38 [ip_vs] __x64_sys_delete_module+0x2d5/0x400 do_syscall_64+0xa5/0x4e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f8ea8bf0db7 Code: 73 01 c3 48 8b 0d b9 80 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 80 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffcd38d2fe8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000002436240 RCX: 00007f8ea8bf0db7 RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00000000024362a8 RBP: 0000000000000000 R08: 00007f8ea8eba060 R09: 00007f8ea8c658a0 R10: 00007ffcd38d2a60 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000001 R14: 00000000024362a8 R15: 0000000000000000 irq event stamp: 4538 hardirqs last enabled at (4537): [] quarantine_put+0x9e/0x170 hardirqs last disabled at (4538): [] trace_hardirqs_off_thunk+0x1a/0x20 softirqs last enabled at (4522): [] sk_common_release+0x169/0x2d0 softirqs last disabled at (4520): [] sk_common_release+0xbe/0x2d0 Check the return value of ip_vs_use_count_inc() and let its caller return proper error. Inside do_ip_vs_set_ctl() the module is already refcounted, we don't need refcount/derefcount there. Finally, in register_ip_vs_app() and start_sync_thread(), take the module refcount earlier and ensure it's released in the error path. Change since v1: - better return values in case of failure of ip_vs_use_count_inc(), thanks to Julian Anastasov - no need to increase/decrease the module refcount in ip_vs_set_ctl(), thanks to Julian Anastasov Signed-off-by: Davide Caratti Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 12 ++++++++++-- net/netfilter/ipvs/ip_vs_ctl.c | 14 ++++---------- net/netfilter/ipvs/ip_vs_pe.c | 3 ++- net/netfilter/ipvs/ip_vs_sched.c | 3 ++- net/netfilter/ipvs/ip_vs_sync.c | 13 ++++++++++--- 5 files changed, 28 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 4515056ef1c2..f9b16f2b2219 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -193,21 +193,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app * mutex_lock(&__ip_vs_app_mutex); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) { + err = -ENOENT; + goto out_unlock; + } + list_for_each_entry(a, &ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); - /* increase the module use count */ - ip_vs_use_count_inc(); out_unlock: mutex_unlock(&__ip_vs_app_mutex); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8b48e7ce1c2c..c8f81dd15c83 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1275,7 +1275,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service *svc = NULL; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; /* Lookup the scheduler by 'u->sched_name' */ if (strcmp(u->sched_name, "none")) { @@ -2435,9 +2436,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (copy_from_user(arg, user, len) != 0) return -EFAULT; - /* increase the module use count */ - ip_vs_use_count_inc(); - /* Handle daemons since they have another lock */ if (cmd == IP_VS_SO_SET_STARTDAEMON || cmd == IP_VS_SO_SET_STOPDAEMON) { @@ -2450,13 +2448,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = -EINVAL; if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)) <= 0) - goto out_dec; + return ret; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { ret = stop_sync_thread(ipvs, dm->state); } - goto out_dec; + return ret; } mutex_lock(&__ip_vs_mutex); @@ -2551,10 +2549,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) out_unlock: mutex_unlock(&__ip_vs_mutex); - out_dec: - /* decrease the module use count */ - ip_vs_use_count_dec(); - return ret; } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 8e104dff7abc..166c669f0763 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -68,7 +68,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) struct ip_vs_pe *tmp; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 2f9d5cd5daee..d4903723be7e 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -179,7 +179,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index a4a78c4b06de..8dc892a9dc91 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1762,6 +1762,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1892,9 +1896,6 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); - /* increase the module use count */ - ip_vs_use_count_inc(); - return 0; out: @@ -1924,11 +1925,17 @@ out: } kfree(ti); } + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; } -- cgit v1.2.3 From c24b75e0f9239e78105f81c5f03a751641eb07ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 09:53:03 -0700 Subject: ipvs: move old_secure_tcp into struct netns_ipvs syzbot reported the following issue : BUG: KCSAN: data-race in update_defense_level / update_defense_level read to 0xffffffff861a6260 of 4 bytes by task 3006 on cpu 1: update_defense_level+0x621/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:177 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 write to 0xffffffff861a6260 of 4 bytes by task 7333 on cpu 0: update_defense_level+0xa62/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:205 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 7333 Comm: kworker/0:5 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events defense_work_handler Indeed, old_secure_tcp is currently a static variable, while it needs to be a per netns variable. Fixes: a0840e2e165a ("IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8f81dd15c83..3cccc88ef817 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -93,7 +93,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net, static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; - static int old_secure_tcp = 0; int availmem; int nomem; int to_change = -1; @@ -174,35 +173,35 @@ static void update_defense_level(struct netns_ipvs *ipvs) spin_lock(&ipvs->securetcp_lock); switch (ipvs->sysctl_secure_tcp) { case 0: - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; break; case 1: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; ipvs->sysctl_secure_tcp = 2; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; } break; case 2: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; ipvs->sysctl_secure_tcp = 1; } break; case 3: - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; break; } - old_secure_tcp = ipvs->sysctl_secure_tcp; + ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, ipvs->sysctl_secure_tcp > 1); -- cgit v1.2.3 From a69a85da458f79088c38a38db034a4d64d9c32c3 Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 24 Oct 2019 15:52:45 +0800 Subject: netfilter: nft_payload: fix missing check for matching length in offloads Payload offload rule should also check the length of the match. Moreover, check for unsupported link-layer fields: nft --debug=netlink add rule firewall zones vlan id 100 ... [ payload load 2b @ link header + 0 => reg 1 ] this loads 2byte base on ll header and offset 0. This also fixes unsupported raw payload match. Fixes: 92ad6325cb89 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 22a80eb60222..5cb2d8908d2a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -161,13 +161,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ethhdr, h_source): + if (priv->len != ETH_ALEN) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, src, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_dest): + if (priv->len != ETH_ALEN) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, dst, ETH_ALEN, reg); break; + default: + return -EOPNOTSUPP; } return 0; @@ -181,14 +189,23 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct iphdr, saddr): + if (priv->len != sizeof(struct in_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, sizeof(struct in_addr), reg); break; case offsetof(struct iphdr, daddr): + if (priv->len != sizeof(struct in_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, sizeof(struct in_addr), reg); break; case offsetof(struct iphdr, protocol): + if (priv->len != sizeof(__u8)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); @@ -208,14 +225,23 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ipv6hdr, saddr): + if (priv->len != sizeof(struct in6_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, sizeof(struct in6_addr), reg); break; case offsetof(struct ipv6hdr, daddr): + if (priv->len != sizeof(struct in6_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, sizeof(struct in6_addr), reg); break; case offsetof(struct ipv6hdr, nexthdr): + if (priv->len != sizeof(__u8)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); @@ -255,10 +281,16 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct tcphdr, source): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct tcphdr, dest): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, sizeof(__be16), reg); break; @@ -277,10 +309,16 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct udphdr, source): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct udphdr, dest): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, sizeof(__be16), reg); break; -- cgit v1.2.3 From 82ecff655e7968151b0047f1b5de03b249e5c1c4 Mon Sep 17 00:00:00 2001 From: Takeshi Misawa Date: Sat, 19 Oct 2019 15:34:43 +0900 Subject: keys: Fix memory leak in copy_net_ns If copy_net_ns() failed after net_alloc(), net->key_domain is leaked. Fix this, by freeing key_domain in error path. syzbot report: BUG: memory leak unreferenced object 0xffff8881175007e0 (size 32): comm "syz-executor902", pid 7069, jiffies 4294944350 (age 28.400s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a83ed741>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000a83ed741>] slab_post_alloc_hook mm/slab.h:439 [inline] [<00000000a83ed741>] slab_alloc mm/slab.c:3326 [inline] [<00000000a83ed74