From c78296665c3d81f040117432ab9e1cb125521b0c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 26 Feb 2016 17:56:13 +0100 Subject: batman-adv: Check skb size before using encapsulated ETH+VLAN header The encapsulated ethernet and VLAN header may be outside the received ethernet frame. Thus the skb buffer size has to be checked before it can be parsed to find out if it encapsulates another batman-adv packet. Fixes: 420193573f11 ("batman-adv: softif bridge loop avoidance") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/soft-interface.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0710379491bf..8a136b6a1ff0 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -408,11 +408,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -424,8 +430,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() -- cgit v1.2.3 From e48474ed8a217b7f80f2a42bc05352406a06cb67 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 11 Mar 2016 16:01:09 +0100 Subject: batman-adv: init neigh node last seen field Signed-off-by: Marek Lindner [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index e4cbb0753e37..d52f67a0c057 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -663,6 +663,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; + neigh_node->last_seen = jiffies; /* extra reference for return */ kref_init(&neigh_node->refcount); -- cgit v1.2.3 From f2d23861b818d08bcd15cc1612ae94aa33b3931c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 19 Mar 2016 13:55:21 +0100 Subject: batman-adv: Deactivate TO_BE_ACTIVATED hardif on shutdown The shutdown of an batman-adv interface can happen with one of its slave interfaces still being in the BATADV_IF_TO_BE_ACTIVATED state. A possible reason for it is that the routing algorithm BATMAN_V was selected and batadv_schedule_bat_ogm was not yet called for this interface. This slave interface still has to be set to BATADV_IF_INACTIVE or the batman-adv interface will never reduce its usage counter and thus never gets shutdown. This problem can be simulated via: $ modprobe dummy $ modprobe batman-adv routing_algo=BATMAN_V $ ip link add bat0 type batadv $ ip link set dummy0 master bat0 $ ip link set dummy0 up $ ip link del bat0 unregister_netdevice: waiting for bat0 to become free. Usage count = 3 Reported-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b22b2775a0a5..c61d5b0b24d2 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -572,8 +572,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; - if (hard_iface->if_status == BATADV_IF_ACTIVE) - batadv_hardif_deactivate_interface(hard_iface); + batadv_hardif_deactivate_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; -- cgit v1.2.3 From d1a65f1741bfd9c69f9e4e2ad447a89b6810427d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 20 Mar 2016 12:27:53 +0100 Subject: batman-adv: Reduce refcnt of removed router when updating route _batadv_update_route rcu_derefences orig_ifinfo->router outside of a spinlock protected region to print some information messages to the debug log. But this pointer is not checked again when the new pointer is assigned in the spinlock protected region. Thus is can happen that the value of orig_ifinfo->router changed in the meantime and thus the reference counter of the wrong router gets reduced after the spinlock protected region. Just rcu_dereferencing the value of orig_ifinfo->router inside the spinlock protected region (which also set the new pointer) is enough to get the correct old router object. Fixes: e1a5382f978b ("batman-adv: Make orig_node->router an rcu protected pointer") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 4dd646a52f1a..b781bf753250 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -105,6 +105,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(orig_ifinfo); -- cgit v1.2.3 From c4fdb6cff2aa0ae740c5f19b6f745cbbe786d42f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 11 Mar 2016 14:04:49 +0100 Subject: batman-adv: Fix broadcast/ogm queue limit on a removed interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When removing a single interface while a broadcast or ogm packet is still pending then we will free the forward packet without releasing the queue slots again. This patch is supposed to fix this issue. Fixes: 6d5808d4ae1b ("batman-adv: Add missing hardif_free_ref in forw_packet_free") Signed-off-by: Linus Lüssing [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/send.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3ce06e0a91b1..76417850d3fc 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -675,6 +675,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -702,6 +705,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } -- cgit v1.2.3 From 6c1ea260f89709e0021d2c59f8fd2a104b5b1123 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 11 Apr 2016 19:34:49 +0200 Subject: libceph: make authorizer destruction independent of ceph_auth_client Starting the kernel client with cephx disabled and then enabling cephx and restarting userspace daemons can result in a crash: [262671.478162] BUG: unable to handle kernel paging request at ffffebe000000000 [262671.531460] IP: [] kfree+0x5a/0x130 [262671.584334] PGD 0 [262671.635847] Oops: 0000 [#1] SMP [262672.055841] CPU: 22 PID: 2961272 Comm: kworker/22:2 Not tainted 4.2.0-34-generic #39~14.04.1-Ubuntu [262672.162338] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS 2.4.3 07/09/2014 [262672.268937] Workqueue: ceph-msgr con_work [libceph] [262672.322290] task: ffff88081c2d0dc0 ti: ffff880149ae8000 task.ti: ffff880149ae8000 [262672.428330] RIP: 0010:[] [] kfree+0x5a/0x130 [262672.535880] RSP: 0018:ffff880149aeba58 EFLAGS: 00010286 [262672.589486] RAX: 000001e000000000 RBX: 0000000000000012 RCX: ffff8807e7461018 [262672.695980] RDX: 000077ff80000000 RSI: ffff88081af2be04 RDI: 0000000000000012 [262672.803668] RBP: ffff880149aeba78 R08: 0000000000000000 R09: 0000000000000000 [262672.912299] R10: ffffebe000000000 R11: ffff880819a60e78 R12: ffff8800aec8df40 [262673.021769] R13: ffffffffc035f70f R14: ffff8807e5b138e0 R15: ffff880da9785840 [262673.131722] FS: 0000000000000000(0000) GS:ffff88081fac0000(0000) knlGS:0000000000000000 [262673.245377] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [262673.303281] CR2: ffffebe000000000 CR3: 0000000001c0d000 CR4: 00000000001406e0 [262673.417556] Stack: [262673.472943] ffff880149aeba88 ffff88081af2be04 ffff8800aec8df40 ffff88081af2be04 [262673.583767] ffff880149aeba98 ffffffffc035f70f ffff880149aebac8 ffff8800aec8df00 [262673.694546] ffff880149aebac8 ffffffffc035c89e ffff8807e5b138e0 ffff8805b047f800 [262673.805230] Call Trace: [262673.859116] [] ceph_x_destroy_authorizer+0x1f/0x50 [libceph] [262673.968705] [] ceph_auth_destroy_authorizer+0x3e/0x60 [libceph] [262674.078852] [] put_osd+0x45/0x80 [libceph] [262674.134249] [] remove_osd+0xae/0x140 [libceph] [262674.189124] [] __reset_osd+0x103/0x150 [libceph] [262674.243749] [] kick_requests+0x223/0x460 [libceph] [262674.297485] [] ceph_osdc_handle_map+0x282/0x5e0 [libceph] [262674.350813] [] dispatch+0x4e/0x720 [libceph] [262674.403312] [] try_read+0x3d1/0x1090 [libceph] [262674.454712] [] ? dequeue_entity+0x152/0x690 [262674.505096] [] con_work+0xcb/0x1300 [libceph] [262674.555104] [] process_one_work+0x14e/0x3d0 [262674.604072] [] worker_thread+0x11a/0x470 [262674.652187] [] ? rescuer_thread+0x310/0x310 [262674.699022] [] kthread+0xd2/0xf0 [262674.744494] [] ? kthread_create_on_node+0x1c0/0x1c0 [262674.789543] [] ret_from_fork+0x3f/0x70 [262674.834094] [] ? kthread_create_on_node+0x1c0/0x1c0 What happens is the following: (1) new MON session is established (2) old "none" ac is destroyed (3) new "cephx" ac is constructed ... (4) old OSD session (w/ "none" authorizer) is put ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer) osd->o_auth.authorizer in the "none" case is just a bare pointer into ac, which contains a single static copy for all services. By the time we get to (4), "none" ac, freed in (2), is long gone. On top of that, a new vtable installed in (3) points us at ceph_x_destroy_authorizer(), so we end up trying to destroy a "none" authorizer with a "cephx" destructor operating on invalid memory! To fix this, decouple authorizer destruction from ac and do away with a single static "none" authorizer by making a copy for each OSD or MDS session. Authorizers themselves are independent of ac and so there is no reason for destroy_authorizer() to be an ac op. Make it an op on the authorizer itself by turning ceph_authorizer into a real struct. Fixes: http://tracker.ceph.com/issues/15447 Reported-by: Alan Zhang Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth.c | 8 ++---- net/ceph/auth_none.c | 71 ++++++++++++++++++++++++++++----------------------- net/ceph/auth_none.h | 3 +-- net/ceph/auth_x.c | 21 ++++++++------- net/ceph/auth_x.h | 1 + net/ceph/osd_client.c | 6 ++--- 6 files changed, 55 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 6b923bcaa2a4..2bc5965fdd1e 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -293,13 +293,9 @@ int ceph_auth_create_authorizer(struct ceph_auth_client *ac, } EXPORT_SYMBOL(ceph_auth_create_authorizer); -void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) +void ceph_auth_destroy_authorizer(struct ceph_authorizer *a) { - mutex_lock(&ac->mutex); - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, a); - mutex_unlock(&ac->mutex); + a->destroy(a); } EXPORT_SYMBOL(ceph_auth_destroy_authorizer); diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 8c93fa8d81bc..5f836f02ae36 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac) struct ceph_auth_none_info *xi = ac->private; xi->starting = true; - xi->built_authorizer = false; } static void destroy(struct ceph_auth_client *ac) @@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac, + struct ceph_none_authorizer *au) +{ + void *p = au->buf; + void *const end = p + sizeof(au->buf); + int ret; + + ceph_encode_8_safe(&p, end, 1, e_range); + ret = ceph_entity_name_encode(ac->name, &p, end); + if (ret < 0) + return ret; + + ceph_encode_64_safe(&p, end, ac->global_id, e_range); + au->buf_len = p - (void *)au->buf; + dout("%s built authorizer len %d\n", __func__, au->buf_len); + return 0; + +e_range: + return -ERANGE; +} + static int build_request(struct ceph_auth_client *ac, void *buf, void *end) { return 0; @@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result, return result; } +static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a) +{ + kfree(a); +} + /* - * build an 'authorizer' with our entity_name and global_id. we can - * reuse a single static copy since it is identical for all services - * we connect to. + * build an 'authorizer' with our entity_name and global_id. it is + * identical for all services we connect to. */ static int ceph_auth_none_create_authorizer( struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth) { - struct ceph_auth_none_info *ai = ac->private; - struct ceph_none_authorizer *au = &ai->au; - void *p, *end; + struct ceph_none_authorizer *au; int ret; - if (!ai->built_authorizer) { - p = au->buf; - end = p + sizeof(au->buf); - ceph_encode_8(&p, 1); - ret = ceph_entity_name_encode(ac->name, &p, end - 8); - if (ret < 0) - goto bad; - ceph_decode_need(&p, end, sizeof(u64), bad2); - ceph_encode_64(&p, ac->global_id); - au->buf_len = p - (void *)au->buf; - ai->built_authorizer = true; - dout("built authorizer len %d\n", au->buf_len); + au = kmalloc(sizeof(*au), GFP_NOFS); + if (!au) + return -ENOMEM; + + au->base.destroy = ceph_auth_none_destroy_authorizer; + + ret = ceph_auth_none_build_authorizer(ac, au); + if (ret) { + kfree(au); + return ret; } auth->authorizer = (struct ceph_authorizer *) au; @@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer( auth->authorizer_reply_buf_len = sizeof (au->reply_buf); return 0; - -bad2: - ret = -ERANGE; -bad: - return ret; -} - -static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) -{ - /* nothing to do */ } static const struct ceph_auth_client_ops ceph_auth_none_ops = { @@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, - .destroy_authorizer = ceph_auth_none_destroy_authorizer, }; int ceph_auth_none_init(struct ceph_auth_client *ac) @@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac) return -ENOMEM; xi->starting = true; - xi->built_authorizer = false; ac->protocol = CEPH_AUTH_NONE; ac->private = xi; diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h index 059a3ce4b53f..62021535ae4a 100644 --- a/net/ceph/auth_none.h +++ b/net/ceph/auth_none.h @@ -12,6 +12,7 @@ */ struct ceph_none_authorizer { + struct ceph_authorizer base; char buf[128]; int buf_len; char reply_buf[0]; @@ -19,8 +20,6 @@ struct ceph_none_authorizer { struct ceph_auth_none_info { bool starting; - bool built_authorizer; - struct ceph_none_authorizer au; /* we only need one; it's static */ }; int ceph_auth_none_init(struct ceph_auth_client *ac); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 9e43a315e662..a0905f04bd13 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -565,6 +565,14 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, return -EAGAIN; } +static void ceph_x_destroy_authorizer(struct ceph_authorizer *a) +{ + struct ceph_x_authorizer *au = (void *)a; + + ceph_x_authorizer_cleanup(au); + kfree(au); +} + static int ceph_x_create_authorizer( struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth) @@ -581,6 +589,8 @@ static int ceph_x_create_authorizer( if (!au) return -ENOMEM; + au->base.destroy = ceph_x_destroy_authorizer; + ret = ceph_x_build_authorizer(ac, th, au); if (ret) { kfree(au); @@ -643,16 +653,6 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, return ret; } -static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) -{ - struct ceph_x_authorizer *au = (void *)a; - - ceph_x_authorizer_cleanup(au); - kfree(au); -} - - static void ceph_x_reset(struct ceph_auth_client *ac) { struct ceph_x_info *xi = ac->private; @@ -770,7 +770,6 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .create_authorizer = ceph_x_create_authorizer, .update_authorizer = ceph_x_update_authorizer, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, - .destroy_authorizer = ceph_x_destroy_authorizer, .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, .destroy = ceph_x_destroy, diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 40b1a3cf7397..21a5af904bae 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -26,6 +26,7 @@ struct ceph_x_ticket_handler { struct ceph_x_authorizer { + struct ceph_authorizer base; struct ceph_crypto_key session_key; struct ceph_buffer *buf; unsigned int service; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 32355d9d0103..40a53a70efdf 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1087,10 +1087,8 @@ static void put_osd(struct ceph_osd *osd) dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); if (atomic_dec_and_test(&osd->o_ref)) { - struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; - if (osd->o_auth.authorizer) - ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); + ceph_auth_destroy_authorizer(osd->o_auth.authorizer); kfree(osd); } } @@ -2984,7 +2982,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, struct ceph_auth_handshake *auth = &o->o_auth; if (force_new && auth->authorizer) { - ceph_auth_destroy_authorizer(ac, auth->authorizer); + ceph_auth_destroy_authorizer(auth->authorizer); auth->authorizer = NULL; } if (!auth->authorizer) { -- cgit v1.2.3 From e6436be21e77e3659b4ff7e357ab5a8342d132d2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Apr 2016 13:47:08 +0200 Subject: mac80211: fix statistics leak if dev_alloc_name() fails In the case that dev_alloc_name() fails, e.g. because the name was given by the user and already exists, we need to clean up properly and free the per-CPU statistics. Fix that. Cc: stable@vger.kernel.org Fixes: 5a490510ba5f ("mac80211: use per-CPU TX/RX statistics") Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 453b4e741780..e1cb22c16530 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } -- cgit v1.2.3 From a64b04d86d14c81f50f68e102f79ef301e3d0a0e Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 27 Apr 2016 11:29:06 +0200 Subject: gre: do not assign header_ops in collect metadata mode In ipgre mode (i.e. not gretap) with collect metadata flag set, the tunnel is incorrectly assumed to be mGRE in NBMA mode (see commit 6a5f44d7a048c). This is not the case, we're controlling the encapsulation addresses by lwtunnel metadata. And anyway, assigning dev->header_ops in collect metadata mode does not make sense. Although it would be more user firendly to reject requests that specify both the collect metadata flag and a remote/local IP address, this would break current users of gretap or introduce ugly code and differences in handling ipgre and gretap configuration. Keep the current behavior of remote/local IP address being ignored in such case. v3: Back to v1, added explanation paragraph. v2: Reject configuration specifying both remote/local address and collect metadata flag. Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index af5d1f38217f..d0abde4236af 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -893,7 +893,7 @@ static int ipgre_tunnel_init(struct net_device *dev) netif_keep_dst(dev); dev->addr_len = 4; - if (iph->daddr) { + if (iph->daddr && !tunnel->collect_md) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { if (!iph->saddr) @@ -902,8 +902,9 @@ static int ipgre_tunnel_init(struct net_device *dev) dev->header_ops = &ipgre_header_ops; } #endif - } else + } else if (!tunnel->collect_md) { dev->header_ops = &ipgre_header_ops; + } return ip_tunnel_init(dev); } -- cgit v1.2.3 From 2090714e1d6e80979dd6926be22b0de9ca432273 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 27 Apr 2016 11:29:07 +0200 Subject: gre: build header correctly for collect metadata tunnels In ipgre (i.e. not gretap) + collect metadata mode, the skb was assumed to contain Ethernet header and was encapsulated as ETH_P_TEB. This is not the case, the interface is ARPHRD_IPGRE and the protocol to be used for encapsulation is skb->protocol. Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0abde4236af..f973e0a58993 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -523,7 +523,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb, return ip_route_output_key(net, fl); } -static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) +static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, + __be16 proto) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -575,7 +576,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); - build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), + build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -616,7 +617,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, const struct iphdr *tnl_params; if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; } @@ -660,7 +661,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; } -- cgit v1.2.3 From 946b636f1730c64e05ff7fe8cf7136422fa8ea70 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 27 Apr 2016 14:08:01 +0200 Subject: gre: reject GUE and FOU in collect metadata mode The collect metadata mode does not support GUE nor FOU. This might be implemented later; until then, we should reject such config. I think this is okay to be changed. It's unlikely anyone has such configuration (as it doesn't work anyway) and we may need a way to distinguish whether it's supported or not by the kernel later. For backwards compatibility with iproute2, it's not possible to just check the attribute presence (iproute2 always includes the attribute), the actual value has to be checked, too. Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f973e0a58993..f502d34bcb40 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -948,6 +948,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) if (flags & (GRE_VERSION|GRE_ROUTING)) return -EINVAL; + if (data[IFLA_GRE_COLLECT_METADATA] && + data[IFLA_GRE_ENCAP_TYPE] && + nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) + return -EINVAL; + return 0; } -- cgit v1.2.3 From 2871734e85e920503d49b3a8bc0afbe0773b6036 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 12 Mar 2016 11:12:59 +0100 Subject: batman-adv: fix DAT candidate selection (must use vid) Now that DAT is VLAN aware, it must use the VID when computing the DHT address of the candidate nodes where an entry is going to be stored/retrieved. Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Antonio Quartulli [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/distributed-arp-table.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index e96d7c745b4a..3e6b2624f980 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -568,6 +568,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT + * @vid: VLAN identifier * * An originator O is selected if and only if its DHT_ID value is one of three * closest values (from the LEFT, with wrap around if needed) then the hash @@ -576,7 +577,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * -batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, + unsigned short vid) { int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; @@ -592,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) return NULL; dat.ip = ip_dst; - dat.vid = 0; + dat.vid = vid; ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); @@ -612,6 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key + * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * * This function copies the skb with pskb_copy() and is sent as unicast packet @@ -622,7 +625,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, - int packet_subtype) + unsigned short vid, int packet_subtype) { int i; bool ret = false; @@ -631,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *tmp_skb; struct batadv_dat_candidate *cand; - cand = batadv_dat_select_candidates(bat_priv, ip); + cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) goto out; @@ -1022,7 +1025,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, ret = true; } else { /* Send the request to the DHT */ - ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_GET); } out: @@ -1150,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply */ - batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); - batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); } /** -- cgit v1.2.3 From b6cf5d499fddbfcffe751e81fb9f1a07d6348026 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 14 Apr 2016 09:37:05 +0800 Subject: batman-adv: B.A.T.M.A.N V - make sure iface is reactivated upon NETDEV_UP event At the moment there is no explicit reactivation of an hard-interface upon NETDEV_UP event. In case of B.A.T.M.A.N. IV the interface is reactivated as soon as the next OGM is scheduled for sending, but this mechanism does not work with B.A.T.M.A.N. V. The latter does not rely on the same scheduling mechanism as its predecessor and for this reason the hard-interface remains deactivated forever after being brought down once. This patch fixes the reactivation mechanism by adding a new routing API which explicitly allows each algorithm to perform any needed operation upon interface re-activation. Such API is optional and is implemented by B.A.T.M.A.N. V only and it just takes care of setting the iface status to ACTIVE Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_v.c | 12 ++++++++++++ net/batman-adv/hard-interface.c | 3 +++ net/batman-adv/types.h | 3 +++ 3 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 3315b9a598af..4026f198a734 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -32,10 +32,21 @@ #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "hard-interface.h" #include "hash.h" #include "originator.h" #include "packet.h" +static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can + * set the interface as ACTIVE right away, without any risk of race + * condition + */ + if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) + hard_iface->if_status = BATADV_IF_ACTIVE; +} + static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) { int ret; @@ -274,6 +285,7 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", + .bat_iface_activate = batadv_v_iface_activate, .bat_iface_enable = batadv_v_iface_enable, .bat_iface_disable = batadv_v_iface_disable, .bat_iface_update_mac = batadv_v_iface_update_mac, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c61d5b0b24d2..0a7deaf2670a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -407,6 +407,9 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); + if (bat_priv->bat_algo_ops->bat_iface_activate) + bat_priv->bat_algo_ops->bat_iface_activate(hard_iface); + out: if (primary_if) batadv_hardif_put(primary_if); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9abfb3e73c34..443e9b84e07d 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1250,6 +1250,8 @@ struct batadv_forw_packet { * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list * @name: name of the algorithm + * @bat_iface_activate: start routing mechanisms when hard-interface is brought + * up * @bat_iface_enable: init routing info when hard-interface is enabled * @bat_iface_disable: de-init routing info when hard-interface is disabled * @bat_iface_update_mac: (re-)init mac addresses of the protocol information @@ -1277,6 +1279,7 @@ struct batadv_forw_packet { struct batadv_algo_ops { struct hlist_node list; char *name; + void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface); int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); -- cgit v1.2.3 From a33d970d0b54b09746d5540af8271fad4eb10229 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 11 Mar 2016 16:44:05 +0100 Subject: batman-adv: Fix reference counting of vlan object for tt_local_entry The batadv_tt_local_entry was specific to a batadv_softif_vlan and held an implicit reference to it. But this reference was never stored in form of a pointer in the tt_local_entry itself. Instead batadv_tt_local_remove, batadv_tt_local_table_free and batadv_tt_local_purge_pending_clients depend on a consistent state of bat_priv->softif_vlan_list and that batadv_softif_vlan_get always returns the batadv_softif_vlan object which it has a reference for. But batadv_softif_vlan_get cannot guarantee that because it is working only with rcu_read_lock on this list. It can therefore happen that an vid is in this list twice or that batadv_softif_vlan_get cannot find the batadv_softif_vlan for an vid due to some other list operations taking place at the same time. Instead add a batadv_softif_vlan pointer directly in batadv_tt_local_entry which will be used for the reference counter decremented on release of batadv_tt_local_entry. Fixes: 35df3b298fc8 ("batman-adv: fix TT VLAN inconsistency on VLAN re-add") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 42 ++++---------------------------------- net/batman-adv/types.h | 2 ++ 2 files changed, 6 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0b43e86328a5..9b4551a86535 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -215,6 +215,8 @@ static void batadv_tt_local_entry_release(struct kref *ref) tt_local_entry = container_of(ref, struct batadv_tt_local_entry, common.refcount); + batadv_softif_vlan_put(tt_local_entry->vlan); + kfree_rcu(tt_local_entry, common.rcu); } @@ -673,6 +675,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; + tt_local->vlan = vlan; /* the batman interface mac and multicast addresses should never be * purged @@ -991,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct batadv_softif_vlan *vlan; struct hlist_head *head; unsigned short vid; u32 i; @@ -1027,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) last_seen_msecs = last_seen_msecs % 1000; no_purge = tt_common_entry->flags & np_flag; - - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan) { - seq_printf(seq, "Cannot retrieve VLAN %d\n", - BATADV_PRINT_VID(vid)); - continue; - } - seq_printf(seq, " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, @@ -1052,9 +1046,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), no_purge ? 0 : last_seen_secs, no_purge ? 0 : last_seen_msecs, - vlan->tt.crc); - - batadv_softif_vlan_put(vlan); + tt_local->vlan->tt.crc); } rcu_read_unlock(); } @@ -1099,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, { struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - struct batadv_softif_vlan *vlan; void *tt_entry_exists; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); @@ -1139,14 +1130,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, /* extra call to free the local tt entry */ batadv_tt_local_entry_put(tt_local_entry); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan) - goto out; - - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - out: if (tt_local_entry) batadv_tt_local_entry_put(tt_local_entry); @@ -1219,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; u32 i; @@ -1241,14 +1223,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) struct batadv_tt_local_entry, common); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, - tt_common_entry->vid); - if (vlan) { - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - } - batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); @@ -3309,7 +3283,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -3339,13 +3312,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_tt_local_entry, common); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); - if (vlan) { - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - } - batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 443e9b84e07d..65afd090ab3e 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1073,10 +1073,12 @@ struct batadv_tt_common_entry { * struct batadv_tt_local_entry - translation table local entry data * @common: general translation table data * @last_seen: timestamp used for purging stale tt local entries + * @vlan: soft-interface vlan of the entry */ struct batadv_tt_local_entry { struct batadv_tt_common_entry common; unsigned long last_seen; + struct batadv_softif_vlan *vlan; }; /** -- cgit v1.2.3 From abe59c65225ccd63a5964e2f2a73dd2995b948e7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 11 Mar 2016 16:44:06 +0100 Subject: batman-adv: Fix reference counting of hardif_neigh_node object for neigh_node The batadv_neigh_node was specific to a batadv_hardif_neigh_node and held an implicit reference to it. But this reference was never stored in form of a pointer in the batadv_neigh_node itself. Instead batadv_neigh_node_release depends on a consistent state of hard_iface->neigh_list and that batadv_hardif_neigh_get always returns the batadv_hardif_neigh_node object which it has a reference for. But batadv_hardif_neigh_get cannot guarantee that because it is working only with rcu_read_lock on this list. It can therefore happen that a neigh_addr is in this list twice or that batadv_hardif_neigh_get cannot find the batadv_hardif_neigh_node for an neigh_addr due to some other list operations taking place at the same time. Instead add a batadv_hardif_neigh_node pointer directly in batadv_neigh_node which will be used for the reference counter decremented on release of batadv_neigh_node. Fixes: cef63419f7db ("batman-adv: add list of unique single hop neighbors per hard-interface") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 16 +++++----------- net/batman-adv/types.h | 2 ++ 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index d52f67a0c057..c355a824713c 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -250,7 +250,6 @@ static void batadv_neigh_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; - struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; @@ -262,13 +261,7 @@ static void batadv_neigh_node_release(struct kref *ref) batadv_neigh_ifinfo_put(neigh_ifinfo); } - hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, - neigh_node->addr); - if (hardif_neigh) { - /* batadv_hardif_neigh_get() increases refcount too */ - batadv_hardif_neigh_put(hardif_neigh); - batadv_hardif_neigh_put(hardif_neigh); - } + batadv_hardif_neigh_put(neigh_node->hardif_neigh); if (bao->bat_neigh_free) bao->bat_neigh_free(neigh_node); @@ -665,6 +658,10 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, neigh_node->orig_node = orig_node; neigh_node->last_seen = jiffies; + /* increment unique neighbor refcount */ + kref_get(&hardif_neigh->refcount); + neigh_node->hardif_neigh = hardif_neigh; + /* extra reference for return */ kref_init(&neigh_node->refcount); kref_get(&neigh_node->refcount); @@ -673,9 +670,6 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); spin_unlock_bh(&orig_node->neigh_list_lock); - /* increment unique neighbor refcount */ - kref_get(&hardif_neigh->refcount); - batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 65afd090ab3e..1e47fbe8bb7b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -433,6 +433,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard-interface * @last_seen: when last packet via this neighbor was received + * @hardif_neigh: hardif_neigh of this neighbor * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ @@ -444,6 +445,7 @@ struct batadv_neigh_node { spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ struct batadv_hard_iface *if_incoming; unsigned long last_seen; + struct batadv_hardif_neigh_node *hardif_neigh; struct kref refcount; struct rcu_head rcu; }; -- cgit v1.2.3 From f27337e16f2d0e52a8d05ea599ed13cd266ac291 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Apr 2016 11:04:51 +0200 Subject: ip_tunnel: fix preempt warning in ip tunnel creation/updating After the commit e09acddf873b ("ip_tunnel: replace dst_cache with generic implementation"), a preemption debug warning is triggered on ip4 tunnels updating; the dst cache helper needs to be invoked in unpreemptible context. We don't need to load the cache on tunnel update, so this commit fixes the warning replacing the load with a dst cache reset, which is preempt safe. Fixes: e09acddf873b ("ip_tunnel: replace dst_cache with generic implementation") Reported-by: Eric Dumazet Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6aad0192443d..a69ed94bda1b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, - fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) dev->flags |= IFF_POINTOPOINT; + + dst_cache_reset(&tunnel->dst_cache); } if (!tdev && tunnel->parms.link) -- cgit v1.2.3 From 018f8258582381bcce484312f0e9ec2970d0383e Mon Sep 17 00:00:00 2001 From: Wang Shanker Date: Fri, 29 Apr 2016 01:29:43 +0800 Subject: net: l2tp: fix reversed udp6 checksum flags This patch fixes a bug which causes the behavior of whether to ignore udp6 checksum of udp6 encapsulated l2tp tunnel contrary to what userspace program requests. When the flag `L2TP_ATTR_UDP_ZERO_CSUM6_RX` is set by userspace, it is expected that udp6 checksums of received packets of the l2tp tunnel to create should be ignored. In `l2tp_netlink.c`: `l2tp_nl_cmd_tunnel_create()`, `cfg.udp6_zero_rx_checksums` is set according to the flag, and then passed to `l2tp_core.c`: `l2tp_tunnel_create()` and then `l2tp_tunnel_sock_create()`. In `l2tp_tunnel_sock_create()`, `udp_conf.use_udp6_rx_checksums` is set the same to `cfg.udp6_zero_rx_checksums`. However, if we want the checksum to be ignored, `udp_conf.use_udp6_rx_checksums` should be set to `false`, i.e. be set to the contrary. Similarly, the same should be done to `udp_conf.use_udp6_tx_checksums`. Signed-off-by: Miao Wang Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index afca2eb4dfa7..6edfa9980314 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, sizeof(udp_conf.peer_ip6)); udp_conf.use_udp6_tx_checksums = - cfg->udp6_zero_tx_checksums; + ! cfg->udp6_zero_tx_checksums; udp_conf.use_udp6_rx_checksums = - cfg->udp6_zero_rx_checksums; + ! cfg->udp6_zero_rx_checksums; } else #endif { -- cgit v1.2.3 From 90e5d0db2b221f0cbbb91e9e61fdb7dbb9e1afc2 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 28 Apr 2016 19:24:32 -0400 Subject: soreuseport: Fix TCP listener hash collision I forgot to include a check for listener port equality when deciding if two sockets should belong to the same reuseport group. This was not caught previously because it's only necessary when two listening sockets for the same user happen to hash to the same listener bucket. The same error does not exist in the UDP path. Fixes: c125e80b8868("soreuseport: fast reuseport TCP socket selection") Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bc68eced0105..0d9e9d7bb029 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -470,6 +470,7 @@ static int inet_reuseport_add_sock(struct sock *sk, const struct sock *sk2, bool match_wildcard)) { + struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); @@ -479,6 +480,7 @@ static int inet_reuseport_add_sock(struct sock *sk, sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && + inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && saddr_same(sk, sk2, false)) return reuseport_add_sock(sk, sk2); -- cgit v1.2.3 From efe790502be85c60daa65c8aa51f05c333186e12 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Fri, 29 Apr 2016 10:40:24 -0400 Subject: tipc: only process unicast on intended node We have observed complete lock up of broadcast-link transmission due to unacknowledged packets never being removed from the 'transmq' queue. This is traced to nodes having their ack field set beyond the sequence number of packets that have actually been transmitted to them. Consider an example where node 1 has sent 10 packets to node 2 on a link and node 3 has sent 20 packets to node 2 on another link. We see examples of an ack from node 2 destined for node 3 being treated as an ack from node 2 at node 1. This leads to the ack on the node 1 to node 2 link being increased to 20 even though we have only sent 10 packets. When node 1 does get around to sending further packets, none of the packets with sequence numbers less than 21 are actually removed from the transmq. To resolve this we reinstate some code lost in commit d999297c3dbb ("tipc: reduce locking scope during packet reception") which ensures that only messages destined for the receiving node are processed by that node. This prevents the sequence numbers from getting out of sync and resolves the packet leakage, thereby resolving the broadcast-link transmission lock-ups we observed. While we are aware that this change only patches over a root problem that we still haven't identified, this is a sanity test that it is always legitimate to do. It will remain in the code even after we identify and fix the real problem. Reviewed-by: Chris Packham Reviewed-by: John Thompson Signed-off-by: Hamish Martin Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178fd3850..9aaa1bc566ae 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1444,6 +1444,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int bearer_id = b->identity; struct tipc_link_entry *le; u16 bc_ack = msg_bcast_ack(hdr); + u32 self = tipc_own_addr(net); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1460,6 +1461,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) return tipc_node_bc_rcv(net, skb, bearer_id); } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + /* Locate neighboring node that sent packet */ n = tipc_node_find(net, msg_prevnode(hdr)); if (unlikely(!n)) -- cgit v1.2.3 From b7f8fe251e4609e2a437bd2c2dea01e61db6849c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 29 Apr 2016 23:31:32 +0200 Subject: gre: do not pull header in ICMP error processing iptunnel_pull_header expects that IP header was already pulled; with this expectation, it pulls the tunnel header. This is not true in gre_err. Furthermore, ipv4_update_pmtu and ipv4_redirect expect that skb->data points to the IP header. We cannot pull the tunnel header in this path. It's just a matter of not calling iptunnel_pull_header - we don't need any of its effects. Fixes: bda7bb463436 ("gre: Allow multiple protocol listener for gre protocol.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f502d34bcb40..205a2b8a5a84 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -179,6 +179,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -238,7 +239,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -341,7 +342,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -419,6 +420,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -428,7 +430,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) -- cgit v1.2.3 From 6071bd1aa13ed9e41824bafad845b7b7f4df5cfd Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 2 May 2016 12:20:15 -0400 Subject: netem: Segment GSO packets on enqueue This was recently reported to me, and reproduced on the latest net kernel, when attempting to run netperf from a host that had a netem qdisc attached to the egress interface: [ 788.073771] ---------------------[ cut here ]--------------------------- [ 788.096716] WARNING: at net/core/dev.c:2253 skb_warn_bad_offload+0xcd/0xda() [ 788.129521] bnx2: caps=(0x00000001801949b3, 0x0000000000000000) len=2962 data_len=0 gso_size=1448 gso_type=1 ip_summed=3 [ 788.182150] Modules linked in: sch_netem kvm_amd kvm crc32_pclmul ipmi_ssif ghash_clmulni_intel sp5100_tco amd64_edac_mod aesni_intel lrw gf128mul glue_helper ablk_helper edac_mce_amd cryptd pcspkr sg edac_core hpilo ipmi_si i2c_piix4 k10temp fam15h_power hpwdt ipmi_msghandler shpchp acpi_power_meter pcc_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ahci ata_generic pata_acpi ttm libahci crct10dif_pclmul pata_atiixp tg3 libata crct10dif_common drm crc32c_intel ptp serio_raw bnx2 r8169 hpsa pps_core i2c_core mii dm_mirror dm_region_hash dm_log dm_mod [ 788.465294] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G W ------------ 3.10.0-327.el7.x86_64 #1 [ 788.511521] Hardware name: HP ProLiant DL385p Gen8, BIOS A28 12/17/2012 [ 788.542260] ffff880437c036b8 f7afc56532a53db9 ffff880437c03670 ffffffff816351f1 [ 788.576332] ffff880437c036a8 ffffffff8107b200 ffff880633e74200 ffff880231674000 [ 788.611943] 0000000000000001 0000000000000003 0000000000000000 ffff880437c03710 [ 788.647241] Call Trace: [ 788.658817] [] dump_stack+0x19/0x1b [ 788.686193] [] warn_slowpath_common+0x70/0xb0 [ 788.713803] [] warn_slowpath_fmt+0x5c/0x80 [ 788.741314] [] ? ___ratelimit+0x93/0x100 [ 788.767018] [] skb_warn_bad_offload+0xcd/0xda [ 788.796117] [] skb_checksum_help+0x17c/0x190 [ 788.823392] [] netem_enqueue+0x741/0x7c0 [sch_netem] [ 788.854487] [] dev_queue_xmit+0x2a8/0x570 [ 788.880870] [] ip_finish_output+0x53d/0x7d0 ... The problem occurs because netem is not prepared to handle GSO packets (as it uses skb_checksum_help in its enqueue path, which cannot manipulate these frames). The solution I think is to simply segment the skb in a simmilar fashion to the way we do in __dev_queue_xmit (via validate_xmit_skb), with some minor changes. When we decide to corrupt an skb, if the frame is GSO, we segment it, corrupt the first segment, and enqueue the remaining ones. tested successfully by myself on the latest net kernel, to which this applies Signed-off-by: Neil Horman CC: Jamal Hadi Salim CC: "David S. Miller" CC: netem@lists.linux-foundation.org CC: eric.dumazet@gmail.com CC: stephen@networkplumber.org Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9640bb39a5d2..4befe97a9034 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) sch->q.qlen++; } +/* netem can't properly corrupt a megapacket (like we get from GSO), so instead + * when we statistically choose to corrupt one, we instead segment it, returning + * the first packet to be corrupted, and re-enqueue the remaining frames + */ +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct sk_buff *segs; + netdev_features_t features = netif_skb_features(skb); + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) { + qdisc_reshape_fail(skb, sch); + return NULL; + } + consume_skb(skb); + return segs; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; + struct sk_buff *segs = NULL; + unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); + int nb = 0; int count = 1; + int rc = NET_XMIT_SUCCESS; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) @@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (skb_is_gso(skb)) { + segs = netem_segment(skb, sch); + if (!segs) + return NET_XMIT_DROP; + } else { + segs = skb; + } + + skb = segs; + segs = segs->next; + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) - return qdisc_drop(skb, sch); + skb_checksum_help(skb))) { + rc = qdisc_drop(skb, sch); + goto finish_segs; + } skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); @@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.requeues++; } +finish_segs: + if (segs) { + while (segs) { + skb2 = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + last_len = segs->len; + rc = qdisc_enqueue(segs, sch); + if (rc != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(rc)) + qdisc_qstats_drop(sch); + } else { + nb++; + len += last_len; + } + segs = skb2; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + } return NET_XMIT_SUCCESS; } -- cgit v1.2.3 From 996e802187889f1cd412e6929c9344b92ccb78c4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 2 May 2016 09:25:10 -0700 Subject: net: Disable segmentation if checksumming is not supported In the case of the mlx4 and mlx5 driver they do not support IPv6 checksum offload for tunnels. With this being the case we should disable GSO in addition to the checksum offload features when we find that a device cannot perform a checksum on a given packet type. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 77a71cd68535..5c925ac50b95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2802,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_CSUM_MASK; + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } -- cgit v1.2.3 From eb192840266fab3e3da644018121eed30153355d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 2 May 2016 11:24:51 -0700 Subject: RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock There is a race condition between rds_send_xmit -> rds_tcp_xmit and the code that deals with resolution of duelling syns added by commit 241b271952eb ("RDS-TCP: Reset tcp callbacks if re-using an outgoing socket in rds_tcp_accept_one()"). Specifically, we may end up derefencing a null pointer in rds_send_xmit if we have the interleaving sequence: rds_tcp_accept_one rds_send_xmit conn is RDS_CONN_UP, so invoke rds_tcp_xmit