 -rw-r--r--   MAINTAINERS                           |    8
 -rw-r--r--   drivers/net/Kconfig                   |   16
 -rw-r--r--   drivers/net/Makefile                  |    1
 -rw-r--r--   drivers/net/amt.c                     | 3296
 -rw-r--r--   include/net/amt.h                     |  385
 -rw-r--r--   include/uapi/linux/amt.h              |   62
 -rw-r--r--   tools/testing/selftests/net/Makefile  |    1
 -rw-r--r--   tools/testing/selftests/net/amt.sh    |  284
 -rw-r--r--   tools/testing/selftests/net/config    |    1
 9 files changed, 4054 insertions(+), 0 deletions(-)
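The new link type is driven from userspace with iproute2. A minimal sketch of the two roles, assuming an iproute2 build that understands "type amt" (device names, addresses, and port numbers are illustrative, loosely modeled on the amt.sh selftest added by this commit):

    # Relay: decapsulates IGMP/MLD from gateways, encapsulates multicast data.
    ip link add amtr type amt mode relay local 172.17.0.1 dev eth0 \
            relay_port 2268 max_tunnels 4
    # Gateway: sends Discovery/Request to the relay, tunnels IGMP/MLD joins.
    ip link add amtg type amt mode gateway local 172.17.0.2 \
            discovery 172.17.0.1 dev eth0 gateway_port 2268 relay_port 2268
    ip link set amtr up
    ip link set amtg up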
diff --git a/MAINTAINERS b/MAINTAINERS
index 3b85f039fbf9..917f360b3ece 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1020,6 +1020,14 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/iio/light/ams,as73211.yaml
 F:	drivers/iio/light/as73211.c
 
+AMT (Automatic Multicast Tunneling)
+M:	Taehee Yoo <ap420073@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
+F:	drivers/net/amt.c
+
 ANALOG DEVICES INC AD7192 DRIVER
 M:	Alexandru Tachici <alexandru.tachici@analog.com>
 L:	linux-iio@vger.kernel.org
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index dd335ae1122b..034dbd487c33 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -291,6 +291,22 @@ config GTP
 	  To compile this drivers as a module, choose M here: the module
 	  will be called gtp.
 
+config AMT
+	tristate "Automatic Multicast Tunneling (AMT)"
+	depends on INET && IP_MULTICAST
+	select NET_UDP_TUNNEL
+	help
+	  This allows one to create AMT (Automatic Multicast Tunneling)
+	  virtual interfaces that provide multicast tunneling.
+	  There are two roles: Gateway and Relay.
+	  The Gateway encapsulates IGMP/MLD traffic from listeners to the
+	  Relay and decapsulates multicast traffic from the Relay to listeners.
+	  The Relay encapsulates multicast traffic from sources to the
+	  Gateway and decapsulates IGMP/MLD traffic from the Gateway.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called amt.
+
 config MACSEC
 	tristate "IEEE 802.1AE MAC-level encryption (MACsec)"
 	select CRYPTO
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 739838623cf6..50b23e71065f 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_WIREGUARD) += wireguard/
 obj-$(CONFIG_EQUALIZER) += eql.o
 obj-$(CONFIG_IFB) += ifb.o
 obj-$(CONFIG_MACSEC) += macsec.o
+obj-$(CONFIG_AMT) += amt.o
 obj-$(CONFIG_MACVLAN) += macvlan.o
 obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_MII) += mii.o
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
new file mode 100644
index 000000000000..60a7053a9cf7
--- /dev/null
+++ b/drivers/net/amt.c
@@ -0,0 +1,3296 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2021 Taehee Yoo <ap420073@gmail.com> */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/jhash.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/igmp.h>
+#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/icmp.h>
+#include <net/mld.h>
+#include <net/amt.h>
+#include <uapi/linux/amt.h>
+#include <linux/security.h>
+#include <net/gro_cells.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/inet_common.h>
+#include <net/ip6_checksum.h>
+
+static struct workqueue_struct *amt_wq;
+
+static HLIST_HEAD(source_gc_list);
+/* Lock for source_gc_list */
+static spinlock_t source_gc_lock;
+static struct delayed_work source_gc_wq;
+static char *status_str[] = {
+	"AMT_STATUS_INIT",
+	"AMT_STATUS_SENT_DISCOVERY",
+	"AMT_STATUS_RECEIVED_DISCOVERY",
+	"AMT_STATUS_SENT_ADVERTISEMENT",
+	"AMT_STATUS_RECEIVED_ADVERTISEMENT",
+	"AMT_STATUS_SENT_REQUEST",
+	"AMT_STATUS_RECEIVED_REQUEST",
+	"AMT_STATUS_SENT_QUERY",
"AMT_STATUS_RECEIVED_QUERY", + "AMT_STATUS_SENT_UPDATE", + "AMT_STATUS_RECEIVED_UPDATE", +}; + +static char *type_str[] = { + "AMT_MSG_DISCOVERY", + "AMT_MSG_ADVERTISEMENT", + "AMT_MSG_REQUEST", + "AMT_MSG_MEMBERSHIP_QUERY", + "AMT_MSG_MEMBERSHIP_UPDATE", + "AMT_MSG_MULTICAST_DATA", + "AMT_MSG_TEARDOWM", +}; + +static char *action_str[] = { + "AMT_ACT_GMI", + "AMT_ACT_GMI_ZERO", + "AMT_ACT_GT", + "AMT_ACT_STATUS_FWD_NEW", + "AMT_ACT_STATUS_D_FWD_NEW", + "AMT_ACT_STATUS_NONE_NEW", +}; + +static struct igmpv3_grec igmpv3_zero_grec; + +#if IS_ENABLED(CONFIG_IPV6) +#define MLD2_ALL_NODE_INIT { { { 0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } } +static struct in6_addr mld2_all_node = MLD2_ALL_NODE_INIT; +static struct mld2_grec mldv2_zero_grec; +#endif + +static struct amt_skb_cb *amt_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct amt_skb_cb) + sizeof(struct qdisc_skb_cb) > + sizeof_field(struct sk_buff, cb)); + + return (struct amt_skb_cb *)((void *)skb->cb + + sizeof(struct qdisc_skb_cb)); +} + +static void __amt_source_gc_work(void) +{ + struct amt_source_node *snode; + struct hlist_head gc_list; + struct hlist_node *t; + + spin_lock_bh(&source_gc_lock); + hlist_move_list(&source_gc_list, &gc_list); + spin_unlock_bh(&source_gc_lock); + + hlist_for_each_entry_safe(snode, t, &gc_list, node) { + hlist_del_rcu(&snode->node); + kfree_rcu(snode, rcu); + } +} + +static void amt_source_gc_work(struct work_struct *work) +{ + __amt_source_gc_work(); + + spin_lock_bh(&source_gc_lock); + mod_delayed_work(amt_wq, &source_gc_wq, + msecs_to_jiffies(AMT_GC_INTERVAL)); + spin_unlock_bh(&source_gc_lock); +} + +static bool amt_addr_equal(union amt_addr *a, union amt_addr *b) +{ + return !memcmp(a, b, sizeof(union amt_addr)); +} + +static u32 amt_source_hash(struct amt_tunnel_list *tunnel, union amt_addr *src) +{ + u32 hash = jhash(src, sizeof(*src), tunnel->amt->hash_seed); + + return reciprocal_scale(hash, tunnel->amt->hash_buckets); +} + +static bool amt_status_filter(struct amt_source_node *snode, + enum amt_filter filter) +{ + bool rc = false; + + switch (filter) { + case AMT_FILTER_FWD: + if (snode->status == AMT_SOURCE_STATUS_FWD && + snode->flags == AMT_SOURCE_OLD) + rc = true; + break; + case AMT_FILTER_D_FWD: + if (snode->status == AMT_SOURCE_STATUS_D_FWD && + snode->flags == AMT_SOURCE_OLD) + rc = true; + break; + case AMT_FILTER_FWD_NEW: + if (snode->status == AMT_SOURCE_STATUS_FWD && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + case AMT_FILTER_D_FWD_NEW: + if (snode->status == AMT_SOURCE_STATUS_D_FWD && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + case AMT_FILTER_ALL: + rc = true; + break; + case AMT_FILTER_NONE_NEW: + if (snode->status == AMT_SOURCE_STATUS_NONE && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + case AMT_FILTER_BOTH: + if ((snode->status == AMT_SOURCE_STATUS_D_FWD || + snode->status == AMT_SOURCE_STATUS_FWD) && + snode->flags == AMT_SOURCE_OLD) + rc = true; + break; + case AMT_FILTER_BOTH_NEW: + if ((snode->status == AMT_SOURCE_STATUS_D_FWD || + snode->status == AMT_SOURCE_STATUS_FWD) && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + default: + WARN_ON_ONCE(1); + break; + } + + return rc; +} + +static struct amt_source_node *amt_lookup_src(struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + enum amt_filter filter, + union amt_addr *src) +{ + u32 hash = amt_source_hash(tunnel, src); + struct amt_source_node *snode; + + hlist_for_each_entry_rcu(snode, &gnode->sources[hash], node) + if 
+		if (amt_status_filter(snode, filter) &&
+		    amt_addr_equal(&snode->source_addr, src))
+			return snode;
+
+	return NULL;
+}
+
+static u32 amt_group_hash(struct amt_tunnel_list *tunnel, union amt_addr *group)
+{
+	u32 hash = jhash(group, sizeof(*group), tunnel->amt->hash_seed);
+
+	return reciprocal_scale(hash, tunnel->amt->hash_buckets);
+}
+
+static struct amt_group_node *amt_lookup_group(struct amt_tunnel_list *tunnel,
+					       union amt_addr *group,
+					       union amt_addr *host,
+					       bool v6)
+{
+	u32 hash = amt_group_hash(tunnel, group);
+	struct amt_group_node *gnode;
+
+	hlist_for_each_entry_rcu(gnode, &tunnel->groups[hash], node) {
+		if (amt_addr_equal(&gnode->group_addr, group) &&
+		    amt_addr_equal(&gnode->host_addr, host) &&
+		    gnode->v6 == v6)
+			return gnode;
+	}
+
+	return NULL;
+}
+
+static void amt_destroy_source(struct amt_source_node *snode)
+{
+	struct amt_group_node *gnode = snode->gnode;
+	struct amt_tunnel_list *tunnel;
+
+	tunnel = gnode->tunnel_list;
+
+	if (!gnode->v6) {
+		netdev_dbg(snode->gnode->amt->dev,
+			   "Delete source %pI4 from %pI4\n",
+			   &snode->source_addr.ip4,
+			   &gnode->group_addr.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		netdev_dbg(snode->gnode->amt->dev,
+			   "Delete source %pI6 from %pI6\n",
+			   &snode->source_addr.ip6,
+			   &gnode->group_addr.ip6);
+#endif
+	}
+
+	cancel_delayed_work(&snode->source_timer);
+	hlist_del_init_rcu(&snode->node);
+	tunnel->nr_sources--;
+	gnode->nr_sources--;
+	spin_lock_bh(&source_gc_lock);
+	hlist_add_head_rcu(&snode->node, &source_gc_list);
+	spin_unlock_bh(&source_gc_lock);
+}
+
+static void amt_del_group(struct amt_dev *amt, struct amt_group_node *gnode)
+{
+	struct amt_source_node *snode;
+	struct hlist_node *t;
+	int i;
+
+	if (cancel_delayed_work(&gnode->group_timer))
+		dev_put(amt->dev);
+	hlist_del_rcu(&gnode->node);
+	gnode->tunnel_list->nr_groups--;
+
+	if (!gnode->v6)
+		netdev_dbg(amt->dev, "Leave group %pI4\n",
+			   &gnode->group_addr.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		netdev_dbg(amt->dev, "Leave group %pI6\n",
+			   &gnode->group_addr.ip6);
+#endif
+	for (i = 0; i < amt->hash_buckets; i++)
+		hlist_for_each_entry_safe(snode, t, &gnode->sources[i], node)
+			amt_destroy_source(snode);
+
+	/* tunnel->lock was acquired outside of amt_del_group(),
+	 * but rcu_read_lock() is also held, so it's safe.
+	 */
+	kfree_rcu(gnode, rcu);
+}
+
+/* If a source timer expires with a router filter-mode for the group of
+ * INCLUDE, the router concludes that traffic from this particular
+ * source is no longer desired on the attached network, and deletes the
+ * associated source record.
+ */
+static void amt_source_work(struct work_struct *work)
+{
+	struct amt_source_node *snode = container_of(to_delayed_work(work),
+						     struct amt_source_node,
+						     source_timer);
+	struct amt_group_node *gnode = snode->gnode;
+	struct amt_dev *amt = gnode->amt;
+	struct amt_tunnel_list *tunnel;
+
+	tunnel = gnode->tunnel_list;
+	spin_lock_bh(&tunnel->lock);
+	rcu_read_lock();
+	if (gnode->filter_mode == MCAST_INCLUDE) {
+		amt_destroy_source(snode);
+		if (!gnode->nr_sources)
+			amt_del_group(amt, gnode);
+	} else {
+		/* When a router filter-mode for a group is EXCLUDE,
+		 * source records are only deleted when the group timer expires
+		 */
+		snode->status = AMT_SOURCE_STATUS_D_FWD;
+	}
+	rcu_read_unlock();
+	spin_unlock_bh(&tunnel->lock);
+}
+
+static void amt_act_src(struct amt_tunnel_list *tunnel,
+			struct amt_group_node *gnode,
+			struct amt_source_node *snode,
+			enum amt_act act)
+{
+	struct amt_dev *amt = tunnel->amt;
+
+	switch (act) {
+	case AMT_ACT_GMI:
+		mod_delayed_work(amt_wq, &snode->source_timer,
+				 msecs_to_jiffies(amt_gmi(amt)));
+		break;
+	case AMT_ACT_GMI_ZERO:
+		cancel_delayed_work(&snode->source_timer);
+		break;
+	case AMT_ACT_GT:
+		mod_delayed_work(amt_wq, &snode->source_timer,
+				 gnode->group_timer.timer.expires);
+		break;
+	case AMT_ACT_STATUS_FWD_NEW:
+		snode->status = AMT_SOURCE_STATUS_FWD;
+		snode->flags = AMT_SOURCE_NEW;
+		break;
+	case AMT_ACT_STATUS_D_FWD_NEW:
+		snode->status = AMT_SOURCE_STATUS_D_FWD;
+		snode->flags = AMT_SOURCE_NEW;
+		break;
+	case AMT_ACT_STATUS_NONE_NEW:
+		cancel_delayed_work(&snode->source_timer);
+		snode->status = AMT_SOURCE_STATUS_NONE;
+		snode->flags = AMT_SOURCE_NEW;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	if (!gnode->v6)
+		netdev_dbg(amt->dev, "Source %pI4 from %pI4 Acted %s\n",
+			   &snode->source_addr.ip4,
+			   &gnode->group_addr.ip4,
+			   action_str[act]);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		netdev_dbg(amt->dev, "Source %pI6 from %pI6 Acted %s\n",
+			   &snode->source_addr.ip6,
+			   &gnode->group_addr.ip6,
+			   action_str[act]);
+#endif
+}
+
+static struct amt_source_node *amt_alloc_snode(struct amt_group_node *gnode,
+					       union amt_addr *src)
+{
+	struct amt_source_node *snode;
+
+	snode = kzalloc(sizeof(*snode), GFP_ATOMIC);
+	if (!snode)
+		return NULL;
+
+	memcpy(&snode->source_addr, src, sizeof(union amt_addr));
+	snode->gnode = gnode;
+	snode->status = AMT_SOURCE_STATUS_NONE;
+	snode->flags = AMT_SOURCE_NEW;
+	INIT_HLIST_NODE(&snode->node);
+	INIT_DELAYED_WORK(&snode->source_timer, amt_source_work);
+
+	return snode;
+}
+
+/* RFC 3810 - 7.2.2. Definition of Filter Timers
+ *
+ *  Router Mode          Filter Timer         Actions/Comments
+ *  -----------       -----------------       ----------------
+ *
+ *    INCLUDE             Not Used            All listeners in
+ *                                            INCLUDE mode.
+ *
+ *    EXCLUDE             Timer > 0           At least one listener
+ *                                            in EXCLUDE mode.
+ *
+ *    EXCLUDE             Timer == 0          No more listeners in
+ *                                            EXCLUDE mode for the
+ *                                            multicast address.
+ *                                            If the Requested List
+ *                                            is empty, delete
+ *                                            Multicast Address
+ *                                            Record. If not, switch
+ *                                            to INCLUDE filter mode;
+ *                                            the sources in the
+ *                                            Requested List are
+ *                                            moved to the Include
+ *                                            List, and the Exclude
+ *                                            List is deleted.
+ */
+static void amt_group_work(struct work_struct *work)
+{
+	struct amt_group_node *gnode = container_of(to_delayed_work(work),
+						    struct amt_group_node,
+						    group_timer);
+	struct amt_tunnel_list *tunnel = gnode->tunnel_list;
+	struct amt_dev *amt = gnode->amt;
+	struct amt_source_node *snode;
+	bool delete_group = true;
+	struct hlist_node *t;
+	int i, buckets;
+
+	buckets = amt->hash_buckets;
+
+	spin_lock_bh(&tunnel->lock);
+	if (gnode->filter_mode == MCAST_INCLUDE) {
+		/* Not Used */
+		spin_unlock_bh(&tunnel->lock);
+		goto out;
+	}
+
+	rcu_read_lock();
+	for (i = 0; i < buckets; i++) {
+		hlist_for_each_entry_safe(snode, t,
+					  &gnode->sources[i], node) {
+			if (!delayed_work_pending(&snode->source_timer) ||
+			    snode->status == AMT_SOURCE_STATUS_D_FWD) {
+				amt_destroy_source(snode);
+			} else {
+				delete_group = false;
+				snode->status = AMT_SOURCE_STATUS_FWD;
+			}
+		}
+	}
+	if (delete_group)
+		amt_del_group(amt, gnode);
+	else
+		gnode->filter_mode = MCAST_INCLUDE;
+	rcu_read_unlock();
+	spin_unlock_bh(&tunnel->lock);
+out:
+	dev_put(amt->dev);
+}
+
+/* Non-existent group is created as INCLUDE {empty}:
+ *
+ * RFC 3376 - 5.1. Action on Change of Interface State
+ *
+ * If no interface state existed for that multicast address before
+ * the change (i.e., the change consisted of creating a new
+ * per-interface record), or if no state exists after the change
+ * (i.e., the change consisted of deleting a per-interface record),
+ * then the "non-existent" state is considered to have a filter mode
+ * of INCLUDE and an empty source list.
+ */
+static struct amt_group_node *amt_add_group(struct amt_dev *amt,
+					    struct amt_tunnel_list *tunnel,
+					    union amt_addr *group,
+					    union amt_addr *host,
+					    bool v6)
+{
+	struct amt_group_node *gnode;
+	u32 hash;
+	int i;
+
+	if (tunnel->nr_groups >= amt->max_groups)
+		return ERR_PTR(-ENOSPC);
+
+	gnode = kzalloc(sizeof(*gnode) +
+			(sizeof(struct hlist_head) * amt->hash_buckets),
+			GFP_ATOMIC);
+	if (unlikely(!gnode))
+		return ERR_PTR(-ENOMEM);
+
+	gnode->amt = amt;
+	gnode->group_addr = *group;
+	gnode->host_addr = *host;
+	gnode->v6 = v6;
+	gnode->tunnel_list = tunnel;
+	gnode->filter_mode = MCAST_INCLUDE;
+	INIT_HLIST_NODE(&gnode->node);
+	INIT_DELAYED_WORK(&gnode->group_timer, amt_group_work);
+	for (i = 0; i < amt->hash_buckets; i++)
+		INIT_HLIST_HEAD(&gnode->sources[i]);
+
+	hash = amt_group_hash(tunnel, group);
+	hlist_add_head_rcu(&gnode->node, &tunnel->groups[hash]);
+	tunnel->nr_groups++;
+
+	if (!gnode->v6)
+		netdev_dbg(amt->dev, "Join group %pI4\n",
+			   &gnode->group_addr.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		netdev_dbg(amt->dev, "Join group %pI6\n",
+			   &gnode->group_addr.ip6);
+#endif
+
+	return gnode;
+}
+
+static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
+{
+	u8 ra[AMT_IPHDR_OPTS] = { IPOPT_RA, 4, 0, 0 };
+	int hlen = LL_RESERVED_SPACE(amt->dev);
+	int tlen = amt->dev->needed_tailroom;
+	struct igmpv3_query *ihv3;
+	void *csum_start = NULL;
+	__sum16 *csum = NULL;
+	struct sk_buff *skb;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	unsigned int len;
+	int offset;
+
+	len = hlen + tlen + sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3);
+	skb = netdev_alloc_skb_ip_align(amt->dev, len);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, hlen);
+	skb_push(skb, sizeof(*eth));
+	skb->protocol = htons(ETH_P_IP);
+	skb_reset_mac_header(skb);
+	skb->priority = TC_PRIO_CONTROL;
+	skb_put(skb, sizeof(*iph));
+	skb_put_data(skb, ra, sizeof(ra));
+	skb_put(skb, sizeof(*ihv3));
+	skb_pull(skb, sizeof(*eth));
+	skb_reset_network_header(skb);
+
+	iph = ip_hdr(skb);
+	iph->version = 4;
+	iph->ihl = (sizeof(struct iphdr) + AMT_IPHDR_OPTS) >> 2;
+	iph->tos = AMT_TOS;
+	iph->tot_len = htons(sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3));
+	iph->frag_off = htons(IP_DF);
+	iph->ttl = 1;
+	iph->id = 0;
+	iph->protocol = IPPROTO_IGMP;
+	iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
+	iph->saddr = htonl(INADDR_ANY);
+	ip_send_check(iph);
+
+	eth = eth_hdr(skb);
+	ether_addr_copy(eth->h_source, amt->dev->dev_addr);
+	ip_eth_mc_map(htonl(INADDR_ALLHOSTS_GROUP), eth->h_dest);
+	eth->h_proto = htons(ETH_P_IP);
+
+	ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
+	skb_reset_transport_header(skb);
+	ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
+	ihv3->code = 1;
+	ihv3->group = 0;
+	ihv3->qqic = amt->qi;
+	ihv3->nsrcs = 0;
+	ihv3->resv = 0;
+	ihv3->suppress = false;
+	ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+	ihv3->csum = 0;
+	csum = &ihv3->csum;
+	csum_start = (void *)ihv3;
+	*csum = ip_compute_csum(csum_start, sizeof(*ihv3));
+	offset = skb_transport_offset(skb);
+	skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_push(skb, sizeof(*eth) + sizeof(*iph) + AMT_IPHDR_OPTS);
+
+	return skb;
+}
+
+static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+				   bool validate)
+{
+	if (validate && amt->status >= status)
+		return;
+	netdev_dbg(amt->dev, "Update GW status %s -> %s",
+		   status_str[amt->status], status_str[status]);
+	amt->status = status;
+}
+
+static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
+				      enum amt_status status,
+				      bool validate)
+{
+	if (validate && tunnel->status >= status)
+		return;
+	netdev_dbg(tunnel->amt->dev,
+		   "Update Tunnel(IP = %pI4, PORT = %u) status %s -> %s",
+		   &tunnel->ip4, ntohs(tunnel->source_port),
+		   status_str[tunnel->status], status_str[status]);
+	tunnel->status = status;
+}
+
+static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+				 bool validate)
+{
+	spin_lock_bh(&amt->lock);
+	__amt_update_gw_status(amt, status, validate);
+	spin_unlock_bh(&amt->lock);
+}
+
+static void amt_update_relay_status(struct amt_tunnel_list *tunnel,
+				    enum amt_status status, bool validate)
+{
+	spin_lock_bh(&tunnel->lock);
+	__amt_update_relay_status(tunnel, status, validate);
+	spin_unlock_bh(&tunnel->lock);
+}
+
+static void amt_send_discovery(struct amt_dev *amt)
+{
+	struct amt_header_discovery *amtd;
+	int hlen, tlen, offset;
+	struct socket *sock;
+	struct udphdr *udph;
+	struct sk_buff *skb;
+	struct iphdr *iph;
+	struct rtable *rt;
+	struct flowi4 fl4;
+	u32 len;
+	int err;
+
+	rcu_read_lock();
+	sock = rcu_dereference(amt->sock);
+	if (!sock)
+		goto out;
+
+	if (!netif_running(amt->stream_dev) || !netif_running(amt->dev))
+		goto out;
+
+	rt = ip_route_output_ports(amt->net, &fl4, sock->sk,
+				   amt->discovery_ip, amt->local_ip,
+				   amt->gw_port, amt->relay_port,
+				   IPPROTO_UDP, 0,
+				   amt->stream_dev->ifindex);
+	if (IS_ERR(rt)) {
+		amt->dev->stats.tx_errors++;
+		goto out;
+	}
+
+	hlen = LL_RESERVED_SPACE(amt->dev);
+	tlen = amt->dev->needed_tailroom;
+	len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtd);
+	skb = netdev_alloc_skb_ip_align(amt->dev, len);
+	if (!skb) {
+		ip_rt_put(rt);
+		amt->dev->stats.tx_errors++;
+		goto out;
+	}
+
+	skb->priority = TC_PRIO_CONTROL;
+	skb_dst_set(skb, &rt->dst);
+
+	len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtd);
+	skb_reset_network_header(skb);
+	skb_put(skb, len);
+	amtd = skb_pull(skb, sizeof(*iph) + sizeof(*udph));
+	amtd->version = 0;
+	amtd->type = AMT_MSG_DISCOVERY;
+	amtd->reserved = 0;
+	amtd->nonce = amt->nonce;
+	skb_push(skb, sizeof(*udph));
+	skb_reset_transport_header(skb);
+	udph = udp_hdr(skb);
+	udph->source = amt->gw_port;
+	udph->dest = amt->relay_port;
+	udph->len = htons(sizeof(*udph) + sizeof(*amtd));
+	udph->check = 0;
+	offset = skb_transport_offset(skb);
+	skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+	udph->check = csum_tcpudp_magic(amt->local_ip, amt->discovery_ip,
+					sizeof(*udph) + sizeof(*amtd),
+					IPPROTO_UDP, skb->csum);
+
+	skb_push(skb, sizeof(*iph));
+	iph = ip_hdr(skb);
+	iph->version = 4;
+	iph->ihl = (sizeof(struct iphdr)) >> 2;
+	iph->tos = AMT_TOS;
+	iph->frag_off = 0;
+	iph->ttl = ip4_dst_hoplimit(&rt->dst);
+	iph->daddr = amt->discovery_ip;
+	iph->saddr = amt->local_ip;
+	iph->protocol = IPPROTO_UDP;
+	iph->tot_len = htons(len);
+
+	skb->ip_summed = CHECKSUM_NONE;
+	ip_select_ident(amt->net, skb, NULL);
+	ip_send_check(iph);
+	err = ip_local_out(amt->net, sock->sk, skb);
+	if (unlikely(net_xmit_eval(err)))
+		amt->dev->stats.tx_errors++;
+
+	spin_lock_bh(&amt->lock);
+	__amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
+	spin_unlock_bh(&amt->lock);
+out:
+	rcu_read_unlock();
+}
+
+static void amt_send_request(struct amt_dev *amt, bool v6)
+{
+	struct amt_header_request *amtrh;
+	int hlen, tlen, offset;
+	struct socket *sock;
+	struct udphdr *udph;
+	struct sk_buff *skb;
+	struct iphdr *iph;
+	struct rtable *rt;
+	struct flowi4 fl4;
+	u32 len;
+	int err;
+
+	rcu_read_lock();
+	sock = rcu_dereference(amt->sock);
+	if (!sock)
+		goto out;
+
+	if (!netif_running(amt->stream_dev) || !netif_running(amt->dev))
+		goto out;
+
+	rt = ip_route_output_ports(amt->net, &fl4, sock->sk,
+				   amt->remote_ip, amt->local_ip,
+				   amt->gw_port, amt->relay_port,
+				   IPPROTO_UDP, 0,
+				   amt->stream_dev->ifindex);
+	if (IS_ERR(rt)) {
+		amt->dev->stats.tx_errors++;
+		goto out;
+	}
+
+	hlen = LL_RESERVED_SPACE(amt->dev);
+	tlen = amt->dev->needed_tailroom;
+	len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
+	skb = netdev_alloc_skb_ip_align(amt->dev, len);
+	if (!skb) {
+		ip_rt_put(rt);
+		amt->dev->stats.tx_errors++;
+		goto out;
+	}
+
+	skb->priority = TC_PRIO_CONTROL;
+	skb_dst_set(skb, &rt->dst);
+
+	len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
+	skb_reset_network_header(skb);
+	skb_put(skb, len);
+	amtrh = skb_pull(skb, sizeof(*iph) + sizeof(*udph));
+	amtrh->version = 0;
+	amtrh->type = AMT_MSG_REQUEST;
+	amtrh->reserved1 = 0;
+	amtrh->p = v6;
+	amtrh->reserved2 = 0;
+	amtrh->nonce = amt->nonce;
+	skb_push(skb, sizeof(*udph));
+	skb_reset_transport_header(skb);
+	udph = udp_hdr(skb);
+	udph->source = amt->gw_port;
+	udph->dest = amt->relay_port;
+	udph->len = htons(sizeof(*amtrh) + sizeof(*udph));
+	udph->check = 0;
+	offset = skb_transport_offset(skb);
+	skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+	udph->check = csum_tcpudp_magic(amt->local_ip, amt->remote_ip,
+					sizeof(*udph) + sizeof(*amtrh),
+					IPPROTO_UDP, skb->csum);
+
+	skb_push(skb, sizeof(*iph));
+	iph = ip_hdr(skb);
+	iph->version = 4;
+	iph->ihl = (sizeof(struct iphdr)) >> 2;
+	iph->tos = AMT_TOS;
+	iph->frag_off = 0;
+	iph->ttl = ip4_dst_hoplimit(&rt->dst);
+	iph->daddr = amt->remote_ip;
+	iph->saddr = amt->local_ip;
+	iph->protocol = IPPROTO_UDP;
+	iph->tot_len = htons(len);
+
+	skb->ip_summed = CHECKSUM_NONE;
+	ip_select_ident(amt->net, skb, NULL);
+	ip_send_check(iph);
+	err = ip_local_out(amt->net, sock->sk, skb);
+	if (unlikely(net_xmit_eval(err)))
+		amt->dev->stats.tx_errors++;
+
+out:
+	rcu_read_unlock();
+}
+
+static void amt_send_igmp_gq(struct amt_dev *amt,
+			     struct amt_tunnel_list *tunnel)
+{
+	struct sk_buff *skb;
+
+	skb = amt_build_igmp_gq(amt);
+	if (!skb)
+		return;
+
+	amt_skb_cb(skb)->tunnel = tunnel;
+	dev_queue_xmit(skb);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct sk_buff *amt_build_mld_gq(struct amt_dev *amt)
+{
+	u8 ra[AMT_IP6HDR_OPTS] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT,
+				   2, 0, 0, IPV6_TLV_PAD1, IPV6_TLV_PAD1 };
+	int hlen = LL_RESERVED_SPACE(amt->dev);
+	int tlen = amt->dev->needed_tailroom;
+	struct mld2_query *mld2q;
+	void *csum_start = NULL;
+	struct ipv6hdr *ip6h;
+	struct sk_buff *skb;
+	struct ethhdr *eth;
+	u32 len;
+
+	len = hlen + tlen + sizeof(*ip6h) + sizeof(ra) + sizeof(*mld2q);
+	skb = netdev_alloc_skb_ip_align(amt->dev, len);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, hlen);
+	skb_push(skb, sizeof(*eth));
+	skb_reset_mac_header(skb);
+	eth = eth_hdr(skb);
+	skb->priority = TC_PRIO_CONTROL;
+	skb->protocol = htons(ETH_P_IPV6);
+	skb_put_zero(skb, sizeof(*ip6h));
+	skb_put_data(skb, ra, sizeof(ra));
+	skb_put_zero(skb, sizeof(*mld2q));
+	skb_pull(skb, sizeof(*eth));
+	skb_reset_network_header(skb);
+	ip6h = ipv6_hdr(skb);
+	ip6h->payload_len = htons(sizeof(ra) + sizeof(*mld2q));
+	ip6h->nexthdr = NEXTHDR_HOP;
+	ip6h->hop_limit = 1;
+	ip6h->daddr = mld2_all_node;
+	ip6_flow_hdr(ip6h, 0, 0);
+
+	if (ipv6_dev_get_saddr(amt->net, amt->dev, &ip6h->daddr, 0,
+			       &ip6h->saddr)) {
+		amt->dev->stats.tx_errors++;
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	eth->h_proto = htons(ETH_P_IPV6);
+	ether_addr_copy(eth->h_source, amt->dev->dev_addr);
+	ipv6_eth_mc_map(&mld2_all_node, eth->h_dest);
+
+	skb_pull(skb, sizeof(*ip6h) + sizeof(ra));
+	skb_reset_transport_header(skb);
+	mld2q = (struct mld2_query *)icmp6_hdr(skb);
+	mld2q->mld2q_mrc = htons(1);
+	mld2q->mld2q_type = ICMPV6_MGM_QUERY;
+	mld2q->mld2q_code = 0;
+	mld2q->mld2q_cksum = 0;
+	mld2q->mld2q_resv1 = 0;
+	mld2q->mld2q_resv2 = 0;
+	mld2q->mld2q_suppress = 0;
+	mld2q->mld2q_qrv = amt->qrv;
+	mld2q->mld2q_nsrcs = 0;
+	mld2q->mld2q_qqic = amt->qi;
+	csum_start = (void *)mld2q;
+	mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     sizeof(*mld2q),
+					     IPPROTO_ICMPV6,
+					     csum_partial(csum_start,
+							  sizeof(*mld2q), 0));
+
+	skb->ip_summed = CHECKSUM_NONE;
+	skb_push(skb, sizeof(*eth) + sizeof(*ip6h) + sizeof(ra));
+	return skb;
+}
+
+static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel)
+{
+	struct sk_buff *skb;
+
+	skb = amt_build_mld_gq(amt);
+	if (!skb)
+		return;
+
+	amt_skb_cb(skb)->tunnel = tunnel;
+	dev_queue_xmit(skb);
+}
+#else
+static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel)
+{
+}
+#endif
+
+static void amt_secret_work(struct work_struct *work)
+{
+	struct amt_dev *amt = container_of(to_delayed_work(work),
+					   struct amt_dev,
+					   secret_wq);
+
+	spin_lock_bh(&amt->lock);
+	get_random_bytes(&amt->key, sizeof(siphash_key_t));
+	spin_unlock_bh(&amt->lock);
+	mod_delayed_work(amt_wq, &amt->secret_wq,
+			 msecs_to_jiffies(AMT_SECRET_TIMEOUT));
+}
+
+static void amt_discovery_work(struct work_struct *work)
+{
+	struct amt_dev *amt = container_of(to_delayed_work(work),
+					   struct amt_dev,
+					   discovery_wq);
+
+	spin_lock_bh(&amt->lock);
+	if (amt->status > AMT_STATUS_SENT_DISCOVERY)
+		goto out;
+	get_random_bytes(&amt->nonce, sizeof(__be32));
+	spin_unlock_bh(&amt->lock);
+
+	amt_send_discovery(amt);
+	spin_lock_bh(&amt->lock);
+out:
+	mod_delayed_work(amt_wq, &amt->discovery_wq,
+			 msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
+	spin_unlock_bh(&amt->lock);
+}
+
+static void amt_req_work(struct work_struct *work)
+{
+	struct amt_dev *amt = container_of(to_delayed_work(work),
+					   struct amt_dev,
+					   req_wq);
+	u32 exp;
+
+	spin_lock_bh(&amt->lock);
+	if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT)
+		goto out;
+
+	if (amt->req_cnt++ > AMT_MAX_REQ_COUNT) {
+		netdev_dbg(amt->dev, "Gateway is not ready");
+		amt->qi = AMT_INIT_REQ_TIMEOUT;
+		amt->ready4 = false;
+		amt->ready6 = false;
+		amt->remote_ip = 0;
+		__amt_update_gw_status(amt, AMT_STATUS_INIT, false);
+		amt->req_cnt = 0;
+	}
+	spin_unlock_bh(&amt->lock);
+
+	amt_send_request(amt, false);
+	amt_send_request(amt, true);
+	amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
+	spin_lock_bh(&amt->lock);
+out:
+	exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT);
+	mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000));
+	spin_unlock_bh(&amt->lock);
+}
+
+static bool amt_send_membership_update(struct amt_dev *amt,
+				       struct sk_buff *skb,
+				       bool v6)
+{
+	struct amt_header_membership_update *amtmu;
+	struct socket *sock;
+	struct iphdr *iph;
+	struct flowi4 fl4;
+	struct rtable *rt;
+	int err;
+
+	sock = rcu_dereference_bh(amt->sock);
+	if (!sock)
+		return true;
+
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(amt->dev) + sizeof(*amtmu) +
+			   sizeof(*iph) + sizeof(struct udphdr));
+	if (err)
+		return true;
+
+	skb_reset_inner_headers(skb);
+	memset(&fl4, 0, sizeof(struct flowi4));
+	fl4.flowi4_oif = amt->stream_dev->ifindex;
+	fl4.daddr = amt->remote_ip;
+	fl4.saddr = amt->local_ip;
+	fl4.flowi4_tos = AMT_TOS;
+	fl4.flowi4_proto = IPPROTO_UDP;
+	rt = ip_route_output_key(amt->net, &fl4);
+	if (IS_ERR(rt)) {
+		netdev_dbg(amt->dev, "no route to %pI4\n", &amt->remote_ip);
+		return true;
+	}
+
+	amtmu = skb_push(skb, sizeof(*amtmu));
+	amtmu->version = 0;
+	amtmu->type = AMT_MSG_MEMBERSHIP_UPDATE;
+	amtmu->reserved = 0;
+	amtmu->nonce = amt->nonce;
+	amtmu->response_mac = amt->mac;
+
+	if (!v6)
+		skb_set_inner_protocol(skb, htons(ETH_P_IP));
+	else
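With a gateway and a relay up, end-to-end delivery can be exercised with plain UDP sockets. A hedged sketch (socat is used here purely for illustration; the amt.sh selftest in this commit performs an equivalent check inside network namespaces):

    # Listener behind the gateway: joining the group on amtg makes the gateway
    # emit an AMT Membership Update carrying the IGMP report towards the relay.
    socat UDP4-RECVFROM:4000,ip-add-membership=239.0.0.1:amtg -
    # Sender behind the relay: the data is encapsulated as AMT Multicast Data.
    echo test | socat - UDP4-DATAGRAM:239.0.0.1:4000,ttl=2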
