summaryrefslogtreecommitdiff
path: root/drivers/net/vxlan
diff options
context:
space:
mode:
authorRoopa Prabhu <roopa@nvidia.com>2022-03-01 05:04:28 +0000
committerDavid S. Miller <davem@davemloft.net>2022-03-01 08:38:01 +0000
commit6765393614ea8e2c0a7b953063513823f87c9115 (patch)
tree6ecf13d18774f83539cc9f9edbf6faeeeb25e884 /drivers/net/vxlan
parentf2b77012ddd5b2532d262f100be3394ceae3ea59 (diff)
downloadlinux-6765393614ea8e2c0a7b953063513823f87c9115.tar.gz
linux-6765393614ea8e2c0a7b953063513823f87c9115.tar.bz2
linux-6765393614ea8e2c0a7b953063513823f87c9115.zip
vxlan: move to its own directory
vxlan.c has grown too long. This patch moves it to its own directory. subsequent patches add new functionality in new files. Signed-off-by: Roopa Prabhu <roopa@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/vxlan')
-rw-r--r--drivers/net/vxlan/Makefile7
-rw-r--r--drivers/net/vxlan/vxlan_core.c4834
2 files changed, 4841 insertions, 0 deletions
diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile
new file mode 100644
index 000000000000..567266133593
--- /dev/null
+++ b/drivers/net/vxlan/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the vxlan driver
+#
+
+obj-$(CONFIG_VXLAN) += vxlan.o
+
+vxlan-objs := vxlan_core.o
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
new file mode 100644
index 000000000000..d0dc90d3dac2
--- /dev/null
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -0,0 +1,4834 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VXLAN: Virtual eXtensible Local Area Network
+ *
+ * Copyright (c) 2012-2013 Vyatta Inc.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/if_ether.h>
+#include <linux/ethtool.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/gro.h>
+#include <net/ipv6_stubs.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/rtnetlink.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/tun_proto.h>
+#include <net/vxlan.h>
+#include <net/nexthop.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#define VXLAN_VERSION "0.1"
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
+#define FDB_AGE_DEFAULT 300 /* 5 min */
+#define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */
+
+/* UDP port for VXLAN traffic.
+ * The IANA assigned port is 4789, but the Linux default is 8472
+ * for compatibility with early adopters.
+ */
+static unsigned short vxlan_port __read_mostly = 8472;
+module_param_named(udp_port, vxlan_port, ushort, 0444);
+MODULE_PARM_DESC(udp_port, "Destination UDP port");
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
+static unsigned int vxlan_net_id;
+static struct rtnl_link_ops vxlan_link_ops;
+
+static const u8 all_zeros_mac[ETH_ALEN + 2];
+
+static int vxlan_sock_add(struct vxlan_dev *vxlan);
+
+static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
+
+/* per-network namespace private data for this module */
+struct vxlan_net {
+ struct list_head vxlan_list;
+ struct hlist_head sock_list[PORT_HASH_SIZE];
+ spinlock_t sock_lock;
+ struct notifier_block nexthop_notifier_block;
+};
+
+/* Forwarding table entry */
+struct vxlan_fdb {
+ struct hlist_node hlist; /* linked list of entries */
+ struct rcu_head rcu;
+ unsigned long updated; /* jiffies */
+ unsigned long used;
+ struct list_head remotes;
+ u8 eth_addr[ETH_ALEN];
+ u16 state; /* see ndm_state */
+ __be32 vni;
+ u16 flags; /* see ndm_flags and below */
+ struct list_head nh_list;
+ struct nexthop __rcu *nh;
+ struct vxlan_dev __rcu *vdev;
+};
+
+#define NTF_VXLAN_ADDED_BY_USER 0x100
+
+/* salt for hash table */
+static u32 vxlan_salt __read_mostly;
+
+static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
+{
+ return vs->flags & VXLAN_F_COLLECT_METADATA ||
+ ip_tunnel_collect_metadata();
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline
+bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
+{
+ if (a->sa.sa_family != b->sa.sa_family)
+ return false;
+ if (a->sa.sa_family == AF_INET6)
+ return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
+ else
+ return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+}
+
+static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
+{
+ if (nla_len(nla) >= sizeof(struct in6_addr)) {
+ ip->sin6.sin6_addr = nla_get_in6_addr(nla);
+ ip->sa.sa_family = AF_INET6;
+ return 0;
+ } else if (nla_len(nla) >= sizeof(__be32)) {
+ ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+ ip->sa.sa_family = AF_INET;
+ return 0;
+ } else {
+ return -EAFNOSUPPORT;
+ }
+}
+
+static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+ const union vxlan_addr *ip)
+{
+ if (ip->sa.sa_family == AF_INET6)
+ return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
+ else
+ return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+}
+
+#else /* !CONFIG_IPV6 */
+
+static inline
+bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
+{
+ return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+}
+
+static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
+{
+ if (nla_len(nla) >= sizeof(struct in6_addr)) {
+ return -EAFNOSUPPORT;
+ } else if (nla_len(nla) >= sizeof(__be32)) {
+ ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+ ip->sa.sa_family = AF_INET;
+ return 0;
+ } else {
+ return -EAFNOSUPPORT;
+ }
+}
+
+static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+ const union vxlan_addr *ip)
+{
+ return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+}
+#endif
+
+/* Virtual Network hash table head */
+static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
+{
+ return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
+}
+
+/* Socket hash table head */
+static inline struct hlist_head *vs_head(struct net *net, __be16 port)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+
+ return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+}
+
+/* First remote destination for a forwarding entry.
+ * Guaranteed to be non-NULL because remotes are never deleted.
+ */
+static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
+{
+ if (rcu_access_pointer(fdb->nh))
+ return NULL;
+ return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
+}
+
+static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
+{
+ if (rcu_access_pointer(fdb->nh))
+ return NULL;
+ return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
+}
+
+/* Find VXLAN socket based on network namespace, address family, UDP port,
+ * enabled unshareable flags and socket device binding (see l3mdev with
+ * non-default VRF).
+ */
+static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
+ __be16 port, u32 flags, int ifindex)
+{
+ struct vxlan_sock *vs;
+
+ flags &= VXLAN_F_RCV_FLAGS;
+
+ hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
+ if (inet_sk(vs->sock->sk)->inet_sport == port &&
+ vxlan_get_sk_family(vs) == family &&
+ vs->flags == flags &&
+ vs->sock->sk->sk_bound_dev_if == ifindex)
+ return vs;
+ }
+ return NULL;
+}
+
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
+ __be32 vni)
+{
+ struct vxlan_dev_node *node;
+
+ /* For flow based devices, map all packets to VNI 0 */
+ if (vs->flags & VXLAN_F_COLLECT_METADATA)
+ vni = 0;
+
+ hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
+ if (node->vxlan->default_dst.remote_vni != vni)
+ continue;
+
+ if (IS_ENABLED(CONFIG_IPV6)) {
+ const struct vxlan_config *cfg = &node->vxlan->cfg;
+
+ if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
+ cfg->remote_ifindex != ifindex)
+ continue;
+ }
+
+ return node->vxlan;
+ }
+
+ return NULL;
+}
+
+/* Look up VNI in a per net namespace table */
+static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
+ __be32 vni, sa_family_t family,
+ __be16 port, u32 flags)
+{
+ struct vxlan_sock *vs;
+
+ vs = vxlan_find_sock(net, family, port, flags, ifindex);
+ if (!vs)
+ return NULL;
+
+ return vxlan_vs_find_vni(vs, ifindex, vni);
+}
+
+/* Fill in neighbour message in skbuff. */
+static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
+ const struct vxlan_fdb *fdb,
+ u32 portid, u32 seq, int type, unsigned int flags,
+ const struct vxlan_rdst *rdst)
+{
+ unsigned long now = jiffies;
+ struct nda_cacheinfo ci;
+ bool send_ip, send_eth;
+ struct nlmsghdr *nlh;
+ struct nexthop *nh;
+ struct ndmsg *ndm;
+ int nh_family;
+ u32 nh_id;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ndm = nlmsg_data(nlh);
+ memset(ndm, 0, sizeof(*ndm));
+
+ send_eth = send_ip = true;
+
+ rcu_read_lock();
+ nh = rcu_dereference(fdb->nh);
+ if (nh) {
+ nh_family = nexthop_get_family(nh);
+ nh_id = nh->id;
+ }
+ rcu_read_unlock();
+
+ if (type == RTM_GETNEIGH) {
+ if (rdst) {
+ send_ip = !vxlan_addr_any(&rdst->remote_ip);
+ ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
+ } else if (nh) {
+ ndm->ndm_family = nh_family;
+ }
+ send_eth = !is_zero_ether_addr(fdb->eth_addr);
+ } else
+ ndm->ndm_family = AF_BRIDGE;
+ ndm->ndm_state = fdb->state;
+ ndm->ndm_ifindex = vxlan->dev->ifindex;
+ ndm->ndm_flags = fdb->flags;
+ if (rdst && rdst->offloaded)
+ ndm->ndm_flags |= NTF_OFFLOADED;
+ ndm->ndm_type = RTN_UNICAST;
+
+ if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
+ nla_put_s32(skb, NDA_LINK_NETNSID,
+ peernet2id(dev_net(vxlan->dev), vxlan->net)))
+ goto nla_put_failure;
+
+ if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
+ goto nla_put_failure;
+ if (nh) {
+ if (nla_put_u32(skb, NDA_NH_ID, nh_id))
+ goto nla_put_failure;
+ } else if (rdst) {
+ if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
+ &rdst->remote_ip))
+ goto nla_put_failure;
+
+ if (rdst->remote_port &&
+ rdst->remote_port != vxlan->cfg.dst_port &&
+ nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+ goto nla_put_failure;
+ if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
+ nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
+ goto nla_put_failure;
+ if (rdst->remote_ifindex &&
+ nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
+ goto nla_put_failure;
+ }
+
+ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
+ nla_put_u32(skb, NDA_SRC_VNI,
+ be32_to_cpu(fdb->vni)))
+ goto nla_put_failure;
+
+ ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+ ci.ndm_confirmed = 0;
+ ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+ ci.ndm_refcnt = 0;
+
+ if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static inline size_t vxlan_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
+ + nla_total_size(sizeof(__be16)) /* NDA_PORT */
+ + nla_total_size(sizeof(__be32)) /* NDA_VNI */
+ + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
+ + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
+ + nla_total_size(sizeof(struct nda_cacheinfo));
+}
+
+static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd, int type)
+{
+ struct net *net = dev_net(vxlan->dev);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
+static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
+ const struct vxlan_fdb *fdb,
+ const struct vxlan_rdst *rd,
+ struct netlink_ext_ack *extack,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ fdb_info->info.dev = vxlan->dev;
+ fdb_info->info.extack = extack;
+ fdb_info->remote_ip = rd->remote_ip;
+ fdb_info->remote_port = rd->remote_port;
+ fdb_info->remote_vni = rd->remote_vni;
+ fdb_info->remote_ifindex = rd->remote_ifindex;
+ memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
+ fdb_info->vni = fdb->vni;
+ fdb_info->offloaded = rd->offloaded;
+ fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
+}
+
+static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
+ struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd,
+ bool adding,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_vxlan_fdb_info info;
+ enum switchdev_notifier_type notifier_type;
+ int ret;
+
+ if (WARN_ON(!rd))
+ return 0;
+
+ notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
+ : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
+ vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
+ ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
+ &info.info, extack);
+ return notifier_to_errno(ret);
+}
+
+static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd, int type, bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (swdev_notify && rd) {
+ switch (type) {
+ case RTM_NEWNEIGH:
+ err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
+ true, extack);
+ if (err)
+ return err;
+ break;
+ case RTM_DELNEIGH:
+ vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
+ false, extack);
+ break;
+ }
+ }
+
+ __vxlan_fdb_notify(vxlan, fdb, rd, type);
+ return 0;
+}
+
+static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb f = {
+ .state = NUD_STALE,
+ };
+ struct vxlan_rdst remote = {
+ .remote_ip = *ipa, /* goes to NDA_DST */
+ .remote_vni = cpu_to_be32(VXLAN_N_VID),
+ };
+
+ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
+}
+
+static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
+{
+ struct vxlan_fdb f = {
+ .state = NUD_STALE,
+ };
+ struct vxlan_rdst remote = { };
+
+ memcpy(f.eth_addr, eth_addr, ETH_ALEN);
+
+ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
+}
+
+/* Hash Ethernet address */
+static u32 eth_hash(const unsigned char *addr)
+{
+ u64 value = get_unaligned((u64 *)addr);
+
+ /* only want 6 bytes */
+#ifdef __BIG_ENDIAN
+ value >>= 16;
+#else
+ value <<= 16;
+#endif
+ return hash_64(value, FDB_HASH_BITS);
+}
+
+static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
+{
+ /* use 1 byte of OUI and 3 bytes of NIC */
+ u32 key = get_unaligned((u32 *)(addr + 2));
+
+ return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
+}
+
+static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
+{
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
+ return eth_vni_hash(mac, vni);
+ else
+ return eth_hash(mac);
+}
+
+/* Hash chain to use given mac address */
+static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
+ const u8 *mac, __be32 vni)
+{
+ return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
+}
+
+/* Look up Ethernet address in forwarding table */
+static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
+ const u8 *mac, __be32 vni)
+{
+ struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
+ struct vxlan_fdb *f;
+
+ hlist_for_each_entry_rcu(f, head, hlist) {
+ if (ether_addr_equal(mac, f->eth_addr)) {
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
+ if (vni == f->vni)
+ return f;
+ } else {
+ return f;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
+ const u8 *mac, __be32 vni)
+{
+ struct vxlan_fdb *f;
+
+ f = __vxlan_find_mac(vxlan, mac, vni);
+ if (f && f->used != jiffies)
+ f->used = jiffies;
+
+ return f;
+}
+
+/* caller should hold vxlan->hash_lock */
+static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
+ union vxlan_addr *ip, __be16 port,
+ __be32 vni, __u32 ifindex)
+{
+ struct vxlan_rdst *rd;
+
+ list_for_each_entry(rd, &f->remotes, list) {
+ if (vxlan_addr_equal(&rd->remote_ip, ip) &&
+ rd->remote_port == port &&
+ rd->remote_vni == vni &&
+ rd->remote_ifindex == ifindex)
+ return rd;
+ }
+
+ return NULL;
+}
+
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ u8 eth_addr[ETH_ALEN + 2] = { 0 };
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ int rc = 0;
+
+ if (is_multicast_ether_addr(mac) ||
+ is_zero_ether_addr(mac))
+ return -EINVAL;
+
+ ether_addr_copy(eth_addr, mac);
+
+ rcu_read_lock();
+
+ f = __vxlan_find_mac(vxlan, eth_addr, vni);
+ if (!f) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ rdst = first_remote_rcu(f);
+ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
+
+out:
+ rcu_read_unlock();
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
+
+static int vxlan_fdb_notify_one(struct notifier_block *nb,
+ const struct vxlan_dev *vxlan,
+ const struct vxlan_fdb *f,
+ const struct vxlan_rdst *rdst,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_vxlan_fdb_info fdb_info;
+ int rc;
+
+ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
+ rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
+ &fdb_info);
+ return notifier_to_errno(rc);
+}
+
+int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan;
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ unsigned int h;
+ int rc = 0;
+
+ if (!netif_is_vxlan(dev))
+ return -EINVAL;
+ vxlan = netdev_priv(dev);
+
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_bh(&vxlan->hash_lock[h]);
+ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
+ if (f->vni == vni) {
+ list_for_each_entry(rdst, &f->remotes, list) {
+ rc = vxlan_fdb_notify_one(nb, vxlan,
+ f, rdst,
+ extack);
+ if (rc)
+ goto unlock;
+ }
+ }
+ }
+ spin_unlock_bh(&vxlan->hash_lock[h]);
+ }
+ return 0;
+
+unlock:
+ spin_unlock_bh(&vxlan->hash_lock[h]);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
+
+void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
+{
+ struct vxlan_dev *vxlan;
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ unsigned int h;
+
+ if (!netif_is_vxlan(dev))
+ return;
+ vxlan = netdev_priv(dev);
+
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_bh(&vxlan->hash_lock[h]);
+ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
+ if (f->vni == vni)
+ list_for_each_entry(rdst, &f->remotes, list)
+ rdst->offloaded = false;
+ spin_unlock_bh(&vxlan->hash_lock[h]);
+ }
+
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
+
+/* Replace destination of unicast mac */
+static int vxlan_fdb_replace(struct vxlan_fdb *f,
+ union vxlan_addr *ip, __be16 port, __be32 vni,
+ __u32 ifindex, struct vxlan_rdst *oldrd)
+{
+ struct vxlan_rdst *rd;
+
+ rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
+ if (rd)
+ return 0;
+
+ rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
+ if (!rd)
+ return 0;
+
+ *oldrd = *rd;
+ dst_cache_reset(&rd->dst_cache);
+ rd->remote_ip = *ip;
+ rd->remote_port = port;
+ rd->remote_vni = vni;
+ rd->remote_ifindex = ifindex;
+ rd->offloaded = false;
+ return 1;
+}
+
+/* Add/update destinations for multicast */
+static int vxlan_fdb_append(struct vxlan_fdb *f,
+ union vxlan_addr *ip, __be16 port, __be32 vni,
+ __u32 ifindex, struct vxlan_rdst **rdp)
+{
+ struct vxlan_rdst *rd;
+
+ rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
+ if (rd)
+ return 0;
+
+ rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
+ if (rd == NULL)
+ return -ENOBUFS;
+
+ if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
+ kfree(rd);
+ return -ENOBUFS;
+ }
+
+ rd->remote_ip = *ip;
+ rd->remote_port = port;
+ rd->offloaded = false;
+ rd->remote_vni = vni;
+ rd->remote_ifindex = ifindex;
+
+ list_add_tail_rcu(&rd->list, &f->remotes);
+
+ *rdp = rd;
+ return 1;
+}
+
+static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
+ unsigned int off,
+ struct vxlanhdr *vh, size_t hdrlen,
+ __be32 vni_field,
+ struct gro_remcsum *grc,
+ bool nopartial)
+{
+ size_t start, offset;
+
+ if (skb->remcsum_offload)
+ return vh;
+
+ if (!NAPI_GRO_CB(skb)->csum_valid)
+ return NULL;
+
+ start = vxlan_rco_start(vni_field);
+ offset = start + vxlan_rco_offset(vni_field);
+
+ vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
+ start, offset, grc, nopartial);
+
+ skb->remcsum_offload = 1;
+
+ return vh;
+}
+
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
+ struct vxlanhdr *vh, *vh2;
+ unsigned int hlen, off_vx;
+ int flush = 1;
+ struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
+ __be32 flags;
+ struct gro_remcsum grc;
+
+ skb_gro_remcsum_init(&grc);
+
+ off_vx = skb_gro_offset(skb);
+ hlen = off_vx + sizeof(*vh);
+ vh = skb_gro_header_fast(skb, off_vx);
+ if (skb_gro_header_hard(skb, hlen)) {
+ vh = skb_gro_header_slow(skb, hlen, off_vx);
+ if (unlikely(!vh))
+ goto out;
+ }
+
+ skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
+
+ flags = vh->vx_flags;
+
+ if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
+ vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
+ vh->vx_vni, &grc,
+ !!(vs->flags &
+ VXLAN_F_REMCSUM_NOPARTIAL));
+
+ if (!vh)
+ goto out;
+ }
+
+ skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ vh2 = (struct vxlanhdr *)(p->data + off_vx);
+ if (vh->vx_flags != vh2->vx_flags ||
+ vh->vx_vni != vh2->vx_vni) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ }
+
+ pp = call_gro_receive(eth_gro_receive, head, skb);
+ flush = 0;
+
+out:
+ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
+
+ return pp;
+}
+
+static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
+{
+ /* Sets 'skb->inner_mac_header' since we are always called with
+ * 'skb->encapsulation' set.
+ */
+ return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
+}
+
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
+ __u16 state, __be32 src_vni,
+ __u16 ndm_flags)
+{
+ struct vxlan_fdb *f;
+
+ f = kmalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return NULL;
+ f->state = state;
+ f->flags = ndm_flags;
+ f->updated = f->used = jiffies;
+ f->vni = src_vni;
+ f->nh = NULL;
+ RCU_INIT_POINTER(f->vdev, vxlan);
+ INIT_LIST_HEAD(&f->nh_list);
+ INIT_LIST_HEAD(&f->remotes);
+ memcpy(f->eth_addr, mac, ETH_ALEN);
+
+ return f;
+}
+
+static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
+ __be32 src_vni, struct vxlan_fdb *f)
+{
+ ++vxlan->addrcnt;
+ hlist_add_head_rcu(&f->hlist,
+ vxlan_fdb_head(vxlan, mac, src_vni));
+}
+
+static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ u32 nhid, struct netlink_ext_ack *extack)
+{
+ struct nexthop *old_nh = rtnl_dereference(fdb->nh);
+ struct nexthop *nh;
+ int err = -EINVAL;
+
+ if (old_nh && old_nh->id == nhid)
+ return 0;
+
+ nh = nexthop_find_by_id(vxlan->net, nhid);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto err_inval;
+ }
+
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ nh = NULL;
+ goto err_inval;
+ }
+ if (!nexthop_is_fdb(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
+ goto err_inval;
+ }
+
+ if (!nexthop_is_multipath(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
+ goto err_inval;
+ }
+
+ /* check nexthop group family */
+ switch (vxlan->default_dst.remote_ip.sa.sa_family) {
+ case AF_INET:
+ if (!nexthop_has_v4(nh)) {
+ err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+ goto err_inval;
+ }
+ break;
+ case AF_INET6:
+ if (nexthop_has_v4(nh)) {
+ err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+ goto err_inval;
+ }
+ }
+ }
+
+ if (old_nh) {
+ list_del_rcu(&fdb->nh_list);
+ nexthop_put(old_nh);
+ }
+ rcu_assign_pointer(fdb->nh, nh);
+ list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
+ return 1;
+
+err_inval:
+ if (nh)
+ nexthop_put(nh);
+ return err;
+}
+
+static int vxlan_fdb_create(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __be16 port, __be32 src_vni,
+ __be32 vni, __u32 ifindex, __u16 ndm_flags,
+ u32 nhid, struct vxlan_fdb **fdb,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_rdst *rd = NULL;
+ struct vxlan_fdb *f;
+ int rc;
+
+ if (vxlan->cfg.addrmax &&
+ vxlan->addrcnt >= vxlan->cfg.addrmax)
+ return -ENOSPC;
+
+ netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+ f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+ if (!f)
+ return -ENOMEM;
+
+ if (nhid)
+ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+ else
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+ if (rc < 0)
+ goto errout;
+
+ *fdb = f;
+
+ return 0;
+
+errout:
+ kfree(f);
+ return rc;
+}
+
+static void __vxlan_fdb_free(struct vxlan_fdb *f)
+{
+ struct vxlan_rdst *rd, *nd;
+ struct nexthop *nh;
+
+ nh = rcu_dereference_raw(f->nh);
+ if (nh) {
+ rcu_assign_pointer(f->nh, NULL);
+ rcu_assign_pointer(f->vdev, NULL);
+ nexthop_put(nh);
+ }
+
+ list_for_each_entry_safe(rd, nd, &f->remotes, list) {
+ dst_cache_destroy(&rd->dst_cache);
+ kfree(rd);
+ }
+ kfree(f);
+}
+
+static void vxlan_fdb_free(struct rcu_head *head)
+{
+ struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+
+ __vxlan_fdb_free(f);
+}
+
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+ bool do_notify, bool swdev_notify)
+{
+ struct vxlan_rdst *rd;
+
+ netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
+
+ --vxlan->addrcnt;
+ if (do_notify) {
+ if (rcu_access_pointer(f->nh))
+ vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
+ swdev_notify, NULL);
+ else
+ list_for_each_entry(rd, &f->remotes, list)
+ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+ swdev_notify, NULL);
+ }
+
+ hlist_del_rcu(&f->hlist);
+ list_del_rcu(&f->nh_list);
+ call_rcu(&f->rcu, vxlan_fdb_free);
+}
+
+static void vxlan_dst_free(struct rcu_head *head)
+{
+ struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+
+ dst_cache_destroy(&rd->dst_cache);
+ kfree(rd);
+}
+
+static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
+ union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags,
+ struct vxlan_fdb *f, u32 nhid,
+ bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
+ struct vxlan_rdst *rd = NULL;
+ struct vxlan_rdst oldrd;
+ int notify = 0;
+ int rc = 0;
+ int err;
+
+ if (nhid && !rcu_access_pointer(f->nh)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot replace an existing non nexthop fdb with a nexthop");
+ return -EOPNOTSUPP;
+ }
+
+ if (nhid && (flags & NLM_F_APPEND)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot append to a nexthop fdb");
+ return -EOPNOTSUPP;
+ }
+
+ /* Do not allow an externally learned entry to take over an entry added
+ * by the user.
+ */
+ if (!(fdb_flags & NTF_EXT_LEARNED) ||
+ !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
+ if (f->state != state) {
+ f->state = state;
+ f->updated = jiffies;
+ notify = 1;
+ }
+ if (f->flags != fdb_flags) {
+ f->flags = fdb_flags;
+ f->updated = jiffies;
+ notify = 1;
+ }
+ }
+
+ if ((flags & NLM_F_REPLACE)) {
+ /* Only change unicasts */
+ if (!(is_multicast_ether_addr(f->eth_addr) ||
+ is_zero_ether_addr(f->eth_addr))) {
+ if (nhid) {
+ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+ if (rc < 0)
+ return rc;
+ } else {
+ rc = vxlan_fdb_replace(f, ip, port, vni,
+ ifindex, &oldrd);
+ }
+ notify |= rc;
+ } else {
+ NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
+ return -EOPNOTSUPP;
+ }
+ }
+ if ((flags & NLM_F_APPEND) &&
+ (is_multicast_ether_addr(f->eth_addr) ||
+ is_zero_ether_addr(f->eth_addr))) {
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+
+ if (rc < 0)
+ return rc;
+ notify |= rc;
+ }
+
+ if (ndm_flags & NTF_USE)
+ f->used = jiffies;
+
+ if (notify) {
+ if (rd == NULL)
+ rd = first_remote_rtnl(f);
+
+ err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
+ swdev_notify, extack);
+ if (err)
+ goto err_notify;
+ }
+
+ return 0;
+
+err_notify:
+ if (nhid)
+ return err;
+ if ((flags & NLM_F_REPLACE) && rc)
+ *rd = oldrd;
+ else if ((flags & NLM_F_APPEND) && rc) {
+ list_del_rcu(&rd->list);
+ call_rcu(&rd->rcu, vxlan_dst_free);
+ }
+ return err;
+}
+
+static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 src_vni, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
+ bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+