/*
* GENEVE: Generic Network Virtualization Encapsulation
*
* Copyright (c) 2015 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/hash.h>
#include <net/dst_metadata.h>
#include <net/gro_cells.h>
#include <net/rtnetlink.h>
#include <net/geneve.h>
#include <net/protocol.h>
/* Driver version string and UDP port constant; their users are not visible
 * in this part of the file.
 */
#define GENEVE_NETDEV_VER "0.6"
#define GENEVE_UDP_PORT 6081
/* A VNI is stored as three bytes (see geneve_dev.vni), i.e. 24 bits:
 * GENEVE_N_VID is the number of distinct values, GENEVE_VID_MASK covers them.
 */
#define GENEVE_N_VID (1u << 24)
#define GENEVE_VID_MASK (GENEVE_N_VID - 1)
/* The per-socket VNI hash table (geneve_sock.vni_list) has
 * 2^VNI_HASH_BITS buckets.
 */
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
/* Boolean module parameter, true by default, exposed with mode 0644. */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* NOTE(review): presumably the only Geneve protocol version handled - confirm
 * against the code that checks the header version.
 */
#define GENEVE_VER 0
/* Combined size of struct udphdr and struct genevehdr. */
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
/* per-network namespace private data for this module */
struct geneve_net {
/* NOTE(review): presumably chains geneve_dev entries through
 * geneve_dev.next - confirm against the registration code.
 */
struct list_head geneve_list;
/* NOTE(review): presumably chains geneve_sock entries through
 * geneve_sock.list - confirm against the socket setup code.
 */
struct list_head sock_list;
};
/* NOTE(review): looks like the per-net id used to find struct geneve_net;
 * its registration and use are outside the visible part of the file.
 */
static int geneve_net_id;
/* One endpoint address, viewable as an IPv4 sockaddr, an IPv6 sockaddr or a
 * generic sockaddr.
 */
union geneve_addr {
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
struct sockaddr sa;
};
/* Address value whose family is AF_UNSPEC. */
static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, };
/* Pseudo network device
 *
 * Per-tunnel state. Instances are chained into a geneve_sock's vni_list
 * buckets through 'hlist' and matched on 'vni' plus 'remote' by
 * geneve_lookup() / geneve6_lookup().
 */
struct geneve_dev {
struct hlist_node hlist; /* vni hash table */
struct net *net; /* netns for packet i/o */
struct net_device *dev; /* netdev for geneve tunnel */
struct geneve_sock *sock4; /* IPv4 socket used for geneve tunnel */
#if IS_ENABLED(CONFIG_IPV6)
struct geneve_sock *sock6; /* IPv6 socket used for geneve tunnel */
#endif
u8 vni[3]; /* virtual network ID for tunnel */
u8 ttl; /* TTL override */
u8 tos; /* TOS override */
union geneve_addr remote; /* IP address for link partner */
struct list_head next; /* geneve's per namespace list */
__be16 dst_port; /* big-endian port; presumably the UDP destination port - confirm */
bool collect_md; /* NOTE(review): presumably metadata-collection mode - confirm */
struct gro_cells gro_cells; /* NOTE(review): presumably the GRO receive queue - confirm */
u32 flags; /* NOTE(review): presumably holds GENEVE_F_* bits - confirm */
};
/* Geneve device flags
 *
 * Each flag is a distinct single bit built with BIT().
 * NOTE(review): presumably stored in the u32 'flags' members of geneve_dev
 * and/or geneve_sock - the code that sets and tests them is not visible here.
 */
#define GENEVE_F_UDP_CSUM BIT(0)
#define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1)
#define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2)
/* Per-socket state: a kernel socket plus the hash table of geneve_dev entries
 * that are looked up on it by VNI.
 */
struct geneve_sock {
/* When set, geneve_rx() looks the device up with an all-zero VNI and an
 * all-zero source address instead of the values from the packet.
 */
bool collect_md;
struct list_head list; /* NOTE(review): presumably linked on geneve_net.sock_list - confirm */
struct socket *sock; /* geneve_rx() reads sock->sk->sk_family to pick IPv4 vs IPv6 */
struct rcu_head rcu; /* NOTE(review): presumably for RCU-deferred freeing - confirm */
int refcnt; /* NOTE(review): reference count; its locking is not visible here */
struct udp_offload udp_offloads;
/* Buckets of geneve_dev.hlist, indexed by geneve_net_vni_hash(). */
struct hlist_head vni_list[VNI_HASH_SIZE];
u32 flags; /* NOTE(review): presumably GENEVE_F_* bits - confirm */
};
/* Combine the three VNI bytes into one 24-bit value (vni[0] is the most
 * significant byte) and reduce it to VNI_HASH_BITS bits with hash_32().
 * The result is used as an index into geneve_sock.vni_list.
 */
static inline __u32 geneve_net_vni_hash(u8 vni[3])
{
	__u32 key = vni[2];

	key |= vni[1] << 8;
	key |= vni[0] << 16;

	return hash_32(key, VNI_HASH_BITS);
}
static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
return (vni[0] << 16) | (vni[1] << 8) | vni[2];
#else
return (__force __be64)(((__force u64)vni[0] << 40) |
((__force u64)vni[1] << 48) |
((__force u64)vni[2] << 56));
#endif
}
/* Find the IPv4 tunnel device registered on @gs for a given VNI and remote.
 *
 * @gs:   socket whose VNI hash table is searched
 * @addr: IPv4 address compared against the device's configured remote
 * @vni:  3-byte virtual network ID
 *
 * Returns the first device in the VNI's hash bucket whose VNI and remote
 * IPv4 address both match, or NULL when there is none.
 *
 * NOTE(review): the bucket is walked with hlist_for_each_entry_rcu(), so the
 * caller presumably runs inside an RCU read-side section - confirm.
 */
static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
					__be32 addr, u8 vni[])
{
	struct hlist_head *bucket = &gs->vni_list[geneve_net_vni_hash(vni)];
	struct geneve_dev *candidate;

	hlist_for_each_entry_rcu(candidate, bucket, hlist) {
		/* Different VNI that happens to share the bucket. */
		if (memcmp(vni, candidate->vni, sizeof(candidate->vni)))
			continue;

		if (candidate->remote.sin.sin_addr.s_addr == addr)
			return candidate;
	}

	return NULL;
}
#if IS_ENABLED(CONFIG_IPV6)
/* Find the IPv6 tunnel device registered on @gs for a given VNI and remote.
 *
 * @gs:    socket whose VNI hash table is searched
 * @addr6: IPv6 address (passed by value) compared against the device's
 *         configured remote
 * @vni:   3-byte virtual network ID
 *
 * Returns the first device in the VNI's hash bucket whose VNI and remote
 * IPv6 address both match, or NULL when there is none.
 *
 * NOTE(review): the bucket is walked with hlist_for_each_entry_rcu(), so the
 * caller presumably runs inside an RCU read-side section - confirm.
 */
static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
					 struct in6_addr addr6, u8 vni[])
{
	struct hlist_head *bucket = &gs->vni_list[geneve_net_vni_hash(vni)];
	struct geneve_dev *candidate;

	hlist_for_each_entry_rcu(candidate, bucket, hlist) {
		/* Different VNI that happens to share the bucket. */
		if (memcmp(vni, candidate->vni, sizeof(candidate->vni)))
			continue;

		if (ipv6_addr_equal(&addr6, &candidate->remote.sin6.sin6_addr))
			return candidate;
	}

	return NULL;
}
#endif
/* Return the Geneve header of @skb, taken to start immediately after the
 * UDP header that udp_hdr() reports.
 */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	void *after_udp = udp_hdr(skb) + 1;

	return (struct genevehdr *)after_udp;
}
/* geneve receive/decap routine */
static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
{
struct genevehdr *gnvh = geneve_hdr(skb);
struct metadata_dst *tun_dst = NULL;
struct geneve_dev *geneve = NULL;
struct pcpu_sw_netstats *stats;
struct iphdr *iph = NULL;
__be32 addr;
static u8 zero_vni[3];
u8 *vni;
int err = 0;
sa_family_t sa_family;
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6hdr *ip6h = NULL;
struct in6_addr addr6;
static struct in6_addr zero_addr6;
#endif
sa_family = gs->sock->sk->sk_family;
if (sa_family == AF_INET) {
iph = ip_hdr(skb); /* outer IP header... */
if (gs->collect_md) {
vni = zero_vni;
addr = 0;
} else {
vni = gnvh->vni;
addr = iph->saddr;
}
geneve = geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
} else if (sa_family == AF_INET6) {
ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
if (gs->collect_md) {
vni = zero_vni;
addr6 = zero_addr6;
} else {
vni = gnvh->vni;
addr6 = ip6h->saddr;
}
geneve = geneve6_lookup(gs, addr6, vni);
#endif
}
if (!geneve)
goto drop;
if (ip_tunnel_collect_metadata() || gs->collect_md) {
__be16 flags;
flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
(gnvh->oam ? TUNNEL_OAM : 0) |
(gnvh->critical ? TUNNEL_CRIT_OPT : 0);
tun_dst = udp_tun_rx_dst(skb, sa_family, flags,
vni_to_tunnel_id(gnvh->vni),
gnvh->opt_len * 4);
if (!tun_dst)
goto drop;
/* Update tunnel dst according to Geneve options. */
ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
gnvh->options, gnvh->opt_len * 4);
} else {
/* Drop packets w/ critical options,
* since we don't support any...
*/
if (gnvh->critical)
goto drop;
}
skb_reset_mac_header(skb);
skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
skb->protocol = eth_type_trans(skb, geneve->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
if (tun_dst)
skb_dst_set(skb, &
|