/*
* vrf.c: device driver to encapsulate a VRF space
*
* Copyright (c) 2015 Cumulus Networks. All rights reserved.
* Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com>
* Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
*
* Based on dummy, team and ipvlan drivers
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ip.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/rtnetlink.h>
#include <net/rtnetlink.h>
#include <linux/u64_stats_sync.h>
#include <linux/hashtable.h>
#include <linux/inetdevice.h>
#include <net/arp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/addrconf.h>
#include <net/l3mdev.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
#define vrf_master_get_rcu(dev) \
((struct net_device *)rcu_dereference(dev->rx_handler_data))
struct slave {
struct list_head list;
struct net_device *dev;
};
struct slave_queue {
struct list_head all_slaves;
};
struct net_vrf {
struct slave_queue queue;
struct rtable *rth;
struct rt6_info *rt6;
u32 tb_id;
};
struct pcpu_dstats {
u64 tx_pkts;
u64 tx_bytes;
u64 tx_drps;
u64 rx_pkts;
u64 rx_bytes;
struct u64_stats_sync syncp;
};
static struct dst_entry *vrf_ip_check(struct dst_entry *dst, u32 cookie)
{
return dst;
}
static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return ip_local_out(net, sk, skb);
}
static unsigned int vrf_v4_mtu(const struct dst_entry *dst)
{
/* TO-DO: return max ethernet size? */
return dst->dev->mtu;
}
static void vrf_dst_destroy(struct dst_entry *dst)
{
/* our dst lives forever - or until the device is closed */
}
static unsigned int vrf_default_advmss(const struct dst_entry *dst)
{
return 65535 - 40;
}
static struct dst_ops vrf_dst_ops = {
.family = AF_INET,
.local_out = vrf_ip_local_out,
.check = vrf_ip_check,
.mtu = vrf_v4_mtu,
.destroy = vrf_dst_destroy,
.default_advmss = vrf_default_advmss,
};
/* neighbor handling is done with actual device; do not want
* to flip skb->dev for those ndisc packets. This really fails
* for multiple next protocols (e.g., NEXTHDR_HOP). But it is
* a start.
*/
#if IS_ENABLED(CONFIG_IPV6)
static bool check_ipv6_frame(const struct sk_buff *skb)
{
const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
size_t hlen = sizeof(*ipv6h);
bool rc = true;
if (skb->len < hlen)
goto out;
if (ipv6h->nexthdr == NEXTHDR_ICMP) {
const struct icmp6hdr *icmph;
if (skb->len < hlen + sizeof(*icmph))
goto out;
icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
switch (icmph->icmp6_type) {
case NDISC_ROUTER_SOLICITATION:
case NDISC_ROUTER_ADVERTISEMENT:
case NDISC_NEIGHBOUR_SOLICITATION:
case NDISC_NEIGHBOUR_ADVERTISEMENT:
case NDISC_REDIRECT:
rc = false;
break;
}
}
out:
return rc;
}
#else
static bool check_ipv6_frame(const struct sk_buff *skb)
{
return false;
}
#endif
static bool is_ip_rx_frame(struct sk_buff *skb)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
return true;
case htons(ETH_P_IPV6):
return check_ipv6_frame(skb);
}
return false;
}
static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
{
vrf_dev->stats.tx_errors++;
kfree_skb(skb);
}
/* note: already called with rcu_read_lock */
static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
if (is_ip_rx_frame(skb)) {
struct net_device *dev = vrf_master_get_rcu(skb->dev);
struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
u64_stats_update_begin(&dstats->syncp);
dstats->rx_pkts++;
dstats->rx_bytes += skb->len;
u64_stats_update_end(&dstats->syncp);
skb->dev = dev;
return RX_HANDLER_ANOTHER;
}
return RX_HANDLER_PASS;
}
static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
int i;
for_each_possible_cpu(i) {
const struct pcpu_dstats *dstats;
u64 tbytes, tpkts, tdrops, rbytes, rpkts;
unsigned int start;
dstats = per_cpu_ptr(dev->dstats, i);
do {
start = u64_stats_fetch_begin_irq(&dstats->syncp);
tbytes = dstats->tx_bytes;
tpkts = dstats->tx_pkts;
tdrops = dstats->tx_drps;
rbytes = dstats->rx_bytes;
rpkts = dstats->rx_pkts;
} while (u64_stats_fetch_retry_irq(&a