/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IPv4 Forwarding Information Base: FIB frontend.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
static int __net_init fib4_rules_init(struct net *net)
{
struct fib_table *local_table, *main_table;
main_table = fib_trie_table(RT_TABLE_MAIN, NULL);
if (!main_table)
return -ENOMEM;
local_table = fib_trie_table(RT_TABLE_LOCAL, main_table);
if (!local_table)
goto fail;
hlist_add_head_rcu(&local_table->tb_hlist,
&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
hlist_add_head_rcu(&main_table->tb_hlist,
&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
return 0;
fail:
fib_free_table(main_table);
return -ENOMEM;
}
static bool fib4_has_custom_rules(struct net *net)
{
return false;
}
#else
struct fib_table *fib_new_table(struct net *net, u32 id)
{
struct fib_table *tb, *alias = NULL;
unsigned int h;
if (id == 0)
id = RT_TABLE_MAIN;
tb = fib_get_table(net, id);
if (tb)
return tb;
if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules)
alias = fib_new_table(net, RT_TABLE_MAIN);
tb = fib_trie_table(id, alias);
if (!tb)
return NULL;
switch (id) {
case RT_TABLE_MAIN:
rcu_assign_pointer(net->ipv4.fib_main, tb);
break;
case RT_TABLE_DEFAULT:
rcu_assign_pointer(net->ipv4.fib_default, tb);
break;
default:
break;
}
h = id & (FIB_TABLE_HASHSZ - 1);
hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
return tb;
}
EXPORT_SYMBOL_GPL(fib_new_table);
/* caller must hold either rtnl or rcu read lock */
struct fib_table *fib_get_table(struct net *net, u32 id)
{
struct fib_table *tb;
struct hlist_head *head;
unsigned int h;
if (id == 0)
id = RT_TABLE_MAIN;
h = id & (FIB_TABLE_HASHSZ - 1);
head = &net->ipv4.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb_hlist) {
if (tb->tb_id == id)
return tb;
}
return NULL;
}
static bool fib4_has_custom_rules(struct net *net)
{
return net->ipv4.fib_has_custom_rules;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
static void fib_replace_table(struct net *net, struct fib_table *old,
struct fib_table *new)
{
#ifdef CONFIG_IP_MULTIPLE_TABLES
switch (new->tb_id) {
case RT_TABLE_MAIN:
rcu_assign_pointer(net->ipv4.fib_main, new);
break;
case RT_TABLE_DEFAULT:
rcu_assign_pointer(net->ipv4.fib_default, new);
break;
default:
break;
}
#endif
/* replace the old table in the hlist */
hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist);
}
int fib_unmerge(struct net *net)
{
struct fib_table *old, *new, *main_table;
/* attempt to fetch local table if it has been allocated */
old = fib_get_table(net, RT_TABLE_LOCAL);
if (!old)
return 0;
new = fib_trie_unmerge(old);
if (!new)
return -ENOMEM;
/* table is already unmerged */
if (new == old)
return 0;
/* replace merged table with clean table */
fib_replace_table(net, old, new);
fib_free_table(old);
/* attempt to fetch main table if it has been allocated */
main_table = fib_get_table(net, RT_TABLE_MAIN);
if (!main_table)
return 0;
/* flush local entries from main table */
fib_table_flush_external(main_table);
return 0;
}
static void fib_flush(struct net *net)
{
int flushed = 0;
unsigned int h;
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
struct hlist_node *tmp;
struct fib_table *tb;
hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
flushed += fib_table_flush(net, tb, false);
}
if (flushed)
rt_cache_flush(net);
}
/*
* Find address type as if only "dev" was present in the system. If
* on_dev is NULL then all interfaces are taken into consideration.
*/
static inline unsigned int __inet_dev_addr_type(struct net *net,
const struct net_device *dev,
__be32 addr, u32 tb_id)
{
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res;
unsigned int ret = RTN_BROADCAST;
struct fib_table *table;
if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
return RTN_BROADCAST;
if (ipv4_is_multicast(addr))
return RTN_MULTICAST;
rcu_read_lock();
table = fib_get_table(net, tb_id);
if (table) {
ret = RTN_UNICAST;
if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
if (!dev || dev == res.fi->fib_dev)
ret = res.type;
}
}
rcu_read_unlock();
return ret;
}
unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id)
{
return __inet_dev_addr_type(net, NULL, addr, tb_id);
}
EXPORT_SYMBOL(inet_addr_type_table);
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL);
}
EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, dev, addr, rt_table);
}
EXPORT_SYMBOL(inet_dev_addr_type);
/* inet_addr_type with dev == NULL but using the table from a dev
* if one is associated
*/
unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr)
{
u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, NULL, addr, rt_table);
}
EXPORT_SYMBOL(inet_addr_type_dev_table);
__be32 fib_compute_spec_dst(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct in_device *in_dev;
struct fib_result res;
struct rtable *rt;
struct net *net;
int scope;
rt = skb_rtable(skb);
if ((rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL)) ==
RTCF_LOCAL)
return ip_hdr(skb)->daddr;
in_dev = __in_dev_get_rcu(dev);
net = dev_net(dev);
scope = RT_SCOPE_UNIVERSE;
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
struct flowi4 fl4 = {
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_oif = l3mdev_master_ifind
|