summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-03-26 12:27:27 -0400
committerDavid S. Miller <davem@davemloft.net>2013-03-26 12:27:27 -0400
commitdbbd136c7dbfe0fba989f337c35f976ef432d1d5 (patch)
tree8487c6d6f12951178a093c95bf72d6a7915884c6
parenteaac5f3d3ad33547b299935e6db0cfc7be9a576a (diff)
parentf61dd388a9b76f273bb0de9786600fd64e34ba09 (diff)
downloadlinux-dbbd136c7dbfe0fba989f337c35f976ef432d1d5.tar.gz
linux-dbbd136c7dbfe0fba989f337c35f976ef432d1d5.tar.bz2
linux-dbbd136c7dbfe0fba989f337c35f976ef432d1d5.zip
Merge branch 'tunnels'
Pravin B Shelar says: ==================== Following patch series restructure GRE and IPIP tunneling code to make it modular. It adds ip_tunnel module which acts as generic tunneling layer which has common code. These patches do not change any functionality. v3:v4: - Fixed compilation error in ipv6. - Few coding style fixes. v2-v3: - Use GPL exports for all export symbols. - Set default config NET_IP_TUNNEL to m. v1-v2: - Dropped patch to convert gre_proto_lock to rtnl lock. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/vxlan.c100
-rw-r--r--include/net/gre.h51
-rw-r--r--include/net/ip6_tunnel.h1
-rw-r--r--include/net/ip_tunnels.h177
-rw-r--r--include/net/ipip.h84
-rw-r--r--net/ipv4/Kconfig7
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/ip_gre.c1504
-rw-r--r--net/ipv4/ip_tunnel.c1035
-rw-r--r--net/ipv4/ip_vti.c42
-rw-r--r--net/ipv4/ipip.c748
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ip6_gre.c45
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/sit.c39
20 files changed, 1668 insertions, 2179 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 87f1d39ca551..3835321b8cf3 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -151,6 +151,7 @@ config MACVTAP
config VXLAN
tristate "Virtual eXtensible Local Area Network (VXLAN)"
depends on INET
+ select NET_IP_TUNNEL
---help---
This allows one to create vxlan virtual interfaces that provide
Layer 2 Networks over Layer 3 Networks. VXLAN is often used
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 33427fd62515..7624ab1f7b03 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -33,7 +33,7 @@
#include <net/arp.h>
#include <net/ndisc.h>
#include <net/ip.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/rtnetlink.h>
@@ -101,20 +101,10 @@ struct vxlan_fdb {
u8 eth_addr[ETH_ALEN];
};
-/* Per-cpu network traffic stats */
-struct vxlan_stats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
-
/* Pseudo network device */
struct vxlan_dev {
struct hlist_node hlist;
struct net_device *dev;
- struct vxlan_stats __percpu *stats;
__u32 vni; /* virtual network id */
__be32 gaddr; /* multicast group */
__be32 saddr; /* source address */
@@ -667,7 +657,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct iphdr *oip;
struct vxlanhdr *vxh;
struct vxlan_dev *vxlan;
- struct vxlan_stats *stats;
+ struct pcpu_tstats *stats;
__u32 vni;
int err;
@@ -743,7 +733,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
}
}
- stats = this_cpu_ptr(vxlan->stats);
+ stats = this_cpu_ptr(vxlan->dev->tstats);
u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
stats->rx_bytes += skb->len;
@@ -874,28 +864,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-/* Extract dsfield from inner protocol */
-static inline u8 vxlan_get_dsfield(const struct iphdr *iph,
- const struct sk_buff *skb)
-{
- if (skb->protocol == htons(ETH_P_IP))
- return iph->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
- return ipv6_get_dsfield((const struct ipv6hdr *)iph);
- else
- return 0;
-}
-
-/* Propogate ECN bits out */
-static inline u8 vxlan_ecn_encap(u8 tos,
- const struct iphdr *iph,
- const struct sk_buff *skb)
-{
- u8 inner = vxlan_get_dsfield(iph, skb);
-
- return INET_ECN_encapsulate(tos, inner);
-}
-
static void vxlan_sock_free(struct sk_buff *skb)
{
sock_put(skb->sk);
@@ -974,8 +942,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
/* short-circuited back to local bridge */
if (netif_rx(skb) == NET_RX_SUCCESS) {
- struct vxlan_stats *stats =
- this_cpu_ptr(vxlan->stats);
+ struct pcpu_tstats *stats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
@@ -1007,7 +974,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = vxlan->tos;
if (tos == 1)
- tos = vxlan_get_dsfield(old_iph, skb);
+ tos = ip_tunnel_get_dsfield(old_iph, skb);
src_port = vxlan_src_port(vxlan, skb);
@@ -1058,7 +1025,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
iph->ihl = sizeof(struct iphdr) >> 2;
iph->frag_off = df;
iph->protocol = IPPROTO_UDP;
- iph->tos = vxlan_ecn_encap(tos, old_iph, skb);
+ iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -1183,10 +1150,8 @@ static void vxlan_cleanup(unsigned long arg)
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
- struct vxlan_dev *vxlan = netdev_priv(dev);
-
- vxlan->stats = alloc_percpu(struct vxlan_stats);
- if (!vxlan->stats)
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
return -ENOMEM;
return 0;
@@ -1242,49 +1207,6 @@ static int vxlan_stop(struct net_device *dev)
return 0;
}
-/* Merge per-cpu statistics */
-static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *stats)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_stats tmp, sum = { 0 };
- unsigned int cpu;
-
- for_each_possible_cpu(cpu) {
- unsigned int start;
- const struct vxlan_stats *stats
- = per_cpu_ptr(vxlan->stats, cpu);
-
- do {
- start = u64_stats_fetch_begin_bh(&stats->syncp);
- memcpy(&tmp, stats, sizeof(tmp));
- } while (u64_stats_fetch_retry_bh(&stats->syncp, start));
-
- sum.tx_bytes += tmp.tx_bytes;
- sum.tx_packets += tmp.tx_packets;
- sum.rx_bytes += tmp.rx_bytes;
- sum.rx_packets += tmp.rx_packets;
- }
-
- stats->tx_bytes = sum.tx_bytes;
- stats->tx_packets = sum.tx_packets;
- stats->rx_bytes = sum.rx_bytes;
- stats->rx_packets = sum.rx_packets;
-
- stats->multicast = dev->stats.multicast;
- stats->rx_length_errors = dev->stats.rx_length_errors;
- stats->rx_frame_errors = dev->stats.rx_frame_errors;
- stats->rx_errors = dev->stats.rx_errors;
-
- stats->tx_dropped = dev->stats.tx_dropped;
- stats->tx_carrier_errors = dev->stats.tx_carrier_errors;
- stats->tx_aborted_errors = dev->stats.tx_aborted_errors;
- stats->collisions = dev->stats.collisions;
- stats->tx_errors = dev->stats.tx_errors;
-
- return stats;
-}
-
/* Stub, nothing needs to be done. */
static void vxlan_set_multicast_list(struct net_device *dev)
{
@@ -1295,7 +1217,7 @@ static const struct net_device_ops vxlan_netdev_ops = {
.ndo_open = vxlan_open,
.ndo_stop = vxlan_stop,
.ndo_start_xmit = vxlan_xmit,
- .ndo_get_stats64 = vxlan_stats64,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_set_rx_mode = vxlan_set_multicast_list,
.ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
@@ -1312,9 +1234,7 @@ static struct device_type vxlan_type = {
static void vxlan_free(struct net_device *dev)
{
- struct vxlan_dev *vxlan = netdev_priv(dev);
-
- free_percpu(vxlan->stats);
+ free_percpu(dev->tstats);
free_netdev(dev);
}
diff --git a/include/net/gre.h b/include/net/gre.h
index 82665474bcb7..9f03a390c826 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -2,6 +2,7 @@
#define __LINUX_GRE_H
#include <linux/skbuff.h>
+#include <net/ip_tunnels.h>
#define GREPROTO_CISCO 0
#define GREPROTO_PPTP 1
@@ -12,7 +13,57 @@ struct gre_protocol {
void (*err_handler)(struct sk_buff *skb, u32 info);
};
+struct gre_base_hdr {
+ __be16 flags;
+ __be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
+
int gre_add_protocol(const struct gre_protocol *proto, u8 version);
int gre_del_protocol(const struct gre_protocol *proto, u8 version);
+static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
+{
+ __be16 tflags = 0;
+
+ if (flags & GRE_CSUM)
+ tflags |= TUNNEL_CSUM;
+ if (flags & GRE_ROUTING)
+ tflags |= TUNNEL_ROUTING;
+ if (flags & GRE_KEY)
+ tflags |= TUNNEL_KEY;
+ if (flags & GRE_SEQ)
+ tflags |= TUNNEL_SEQ;
+ if (flags & GRE_STRICT)
+ tflags |= TUNNEL_STRICT;
+ if (flags & GRE_REC)
+ tflags |= TUNNEL_REC;
+ if (flags & GRE_VERSION)
+ tflags |= TUNNEL_VERSION;
+
+ return tflags;
+}
+
+static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
+{
+ __be16 flags = 0;
+
+ if (tflags & TUNNEL_CSUM)
+ flags |= GRE_CSUM;
+ if (tflags & TUNNEL_ROUTING)
+ flags |= GRE_ROUTING;
+ if (tflags & TUNNEL_KEY)
+ flags |= GRE_KEY;
+ if (tflags & TUNNEL_SEQ)
+ flags |= GRE_SEQ;
+ if (tflags & TUNNEL_STRICT)
+ flags |= GRE_STRICT;
+ if (tflags & TUNNEL_REC)
+ flags |= GRE_REC;
+ if (tflags & TUNNEL_VERSION)
+ flags |= GRE_VERSION;
+
+ return flags;
+}
+
#endif
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index ebdef7f60862..4da5de10d1d4 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -3,6 +3,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
+#include <linux/if_tunnel.h>
#include <linux/ip6_tunnel.h>
#define IP6TUNNEL_ERR_TIMEO (30*HZ)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
new file mode 100644
index 000000000000..4b6f0b28f41f
--- /dev/null
+++ b/include/net/ip_tunnels.h
@@ -0,0 +1,177 @@
+#ifndef __NET_IP_TUNNELS_H
+#define __NET_IP_TUNNELS_H 1
+
+#include <linux/if_tunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/u64_stats_sync.h>
+#include <net/dsfield.h>
+#include <net/gro_cells.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#endif
+
+/* Keep error state on tunnel for 30 sec */
+#define IPTUNNEL_ERR_TIMEO (30*HZ)
+
+/* 6rd prefix/relay information */
+#ifdef CONFIG_IPV6_SIT_6RD
+struct ip_tunnel_6rd_parm {
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+ u16 prefixlen;
+ u16 relay_prefixlen;
+};
+#endif
+
+struct ip_tunnel_prl_entry {
+ struct ip_tunnel_prl_entry __rcu *next;
+ __be32 addr;
+ u16 flags;
+ struct rcu_head rcu_head;
+};
+
+struct ip_tunnel {
+ struct ip_tunnel __rcu *next;
+ struct hlist_node hash_node;
+ struct net_device *dev;
+
+ int err_count; /* Number of arrived ICMP errors */
+ unsigned long err_time; /* Time when the last ICMP error
+ * arrived */
+
+ /* These four fields used only by GRE */
+ __u32 i_seqno; /* The last seen seqno */
+ __u32 o_seqno; /* The last output seqno */
+ int hlen; /* Precalculated header length */
+ int mlink;
+
+ struct ip_tunnel_parm parms;
+
+ /* for SIT */
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd_parm ip6rd;
+#endif
+ struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
+ unsigned int prl_count; /* # of entries in PRL */
+ int ip_tnl_net_id;
+ struct gro_cells gro_cells;
+};
+
+#define TUNNEL_CSUM __cpu_to_be16(0x01)
+#define TUNNEL_ROUTING __cpu_to_be16(0x02)
+#define TUNNEL_KEY __cpu_to_be16(0x04)
+#define TUNNEL_SEQ __cpu_to_be16(0x08)
+#define TUNNEL_STRICT __cpu_to_be16(0x10)
+#define TUNNEL_REC __cpu_to_be16(0x20)
+#define TUNNEL_VERSION __cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
+
+struct tnl_ptk_info {
+ __be16 flags;
+ __be16 proto;
+ __be32 key;
+ __be32 seq;
+};
+
+#define PACKET_RCVD 0
+#define PACKET_REJECT 1
+
+#define IP_TNL_HASH_BITS 10
+#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS)
+
+struct ip_tunnel_net {
+ struct hlist_head *tunnels;
+ struct net_device *fb_tunnel_dev;
+};
+
+int ip_tunnel_init(struct net_device *dev);
+void ip_tunnel_uninit(struct net_device *dev);
+void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
+int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
+ struct rtnl_link_ops *ops, char *devname);
+
+void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn);
+
+void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ const struct iphdr *tnl_params);
+int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
+
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot);
+struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
+ int link, __be16 flags,
+ __be32 remote, __be32 local,
+ __be32 key);
+
+int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi, bool log_ecn_error);
+int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct ip_tunnel_parm *p);
+int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
+ struct ip_tunnel_parm *p);
+void ip_tunnel_setup(struct net_device *dev, int net_id);
+
+/* Extract dsfield from inner protocol */
+static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return iph->tos;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return ipv6_get_dsfield((const struct ipv6hdr *)iph);
+ else
+ return 0;
+}
+
+/* Propogate ECN bits out */
+static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ u8 inner = ip_tunnel_get_dsfield(iph, skb);
+
+ return INET_ECN_encapsulate(tos, inner);
+}
+
+static inline void tunnel_ip_select_ident(struct sk_buff *skb,
+ const struct iphdr *old_iph,
+ struct dst_entry *dst)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ /* Use inner packet iph-id if possible. */
+ if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
+ iph->id = old_iph->id;
+ else
+ __ip_select_ident(iph, dst,
+ (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+}
+
+static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ int err;
+ int pkt_len = skb->len - skb_transport_offset(skb);
+ struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
+
+ nf_reset(skb);
+
+ err = ip_local_out(skb);
+ if (likely(net_xmit_eval(err) == 0)) {
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += pkt_len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ dev->stats.tx_errors++;
+ dev->stats.tx_aborted_errors++;
+ }
+}
+#endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/ipip.h b/include/net/ipip.h
deleted file mode 100644
index 483b91a10bb2..000000000000
--- a/include/net/ipip.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef __NET_IPIP_H
-#define __NET_IPIP_H 1
-
-#include <linux/if_tunnel.h>
-#include <net/gro_cells.h>
-#include <net/ip.h>
-
-/* Keep error state on tunnel for 30 sec */
-#define IPTUNNEL_ERR_TIMEO (30*HZ)
-
-/* 6rd prefix/relay information */
-struct ip_tunnel_6rd_parm {
- struct in6_addr prefix;
- __be32 relay_prefix;
- u16 prefixlen;
- u16 relay_prefixlen;
-};
-
-struct ip_tunnel {
- struct ip_tunnel __rcu *next;
- struct net_device *dev;
-
- int err_count; /* Number of arrived ICMP errors */
- unsigned long err_time; /* Time when the last ICMP error arrived */
-
- /* These four fields used only by GRE */
- __u32 i_seqno; /* The last seen seqno */
- __u32 o_seqno; /* The last output seqno */
- int hlen; /* Precalculated GRE header length */
- int mlink;
-
- struct ip_tunnel_parm parms;
-
- /* for SIT */
-#ifdef CONFIG_IPV6_SIT_6RD
- struct ip_tunnel_6rd_parm ip6rd;
-#endif
- struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
- unsigned int prl_count; /* # of entries in PRL */
-
- struct gro_cells gro_cells;
-};
-
-struct ip_tunnel_prl_entry {
- struct ip_tunnel_prl_entry __rcu *next;
- __be32 addr;
- u16 flags;
- struct rcu_head rcu_head;
-};
-
-static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- int err;
- int pkt_len = skb->len - skb_transport_offset(skb);
- struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
-
- nf_reset(skb);
-
- err = ip_local_out(skb);
- if (likely(net_xmit_eval(err) == 0)) {
- u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- u64_stats_update_end(&tstats->syncp);
- } else {
- dev->stats.tx_errors++;
- dev->stats.tx_aborted_errors++;
- }
-}
-
-static inline void tunnel_ip_select_ident(struct sk_buff *skb,
- const struct iphdr *old_iph,
- struct dst_entry *dst)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- /* Use inner packet iph-id if possible. */
- if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
- iph->id = old_iph->id;
- else
- __ip_select_ident(iph, dst,
- (skb_shinfo(skb)->gso_segs ?: 1) - 1);
-}
-#endif
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7944df768454..8603ca827104 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,6 +166,7 @@ config IP_PNP_RARP
config NET_IPIP
tristate "IP: tunneling"
select INET_TUNNEL
+ select NET_IP_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX
This is helper module to demultiplex GRE packets on GRE version field criteria.
Required by ip_gre and pptp modules.
+config NET_IP_TUNNEL
+ tristate
+ default n
+
config NET_IPGRE
tristate "IP: GRE tunnels over IP"
depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
+ select NET_IP_TUNNEL
help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -313,6 +319,7 @@ config SYN_COOKIES
config NET_IPVTI
tristate "Virtual (secure) IP: tunneling"
select INET_TUNNEL
+ select NET_IP_TUNNEL
depends on INET_XFRM_MODE_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15ca63ec604e..089cb9f36387 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \
fib_frontend.o fib_semantics.o fib_trie.o \
inet_fragment.o ping.o
+obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70b2d4cf5801..93824c57b108 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -111,7 +111,6 @@
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
-#include <net/ipip.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 7a4c710c4cdd..d2d5a99fba09 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -27,11 +27,6 @@
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
-struct gre_base_hdr {
- __be16 flags;
- __be16 protocol;
-};
-#define GRE_HEADER_SECTION 4
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
{
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2e94289a17e8..ad662e906f7e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -37,7 +37,7 @@
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
@@ -108,15 +108,6 @@
fatal route to network, even if it were you who configured
fatal static route: you are innocent. :-)
-
-
- 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
- practically identical code. It would be good to glue them
- together, but it is not very evident, how to make them modular.
- sit is integral part of IPv6, ipip and gre are naturally modular.
- We could extract common parts (hash table, ioctl etc)
- to a separate module (ip_tunnel.c).
-
Alexey Kuznetsov.
*/
@@ -126,400 +117,135 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
-static void ipgre_tunnel_setup(struct net_device *dev);
-static int ipgre_tunnel_bind_dev(struct net_device *dev);
-
-/* Fallback tunnel: no source, no destination, no key, no options */
-
-#define HASH_SIZE 16
static int ipgre_net_id __read_mostly;
-struct ipgre_net {
- struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
-
- struct net_device *fb_tunnel_dev;
-};
-
-/* Tunnel hash table */
-
-/*
- 4 hash tables:
-
- 3: (remote,local)
- 2: (remote,*)
- 1: (*,local)
- 0: (*,*)
+static int gre_tap_net_id __read_mostly;
- We require exact key match i.e. if a key is present in packet
- it will match only tunnel with the same key; if it is not present,
- it will match only keyless tunnel.
-
- All keysless packets, if not matched configured keyless tunnels
- will match fallback tunnel.
- */
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+ __sum16 csum = 0;
-#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ csum = csum_fold(skb->csum);
-#define tunnels_r_l tunnels[3]
-#define tunnels_r tunnels[2]
-#define tunnels_l tunnels[1]
-#define tunnels_wc tunnels[0]
+ if (!csum)
+ break;
+ /* Fall through. */
-static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ csum = __skb_checksum_complete(skb);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ break;
}
- tot->multicast = dev->stats.multicast;
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- return tot;
+ return csum;
}
-/* Does key in tunnel parameters match packet */
-static bool ipgre_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
+static int ip_gre_calc_hlen(__be16 o_flags)
{
- if (p->i_flags & GRE_KEY) {
- if (flags & GRE_KEY)
- return key == p->i_key;
- else
- return false; /* key expected, none present */
- } else
- return !(flags & GRE_KEY);
-}
+ int addend = 4;
-/* Given src, dst and key, find appropriate for input tunnel. */
+ if (o_flags&TUNNEL_CSUM)
+ addend += 4;
+ if (o_flags&TUNNEL_KEY)
+ addend += 4;
+ if (o_flags&TUNNEL_SEQ)
+ addend += 4;
+ return addend;
+}
-static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
- __be32 remote, __be32 local,
- __be16 flags, __be32 key,
- __be16 gre_proto)
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err, int *hdr_len)
{
- struct net *net = dev_net(dev);
- int link = dev->ifindex;
- unsigned int h0 = HASH(remote);
- unsigned int h1 = HASH(key);
- struct ip_tunnel *t, *cand = NULL;
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
- ARPHRD_ETHER : ARPHRD_IPGRE;
- int score, cand_score = 4;
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
- if (local != t->parms.iph.saddr ||
- remote != t->parms.iph.daddr ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!ipgre_key_match(&t->parms, flags, key))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
- if (remote != t->parms.iph.daddr ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!ipgre_key_match(&t->parms, flags, key))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ struct iphdr *iph = ip_hdr(skb);
+ struct gre_base_hdr *greh;
+ __be32 *options;
- for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!ipgre_key_match(&t->parms, flags, key))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+ return -EINVAL;
- for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
- if (t->parms.i_key != key ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
- if (cand != NULL)
- return cand;
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ *hdr_len = ip_gre_calc_hlen(tpi->flags);
- dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
- return netdev_priv(dev);
+ if (!pskb_may_pull(skb, *hdr_len))
+ return -EINVAL;
- return NULL;
-}
+ tpi->proto = greh->protocol;
-static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
- struct ip_tunnel_parm *parms)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- __be32 key = parms->i_key;
- unsigned int h = HASH(key);
- int prio = 0;
-
- if (local)
- prio |= 1;
- if (remote && !ipv4_is_multicast(remote)) {
- prio |= 2;
- h ^= HASH(remote);
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (check_checksum(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+ options++;
}