diff options
| author | David S. Miller <davem@davemloft.net> | 2013-03-26 12:27:27 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2013-03-26 12:27:27 -0400 |
| commit | dbbd136c7dbfe0fba989f337c35f976ef432d1d5 (patch) | |
| tree | 8487c6d6f12951178a093c95bf72d6a7915884c6 | |
| parent | eaac5f3d3ad33547b299935e6db0cfc7be9a576a (diff) | |
| parent | f61dd388a9b76f273bb0de9786600fd64e34ba09 (diff) | |
| download | linux-dbbd136c7dbfe0fba989f337c35f976ef432d1d5.tar.gz linux-dbbd136c7dbfe0fba989f337c35f976ef432d1d5.tar.bz2 linux-dbbd136c7dbfe0fba989f337c35f976ef432d1d5.zip | |
Merge branch 'tunnels'
Pravin B Shelar says:
====================
Following patch series restructure GRE and IPIP tunneling code
to make it modular. It adds ip_tunnel module which acts as
generic tunneling layer which has common code.
These patches do not change any functionality.
v3:v4:
- Fixed compilation error in ipv6.
- Few coding style fixes.
v2-v3:
- Use GPL exports for all export symbols.
- Set default config NET_IP_TUNNEL to m.
v1-v2:
- Dropped patch to convert gre_proto_lock to rtnl lock.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | drivers/net/Kconfig | 1 | ||||
| -rw-r--r-- | drivers/net/vxlan.c | 100 | ||||
| -rw-r--r-- | include/net/gre.h | 51 | ||||
| -rw-r--r-- | include/net/ip6_tunnel.h | 1 | ||||
| -rw-r--r-- | include/net/ip_tunnels.h | 177 | ||||
| -rw-r--r-- | include/net/ipip.h | 84 | ||||
| -rw-r--r-- | net/ipv4/Kconfig | 7 | ||||
| -rw-r--r-- | net/ipv4/Makefile | 1 | ||||
| -rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
| -rw-r--r-- | net/ipv4/gre.c | 5 | ||||
| -rw-r--r-- | net/ipv4/ip_gre.c | 1504 | ||||
| -rw-r--r-- | net/ipv4/ip_tunnel.c | 1035 | ||||
| -rw-r--r-- | net/ipv4/ip_vti.c | 42 | ||||
| -rw-r--r-- | net/ipv4/ipip.c | 748 | ||||
| -rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
| -rw-r--r-- | net/ipv6/Kconfig | 2 | ||||
| -rw-r--r-- | net/ipv6/af_inet6.c | 1 | ||||
| -rw-r--r-- | net/ipv6/ip6_gre.c | 45 | ||||
| -rw-r--r-- | net/ipv6/ip6_tunnel.c | 1 | ||||
| -rw-r--r-- | net/ipv6/sit.c | 39 |
20 files changed, 1668 insertions, 2179 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 87f1d39ca551..3835321b8cf3 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -151,6 +151,7 @@ config MACVTAP config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" depends on INET + select NET_IP_TUNNEL ---help--- This allows one to create vxlan virtual interfaces that provide Layer 2 Networks over Layer 3 Networks. VXLAN is often used diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 33427fd62515..7624ab1f7b03 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -33,7 +33,7 @@ #include <net/arp.h> #include <net/ndisc.h> #include <net/ip.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/icmp.h> #include <net/udp.h> #include <net/rtnetlink.h> @@ -101,20 +101,10 @@ struct vxlan_fdb { u8 eth_addr[ETH_ALEN]; }; -/* Per-cpu network traffic stats */ -struct vxlan_stats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - /* Pseudo network device */ struct vxlan_dev { struct hlist_node hlist; struct net_device *dev; - struct vxlan_stats __percpu *stats; __u32 vni; /* virtual network id */ __be32 gaddr; /* multicast group */ __be32 saddr; /* source address */ @@ -667,7 +657,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct iphdr *oip; struct vxlanhdr *vxh; struct vxlan_dev *vxlan; - struct vxlan_stats *stats; + struct pcpu_tstats *stats; __u32 vni; int err; @@ -743,7 +733,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } } - stats = this_cpu_ptr(vxlan->stats); + stats = this_cpu_ptr(vxlan->dev->tstats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len; @@ -874,28 +864,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -/* Extract dsfield from inner protocol */ -static inline u8 vxlan_get_dsfield(const struct iphdr *iph, - const struct sk_buff *skb) -{ - if (skb->protocol == htons(ETH_P_IP)) - return iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - return ipv6_get_dsfield((const struct ipv6hdr *)iph); - else - return 0; -} - -/* Propogate ECN bits out */ -static inline u8 vxlan_ecn_encap(u8 tos, - const struct iphdr *iph, - const struct sk_buff *skb) -{ - u8 inner = vxlan_get_dsfield(iph, skb); - - return INET_ECN_encapsulate(tos, inner); -} - static void vxlan_sock_free(struct sk_buff *skb) { sock_put(skb->sk); @@ -974,8 +942,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, /* short-circuited back to local bridge */ if (netif_rx(skb) == NET_RX_SUCCESS) { - struct vxlan_stats *stats = - this_cpu_ptr(vxlan->stats); + struct pcpu_tstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; @@ -1007,7 +974,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = vxlan->tos; if (tos == 1) - tos = vxlan_get_dsfield(old_iph, skb); + tos = ip_tunnel_get_dsfield(old_iph, skb); src_port = vxlan_src_port(vxlan, skb); @@ -1058,7 +1025,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; iph->protocol = IPPROTO_UDP; - iph->tos = vxlan_ecn_encap(tos, old_iph, skb); + iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb); iph->daddr = dst; iph->saddr = fl4.saddr; iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); @@ -1183,10 +1150,8 @@ static void vxlan_cleanup(unsigned long arg) /* Setup stats when device is created */ static int vxlan_init(struct net_device *dev) { - struct vxlan_dev *vxlan = netdev_priv(dev); - - vxlan->stats = alloc_percpu(struct vxlan_stats); - if (!vxlan->stats) + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) return -ENOMEM; return 0; @@ -1242,49 +1207,6 @@ static int vxlan_stop(struct net_device *dev) return 0; } -/* Merge per-cpu statistics */ -static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_stats tmp, sum = { 0 }; - unsigned int cpu; - - for_each_possible_cpu(cpu) { - unsigned int start; - const struct vxlan_stats *stats - = per_cpu_ptr(vxlan->stats, cpu); - - do { - start = u64_stats_fetch_begin_bh(&stats->syncp); - memcpy(&tmp, stats, sizeof(tmp)); - } while (u64_stats_fetch_retry_bh(&stats->syncp, start)); - - sum.tx_bytes += tmp.tx_bytes; - sum.tx_packets += tmp.tx_packets; - sum.rx_bytes += tmp.rx_bytes; - sum.rx_packets += tmp.rx_packets; - } - - stats->tx_bytes = sum.tx_bytes; - stats->tx_packets = sum.tx_packets; - stats->rx_bytes = sum.rx_bytes; - stats->rx_packets = sum.rx_packets; - - stats->multicast = dev->stats.multicast; - stats->rx_length_errors = dev->stats.rx_length_errors; - stats->rx_frame_errors = dev->stats.rx_frame_errors; - stats->rx_errors = dev->stats.rx_errors; - - stats->tx_dropped = dev->stats.tx_dropped; - stats->tx_carrier_errors = dev->stats.tx_carrier_errors; - stats->tx_aborted_errors = dev->stats.tx_aborted_errors; - stats->collisions = dev->stats.collisions; - stats->tx_errors = dev->stats.tx_errors; - - return stats; -} - /* Stub, nothing needs to be done. */ static void vxlan_set_multicast_list(struct net_device *dev) { @@ -1295,7 +1217,7 @@ static const struct net_device_ops vxlan_netdev_ops = { .ndo_open = vxlan_open, .ndo_stop = vxlan_stop, .ndo_start_xmit = vxlan_xmit, - .ndo_get_stats64 = vxlan_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_set_rx_mode = vxlan_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -1312,9 +1234,7 @@ static struct device_type vxlan_type = { static void vxlan_free(struct net_device *dev) { - struct vxlan_dev *vxlan = netdev_priv(dev); - - free_percpu(vxlan->stats); + free_percpu(dev->tstats); free_netdev(dev); } diff --git a/include/net/gre.h b/include/net/gre.h index 82665474bcb7..9f03a390c826 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -2,6 +2,7 @@ #define __LINUX_GRE_H #include <linux/skbuff.h> +#include <net/ip_tunnels.h> #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 @@ -12,7 +13,57 @@ struct gre_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); }; +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 + int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); +static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + #endif diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index ebdef7f60862..4da5de10d1d4 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -3,6 +3,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> +#include <linux/if_tunnel.h> #include <linux/ip6_tunnel.h> #define IP6TUNNEL_ERR_TIMEO (30*HZ) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h new file mode 100644 index 000000000000..4b6f0b28f41f --- /dev/null +++ b/include/net/ip_tunnels.h @@ -0,0 +1,177 @@ +#ifndef __NET_IP_TUNNELS_H +#define __NET_IP_TUNNELS_H 1 + +#include <linux/if_tunnel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/u64_stats_sync.h> +#include <net/dsfield.h> +#include <net/gro_cells.h> +#include <net/inet_ecn.h> +#include <net/ip.h> +#include <net/rtnetlink.h> + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +/* Keep error state on tunnel for 30 sec */ +#define IPTUNNEL_ERR_TIMEO (30*HZ) + +/* 6rd prefix/relay information */ +#ifdef CONFIG_IPV6_SIT_6RD +struct ip_tunnel_6rd_parm { + struct in6_addr prefix; + __be32 relay_prefix; + u16 prefixlen; + u16 relay_prefixlen; +}; +#endif + +struct ip_tunnel_prl_entry { + struct ip_tunnel_prl_entry __rcu *next; + __be32 addr; + u16 flags; + struct rcu_head rcu_head; +}; + +struct ip_tunnel { + struct ip_tunnel __rcu *next; + struct hlist_node hash_node; + struct net_device *dev; + + int err_count; /* Number of arrived ICMP errors */ + unsigned long err_time; /* Time when the last ICMP error + * arrived */ + + /* These four fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* Precalculated header length */ + int mlink; + + struct ip_tunnel_parm parms; + + /* for SIT */ +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd_parm ip6rd; +#endif + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ + unsigned int prl_count; /* # of entries in PRL */ + int ip_tnl_net_id; + struct gro_cells gro_cells; +}; + +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT __cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) + +struct tnl_ptk_info { + __be16 flags; + __be16 proto; + __be32 key; + __be32 seq; +}; + +#define PACKET_RCVD 0 +#define PACKET_REJECT 1 + +#define IP_TNL_HASH_BITS 10 +#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) + +struct ip_tunnel_net { + struct hlist_head *tunnels; + struct net_device *fb_tunnel_dev; +}; + +int ip_tunnel_init(struct net_device *dev); +void ip_tunnel_uninit(struct net_device *dev); +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname); + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params); +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); + +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot); +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key); + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error); +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +void ip_tunnel_setup(struct net_device *dev, int net_id); + +/* Extract dsfield from inner protocol */ +static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_get_dsfield((const struct ipv6hdr *)iph); + else + return 0; +} + +/* Propogate ECN bits out */ +static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, + const struct sk_buff *skb) +{ + u8 inner = ip_tunnel_get_dsfield(iph, skb); + + return INET_ECN_encapsulate(tos, inner); +} + +static inline void tunnel_ip_select_ident(struct sk_buff *skb, + const struct iphdr *old_iph, + struct dst_entry *dst) +{ + struct iphdr *iph = ip_hdr(skb); + + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); +} + +static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err; + int pkt_len = skb->len - skb_transport_offset(skb); + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + + nf_reset(skb); + + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } +} +#endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ipip.h b/include/net/ipip.h deleted file mode 100644 index 483b91a10bb2..000000000000 --- a/include/net/ipip.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef __NET_IPIP_H -#define __NET_IPIP_H 1 - -#include <linux/if_tunnel.h> -#include <net/gro_cells.h> -#include <net/ip.h> - -/* Keep error state on tunnel for 30 sec */ -#define IPTUNNEL_ERR_TIMEO (30*HZ) - -/* 6rd prefix/relay information */ -struct ip_tunnel_6rd_parm { - struct in6_addr prefix; - __be32 relay_prefix; - u16 prefixlen; - u16 relay_prefixlen; -}; - -struct ip_tunnel { - struct ip_tunnel __rcu *next; - struct net_device *dev; - - int err_count; /* Number of arrived ICMP errors */ - unsigned long err_time; /* Time when the last ICMP error arrived */ - - /* These four fields used only by GRE */ - __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ - int hlen; /* Precalculated GRE header length */ - int mlink; - - struct ip_tunnel_parm parms; - - /* for SIT */ -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd_parm ip6rd; -#endif - struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ - unsigned int prl_count; /* # of entries in PRL */ - - struct gro_cells gro_cells; -}; - -struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry __rcu *next; - __be32 addr; - u16 flags; - struct rcu_head rcu_head; -}; - -static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) -{ - int err; - int pkt_len = skb->len - skb_transport_offset(skb); - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - - nf_reset(skb); - - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } -} - -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} -#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7944df768454..8603ca827104 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,6 +166,7 @@ config IP_PNP_RARP config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX This is helper module to demultiplex GRE packets on GRE version field criteria. Required by ip_gre and pptp modules. +config NET_IP_TUNNEL + tristate + default n + config NET_IPGRE tristate "IP: GRE tunnels over IP" depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -313,6 +319,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL depends on INET_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63ec604e..089cb9f36387 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70b2d4cf5801..93824c57b108 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,7 +111,6 @@ #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> -#include <net/ipip.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/net_namespace.h> diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 7a4c710c4cdd..d2d5a99fba09 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -27,11 +27,6 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2e94289a17e8..ad662e906f7e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -108,15 +108,6 @@ fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) - - - 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain - practically identical code. It would be good to glue them - together, but it is not very evident, how to make them modular. - sit is integral part of IPv6, ipip and gre are naturally modular. - We could extract common parts (hash table, ioctl etc) - to a separate module (ip_tunnel.c). - Alexey Kuznetsov. */ @@ -126,400 +117,135 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE 16 static int ipgre_net_id __read_mostly; -struct ipgre_net { - struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - - struct net_device *fb_tunnel_dev; -}; - -/* Tunnel hash table */ - -/* - 4 hash tables: - - 3: (remote,local) - 2: (remote,*) - 1: (*,local) - 0: (*,*) +static int gre_tap_net_id __read_mostly; - We require exact key match i.e. if a key is present in packet - it will match only tunnel with the same key; if it is not present, - it will match only keyless tunnel. - - All keysless packets, if not matched configured keyless tunnels - will match fallback tunnel. - */ +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); -#define tunnels_r_l tunnels[3] -#define tunnels_r tunnels[2] -#define tunnels_l tunnels[1] -#define tunnels_wc tunnels[0] + if (!csum) + break; + /* Fall through. */ -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; } - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; + return csum; } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags) { - if (p->i_flags & GRE_KEY) { - if (flags & GRE_KEY) - return key == p->i_key; - else - return false; /* key expected, none present */ - } else - return !(flags & GRE_KEY); -} + int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. */ + if (o_flags&TUNNEL_CSUM) + addend += 4; + if (o_flags&TUNNEL_KEY) + addend += 4; + if (o_flags&TUNNEL_SEQ) + addend += 4; + return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be16 flags, __be32 key, - __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len) { - struct net *net = dev_net(dev); - int link = dev->ifindex; - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(key); - struct ip_tunnel *t, *cand = NULL; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? - ARPHRD_ETHER : ARPHRD_IPGRE; - int score, cand_score = 4; - - for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { - if (local != t->parms.iph.saddr || - remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } - - for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { - if (remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + struct iphdr *iph = ip_hdr(skb); + struct gre_base_hdr *greh; + __be32 *options; - for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { - if (t->parms.i_key != key || - !(t->dev->flags & IFF_UP)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; - if (cand != NULL) - return cand; + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + *hdr_len = ip_gre_calc_hlen(tpi->flags); - dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) - return netdev_priv(dev); + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; - return NULL; -} + tpi->proto = greh->protocol; -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - unsigned int h = HASH(key); - int prio = 0; - - if (local) - prio |= 1; - if (remote && !ipv4_is_multicast(remote)) { - prio |= 2; - h ^= HASH(remote); + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; } |
