From ccf3c8c3fe1bd4828556650ae7928da6ffb4aaf6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 12 Oct 2015 11:47:07 -0700 Subject: net: Add IPv6 support to l3mdev Add operations to retrieve cached IPv6 dst entry from l3mdev device and lookup IPv6 source address. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 44a19a171104..774d85b2d5d9 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -19,14 +19,22 @@ * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * * @l3mdev_get_saddr: Get source address for a flow + * + * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device */ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); + + /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); void (*l3mdev_get_saddr)(struct net_device *dev, struct flowi4 *fl4); + + /* IPv6 ops */ + struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + const struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -123,6 +131,31 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex, } } +static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rt6_dst) + return dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + + return NULL; +} + +static inline +struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, + const struct flowi6 *fl6) +{ + struct dst_entry *dst = NULL; + struct net_device *dev; + + dev = dev_get_by_index(net, fl6->flowi6_oif); + if (dev) { + dst = l3mdev_get_rt6_dst(dev, fl6); + dev_put(dev); + } + + return dst; +} + #else static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) @@ -171,6 +204,19 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) { } + +static inline +struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + return NULL; +} +static inline +struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, + const struct flowi6 *fl6) +{ + return NULL; +} #endif #endif /* _NET_L3MDEV_H_ */ -- cgit v1.2.3 From c4850687783717fa854554965c4bc85625d0e4a8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 12 Oct 2015 11:47:08 -0700 Subject: net: Export fib6_get_table and nd_tbl Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 1 + net/ipv6/ndisc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7d2e0023c72d..09fddf70cca4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return NULL; } +EXPORT_SYMBOL_GPL(fib6_get_table); static void __net_init fib6_tables_init(struct net *net) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b18012f9f9fc..9f8a82488223 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -147,6 +147,7 @@ struct neigh_table nd_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL_GPL(nd_tbl); static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { -- cgit v1.2.3 From 35402e31366349a32b505afdfe856aeeb8d939a0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 12 Oct 2015 11:47:09 -0700 Subject: net: Add IPv6 support to VRF device Add support for IPv6 to VRF device driver. Implemenation parallels what has been done for IPv4. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/Kconfig | 4 +- drivers/net/vrf.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 275 insertions(+), 2 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b9ebd0d18a52..f184fb5bd110 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -298,8 +298,10 @@ config NLMON config NET_VRF tristate "Virtual Routing and Forwarding (Lite)" - depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES + depends on IP_MULTIPLE_TABLES depends on NET_L3_MASTER_DEV + depends on IPV6 || IPV6=n + depends on IPV6_MULTIPLE_TABLES || IPV6=n ---help--- This option enables the support for mapping interfaces into VRF's. The support enables VRF devices. diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 191579aeab16..92fa3e1ea65c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,7 @@ struct slave_queue { struct net_vrf { struct slave_queue queue; struct rtable *rth; + struct rt6_info *rt6; u32 tb_id; }; @@ -104,12 +106,56 @@ static struct dst_ops vrf_dst_ops = { .default_advmss = vrf_default_advmss, }; +/* neighbor handling is done with actual device; do not want + * to flip skb->dev for those ndisc packets. This really fails + * for multiple next protocols (e.g., NEXTHDR_HOP). But it is + * a start. + */ +#if IS_ENABLED(CONFIG_IPV6) +static bool check_ipv6_frame(const struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data; + size_t hlen = sizeof(*ipv6h); + bool rc = true; + + if (skb->len < hlen) + goto out; + + if (ipv6h->nexthdr == NEXTHDR_ICMP) { + const struct icmp6hdr *icmph; + + if (skb->len < hlen + sizeof(*icmph)) + goto out; + + icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h)); + switch (icmph->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + rc = false; + break; + } + } + +out: + return rc; +} +#else +static bool check_ipv6_frame(const struct sk_buff *skb) +{ + return false; +} +#endif + static bool is_ip_rx_frame(struct sk_buff *skb) { switch (skb->protocol) { case htons(ETH_P_IP): - case htons(ETH_P_IPV6): return true; + case htons(ETH_P_IPV6): + return check_ipv6_frame(skb); } return false; } @@ -169,12 +215,53 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, return stats; } +#if IS_ENABLED(CONFIG_IPV6) +static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, + struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct flowi6 fl6 = { + /* needed to match OIF rule */ + .flowi6_oif = dev->ifindex, + .flowi6_iif = LOOPBACK_IFINDEX, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, + .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + }; + int ret = NET_XMIT_DROP; + struct dst_entry *dst; + struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst == dst_null) + goto err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + ret = ip6_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(ret))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + + return ret; +err: + vrf_tx_error(dev, skb); + return NET_XMIT_DROP; +} +#else static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { vrf_tx_error(dev, skb); return NET_XMIT_DROP; } +#endif static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4, struct net_device *vrf_dev) @@ -269,6 +356,157 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie) +{ + return dst; +} + +static struct dst_ops vrf_dst_ops6 = { + .family = AF_INET6, + .local_out = ip6_local_out, + .check = vrf_ip6_check, + .mtu = vrf_v4_mtu, + .destroy = vrf_dst_destroy, + .default_advmss = vrf_default_advmss, +}; + +static int init_dst_ops6_kmem_cachep(void) +{ + vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache", + sizeof(struct rt6_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + + if (!vrf_dst_ops6.kmem_cachep) + return -ENOMEM; + + return 0; +} + +static void free_dst_ops6_kmem_cachep(void) +{ + kmem_cache_destroy(vrf_dst_ops6.kmem_cachep); +} + +static int vrf_input6(struct sk_buff *skb) +{ + skb->dev->stats.rx_errors++; + kfree_skb(skb); + return 0; +} + +/* modelled after ip6_finish_output2 */ +static int vrf_finish_output6(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct neighbour *neigh; + struct in6_addr *nexthop; + int ret; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); + + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EINVAL; +} + +/* modelled after ip6_output */ +static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + vrf_finish_output6, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); +} + +static void vrf_rt6_destroy(struct net_vrf *vrf) +{ + dst_destroy(&vrf->rt6->dst); + free_percpu(vrf->rt6->rt6i_pcpu); + vrf->rt6 = NULL; +} + +static int vrf_rt6_create(struct net_device *dev) +{ + struct net_vrf *vrf = netdev_priv(dev); + struct dst_entry *dst; + struct rt6_info *rt6; + int cpu; + int rc = -ENOMEM; + + rt6 = dst_alloc(&vrf_dst_ops6, dev, 0, + DST_OBSOLETE_NONE, + (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); + if (!rt6) + goto out; + + dst = &rt6->dst; + + rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL); + if (!rt6->rt6i_pcpu) { + dst_destroy(dst); + goto out; + } + for_each_possible_cpu(cpu) { + struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu); + *p = NULL; + } + + memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst)); + + INIT_LIST_HEAD(&rt6->rt6i_siblings); + INIT_LIST_HEAD(&rt6->rt6i_uncached); + + rt6->dst.input = vrf_input6; + rt6->dst.output = vrf_output6; + + rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id); + + atomic_set(&rt6->dst.__refcnt, 2); + + vrf->rt6 = rt6; + rc = 0; +out: + return rc; +} +#else +static int init_dst_ops6_kmem_cachep(void) +{ + return 0; +} + +static void free_dst_ops6_kmem_cachep(void) +{ +} + +static void vrf_rt6_destroy(struct net_vrf *vrf) +{ +} + +static int vrf_rt6_create(struct net_device *dev) +{ + return 0; +} +#endif + /* modelled after ip_finish_output2 */ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -490,6 +728,7 @@ static void vrf_dev_uninit(struct net_device *dev) struct slave *slave, *next; vrf_rtable_destroy(vrf); + vrf_rt6_destroy(vrf); list_for_each_entry_safe(slave, next, head, list) vrf_del_slave(dev, slave->dev); @@ -513,10 +752,15 @@ static int vrf_dev_init(struct net_device *dev) if (!vrf->rth) goto out_stats; + if (vrf_rt6_create(dev) != 0) + goto out_rth; + dev->flags = IFF_MASTER | IFF_NOARP; return 0; +out_rth: + vrf_rtable_destroy(vrf); out_stats: free_percpu(dev->dstats); dev->dstats = NULL; @@ -586,10 +830,30 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) fl4->flowi4_scope = scope; } +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + struct rt6_info *rt = NULL; + + if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { + struct net_vrf *vrf = netdev_priv(dev); + + rt = vrf->rt6; + atomic_inc(&rt->dst.__refcnt); + } + + return (struct dst_entry *)rt; +} +#endif + static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, +#if IS_ENABLED(CONFIG_IPV6) + .l3mdev_get_rt6_dst = vrf_get_rt6_dst, +#endif }; static void vrf_get_drvinfo(struct net_device *dev, @@ -731,6 +995,10 @@ static int __init vrf_init_module(void) if (!vrf_dst_ops.kmem_cachep) return -ENOMEM; + rc = init_dst_ops6_kmem_cachep(); + if (rc != 0) + goto error2; + register_netdevice_notifier(&vrf_notifier_block); rc = rtnl_link_register(&vrf_link_ops); @@ -741,6 +1009,8 @@ static int __init vrf_init_module(void) error: unregister_netdevice_notifier(&vrf_notifier_block); + free_dst_ops6_kmem_cachep(); +error2: kmem_cache_destroy(vrf_dst_ops.kmem_cachep); return rc; } @@ -750,6 +1020,7 @@ static void __exit vrf_cleanup_module(void) rtnl_link_unregister(&vrf_link_ops); unregister_netdevice_notifier(&vrf_notifier_block); kmem_cache_destroy(vrf_dst_ops.kmem_cachep); + free_dst_ops6_kmem_cachep(); } module_init(vrf_init_module); -- cgit v1.2.3 From ca254490c8dfdaddb5df8a763774db0f4c5200c3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 12 Oct 2015 11:47:10 -0700 Subject: net: Add VRF support to IPv6 stack As with IPv4 support for VRFs added to IPv6 stack by replacing hardcoded table ids with possibly device specific ones and manipulating the oif in the flowi6. The flow flags are used to skip oif compare in nexthop lookups if the device is enslaved to a VRF via the L3 master device. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 +++++++++--- net/ipv6/icmp.c | 6 +++++- net/ipv6/ip6_output.c | 6 ++++-- net/ipv6/ndisc.c | 26 +++++++++++++++++++++++--- net/ipv6/route.c | 27 ++++++++++++++++++++++----- 5 files changed, 63 insertions(+), 14 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c8380f1876f1..f0326aae7a02 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include @@ -2146,7 +2147,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -2179,8 +2180,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; - table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2211,7 +2213,7 @@ out: static void addrconf_add_mroute(struct net_device *dev) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_LOCAL, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_dst_len = 8, @@ -3029,6 +3031,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; + /* no link local addresses on L3 master devices */ + if (netif_is_l3_master(idev->dev)) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6c2b2132c8d3..efb1c00f2270 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -68,6 +68,7 @@ #include #include #include +#include #include @@ -496,6 +497,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; @@ -575,7 +579,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32583b507c2e..23f97c4783bb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -55,6 +55,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -885,7 +886,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } @@ -1037,7 +1039,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; if (!fl6->flowi6_oif) - fl6->flowi6_oif = dst->dev->ifindex; + fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9f8a82488223..3e0f855e1bea 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -442,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; + int oif = l3mdev_fib_oif(skb->dev); - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -767,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { - +have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { /* @@ -793,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb) } else { struct net *net = dev_net(dev); + /* perhaps an address on the master device */ + if (netif_is_l3_slave(dev)) { + struct net_device *mdev; + + mdev = netdev_master_upper_dev_get_rcu(dev); + if (mdev) { + ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); + if (ifp) + goto have_ifp; + } + } + idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ @@ -1484,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1500,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); + &saddr_buf, &ipv6_hdr(skb)->saddr, oif); + + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index db5b54ad5912..5fc1149fe91d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -1044,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + oif = 0; + redo_rt6_select: rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) @@ -1141,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = l3mdev_fib_oif(skb->dev), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -1165,8 +1169,13 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { + struct dst_entry *dst; int flags = 0; + dst = l3mdev_rt6_dst_by_oif(net, fl6); + if (dst) + return dst; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || @@ -2263,7 +2272,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = ifindex, .fc_dst_len = prefixlen, @@ -2274,6 +2282,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; + cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2314,7 +2323,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -2361,7 +2370,8 @@ static void rtmsg_to_fib6_config(struct net *net, { memset(cfg, 0, sizeof(*cfg)); - cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? + : RT6_TABLE_MAIN; cfg->fc_ifindex = rtmsg->rtmsg_ifindex; cfg->fc_metric = rtmsg->rtmsg_metric; cfg->fc_expires = rtmsg->rtmsg_info; @@ -2470,6 +2480,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast) { + u32 tb_id; struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, DST_NOCOUNT); @@ -2492,7 +2503,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; + rt->rt6i_table = fib6_get_table(net, tb_id); rt->dst.flags |= DST_NOCACHE; atomic_set(&rt->dst.__refcnt, 1); @@ -3254,6 +3266,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; + if (netif_index_is_l3_master(net, oif)) { + fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | + FLOWI_FLAG_SKIP_NH_OIF; + } + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } -- cgit v1.2.3