From fe2a7ce4de07472ace0cdf460a41f462a4621687 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 18 Feb 2009 16:28:35 +0100 Subject: netfilter: change generic l4 protocol number 0 is used by Hop-by-hop header and so this may cause confusion. 255 is stated as 'Reserved' by IANA. Signed-off-by: Christoph Paasch Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 4be80d7b8795..829374f426c4 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -92,7 +92,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, - .l4proto = 0, + .l4proto = 255, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, -- cgit v1.2.3 From fecea3a389c89de9afae2eda74fad894d5677229 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 16:29:08 +0100 Subject: netfilter: remove unneeded goto Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a90ac83c5918..5bb34737501f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -174,7 +174,6 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - goto unlock; } else if (verdict == NF_DROP) { kfree_skb(skb); ret = -EPERM; @@ -183,7 +182,6 @@ next_hook: verdict >> NF_VERDICT_BITS)) goto next_hook; } -unlock: rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 4a2f965ca5a4e2593744bf75425d85e0e8ff814a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:29:44 +0100 Subject: netfilter: x_tables: change elements in x_tables Change to proper type on private pointer rather than anonymous void. Keep active elements on same cache line. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c7ee8744d26b..9fac88fc0e72 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -349,9 +349,6 @@ struct xt_table { struct list_head list; - /* A unique name... */ - const char name[XT_TABLE_MAXNAMELEN]; - /* What hooks you will enter on */ unsigned int valid_hooks; @@ -359,13 +356,15 @@ struct xt_table rwlock_t lock; /* Man behind the curtain... */ - //struct ip6t_table_info *private; - void *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; u_int8_t af; /* address/protocol family */ + + /* A unique name... */ + const char name[XT_TABLE_MAXNAMELEN]; }; #include -- cgit v1.2.3 From 9c8222b9e71b690c8388bb0ebe5c3e5a1469e884 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:30:20 +0100 Subject: netfilter: x_tables: remove unneeded initializations Later patches change the locking on xt_table and the initialization of the lock element is not needed since the lock is always initialized in xt_table_register anyway. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arptable_filter.c | 2 -- net/ipv4/netfilter/iptable_filter.c | 1 - net/ipv4/netfilter/iptable_mangle.c | 1 - net/ipv4/netfilter/iptable_raw.c | 1 - net/ipv4/netfilter/iptable_security.c | 1 - net/ipv4/netfilter/nf_nat_rule.c | 1 - net/ipv6/netfilter/ip6table_filter.c | 1 - net/ipv6/netfilter/ip6table_mangle.c | 1 - net/ipv6/netfilter/ip6table_raw.c | 1 - net/ipv6/netfilter/ip6table_security.c | 1 - 10 files changed, 11 deletions(-) diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index e091187e864f..6ecfdae7c589 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,8 +48,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), - .private = NULL, .me = THIS_MODULE, .af = NFPROTO_ARP, }; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 52cb6939d093..c30a969724f8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -56,7 +56,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3929d20b9e45..4087614d9519 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -67,7 +67,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7f65d18333e3..e5356da1fb54 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -39,7 +39,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index a52a35f4a584..29ab630f240a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -60,7 +60,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index a7eb04719044..6348a793936e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,6 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 40d2e36d8fac..ef5a0a32bf8e 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -54,7 +54,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index d0b31b259d4d..ab0d398a2ba7 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -60,7 +60,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 109fab6f831a..4b792b6ca321 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -38,7 +38,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 20bc52f13e43..0ea37ff15d56 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -59,7 +59,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET6, }; -- cgit v1.2.3 From 842bff366b536787b88c07cbf2416e2cb26cae67 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:30:38 +0100 Subject: netfilter: ebtables: remove unneeded initializations The initialization of the lock element is not needed since the lock is always initialized in ebt_register_table. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtable_broute.c | 1 - net/bridge/netfilter/ebtable_filter.c | 1 - net/bridge/netfilter/ebtable_nat.c | 1 - 3 files changed, 3 deletions(-) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8604dfc1fc3b..c751111440f8 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -46,7 +46,6 @@ static struct ebt_table broute_table = .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .lock = __RW_LOCK_UNLOCKED(broute_table.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 2b2e8040a9c6..a5eea72938a6 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -55,7 +55,6 @@ static struct ebt_table frame_filter = .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_filter.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 3fe1ae87e35f..6024c551f9a9 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -55,7 +55,6 @@ static struct ebt_table frame_nat = .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_nat.lock), .check = check, .me = THIS_MODULE, }; -- cgit v1.2.3 From 55df4ac0c927c7f1f84e6d75532f0ca45d391e64 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 18 Feb 2009 16:30:56 +0100 Subject: netfilter: log invalid new icmpv6 packet with nf_log_packet() This patch adds a logging message for invalid new icmpv6 packet. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c323643ffcf9..165b256a6fa0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -126,6 +126,10 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); + if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + "nf_ct_icmpv6: invalid new with type %d ", + type + 128); return false; } atomic_set(&ct->proto.icmp.count, 0); -- cgit v1.2.3 From ddc214c43a923e89741e04da2f10e3037a64e222 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2009 17:47:50 +0100 Subject: netfilter: arp_tables: unfold two critical loops in arp_packet_match() x86 and powerpc can perform long word accesses in an efficient maner. We can use this to unroll two loops in arp_packet_match(), to perform arithmetic on long words instead of bytes. This is a win on x86_64 for example. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 44 +++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7ea88b61cb0d..b5db46342614 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -73,6 +73,36 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, return (ret != 0); } +/* + * Unfortunatly, _b and _mask are not aligned to an int (or long int) + * Some arches dont care, unrolling the loop is a win on them. + */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); +#else + unsigned long ret = 0; + int i; + + for (i = 0; i < IFNAMSIZ; i++) + ret |= (_a[i] ^ _b[i]) & _mask[i]; +#endif + return ret; +} + /* Returns whether packet matches rule or not. */ static inline int arp_packet_match(const struct arphdr *arphdr, struct net_device *dev, @@ -83,7 +113,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, const char *arpptr = (char *)(arphdr + 1); const char *src_devaddr, *tgt_devaddr; __be32 src_ipaddr, tgt_ipaddr; - int i, ret; + long ret; #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) @@ -156,10 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, } /* Look for ifname matches. */ - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (indev[i] ^ arpinfo->iniface[i]) - & arpinfo->iniface_mask[i]; - } + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -168,10 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, return 0; } - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (outdev[i] ^ arpinfo->outiface[i]) - & arpinfo->outiface_mask[i]; - } + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -221,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; bool hotdrop = false; -- cgit v1.2.3 From 563d36eb3fb22dd04da9aa6f12e1b9ba0ac115f3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 18:38:40 +0100 Subject: netfilter: Combine ipt_TTL and ip6t_HL source Suggested by: James King Similarly to commit c9fd49680954714473d6cbd2546d6ff120f96840, merge TTL and HL. Since HL does not depend on any IPv6-specific function, no new module dependencies would arise. With slight adjustments to the Kconfig help text. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 15 ---- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_TTL.c | 97 ------------------------ net/ipv6/netfilter/Kconfig | 17 ----- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_HL.c | 95 ------------------------ net/netfilter/Kconfig | 15 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_HL.c | 171 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 187 insertions(+), 226 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_TTL.c delete mode 100644 net/ipv6/netfilter/ip6t_HL.c create mode 100644 net/netfilter/xt_HL.c diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3816e1dc9295..3ad9f43b4c45 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -322,21 +322,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_TTL - tristate 'TTL target support' - depends on IP_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option adds a `TTL' target, which enables the user to modify - the TTL value of the IP header. - - While it is safe to decrement/lower the TTL, this target also enables - functionality to increment and set the TTL value of the IP header to - arbitrary values. This is EXTREMELY DANGEROUS since you can easily - create immortal packets that loop forever on the network. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5f9b650d90fc..20b0c37155fb 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c deleted file mode 100644 index 6d76aae90cc0..000000000000 --- a/net/ipv4/netfilter/ipt_TTL.c +++ /dev/null @@ -1,97 +0,0 @@ -/* TTL modification target for IP tables - * (C) 2000,2005 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - struct iphdr *iph; - const struct ipt_TTL_info *info = par->targinfo; - int new_ttl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - iph = ip_hdr(skb); - - switch (info->mode) { - case IPT_TTL_SET: - new_ttl = info->ttl; - break; - case IPT_TTL_INC: - new_ttl = iph->ttl + info->ttl; - if (new_ttl > 255) - new_ttl = 255; - break; - case IPT_TTL_DEC: - new_ttl = iph->ttl - info->ttl; - if (new_ttl < 0) - new_ttl = 0; - break; - default: - new_ttl = iph->ttl; - break; - } - - if (new_ttl != iph->ttl) { - csum_replace2(&iph->check, htons(iph->ttl << 8), - htons(new_ttl << 8)); - iph->ttl = new_ttl; - } - - return XT_CONTINUE; -} - -static bool ttl_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_TTL_info *info = par->targinfo; - - if (info->mode > IPT_TTL_MAXMODE) { - printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IPT_TTL_SET && info->ttl == 0) - return false; - return true; -} - -static struct xt_target ttl_tg_reg __read_mostly = { - .name = "TTL", - .family = NFPROTO_IPV4, - .target = ttl_tg, - .targetsize = sizeof(struct ipt_TTL_info), - .table = "mangle", - .checkentry = ttl_tg_check, - .me = THIS_MODULE, -}; - -static int __init ttl_tg_init(void) -{ - return xt_register_target(&ttl_tg_reg); -} - -static void __exit ttl_tg_exit(void) -{ - xt_unregister_target(&ttl_tg_reg); -} - -module_init(ttl_tg_init); -module_exit(ttl_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 53ea512c4608..6a42a968c498 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -170,23 +170,6 @@ config IP6_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_HL - tristate 'HL (hoplimit) target support' - depends on IP6_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option adds a `HL' target, which enables the user to decrement - the hoplimit value of the IPv6 header or set it to a given (lower) - value. - - While it is safe to decrement the hoplimit value, this option also - enables functionality to increment and set the hoplimit value of the - IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since - you can easily create immortal packets that loop forever on the - network. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on NETFILTER_ADVANCED diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 3f17c948eefb..61a4570d0ede 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,5 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets -obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c deleted file mode 100644 index 27b5adf670a2..000000000000 --- a/net/ipv6/netfilter/ip6t_HL.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Hop Limit modification target for ip6tables - * Maciej Soltysiak - * Based on HW's TTL module - * - * This software is distributed under the terms of GNU GPL - */ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) -{ - struct ipv6hdr *ip6h; - const struct ip6t_HL_info *info = par->targinfo; - int new_hl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - ip6h = ipv6_hdr(skb); - - switch (info->mode) { - case IP6T_HL_SET: - new_hl = info->hop_limit; - break; - case IP6T_HL_INC: - new_hl = ip6h->hop_limit + info->hop_limit; - if (new_hl > 255) - new_hl = 255; - break; - case IP6T_HL_DEC: - new_hl = ip6h->hop_limit - info->hop_limit; - if (new_hl < 0) - new_hl = 0; - break; - default: - new_hl = ip6h->hop_limit; - break; - } - - ip6h->hop_limit = new_hl; - - return XT_CONTINUE; -} - -static bool hl_tg6_check(const struct xt_tgchk_param *par) -{ - const struct ip6t_HL_info *info = par->targinfo; - - if (info->mode > IP6T_HL_MAXMODE) { - printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " - "make sense with value 0\n"); - return false; - } - return true; -} - -static struct xt_target hl_tg6_reg __read_mostly = { - .name = "HL", - .family = NFPROTO_IPV6, - .target = hl_tg6, - .targetsize = sizeof(struct ip6t_HL_info), - .table = "mangle", - .checkentry = hl_tg6_check, - .me = THIS_MODULE -}; - -static int __init hl_tg6_init(void) -{ - return xt_register_target(&hl_tg6_reg); -} - -static void __exit hl_tg6_exit(void) -{ - xt_unregister_target(&hl_tg6_reg); -} - -module_init(hl_tg6_init); -module_exit(hl_tg6_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c2bac9cd0caf..d99f29b7b980 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -357,6 +357,21 @@ config NETFILTER_XT_TARGET_DSCP To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_HL + tristate '"HL" hoplimit target support' + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds the "HL" (for IPv6) and "TTL" (for IPv4) + targets, which enable the user to change the + hoplimit/time-to-live value of the IP header. + + While it is safe to decrement the hoplimit/TTL value, the + modules also allow to increment and set the hoplimit value of + the header to arbitrary values. This is EXTREMELY DANGEROUS + since you can easily create immortal packets that loop + forever on the network. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index da3d909e053f..6ebe0482265b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o +obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c new file mode 100644 index 000000000000..10e789e2d12a --- /dev/null +++ b/net/netfilter/xt_HL.c @@ -0,0 +1,171 @@ +/* + * TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte + * + * Hop Limit modification target for ip6tables + * Maciej Soltysiak + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); +MODULE_LICENSE("GPL"); + +static unsigned int +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + struct iphdr *iph; + const struct ipt_TTL_info *info = par->targinfo; + int new_ttl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + iph = ip_hdr(skb); + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + csum_replace2(&iph->check, htons(iph->ttl << 8), + htons(new_ttl << 8)); + iph->ttl = new_ttl; + } + + return XT_CONTINUE; +} + +static unsigned int +hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) +{ + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = par->targinfo; + int new_hl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + ip6h = ipv6_hdr(skb); + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + ip6h->hop_limit = new_hl; + + return XT_CONTINUE; +} + +static bool ttl_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_TTL_info *info = par->targinfo; + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", + info->mode); + return false; + } + if (info->mode != IPT_TTL_SET && info->ttl == 0) + return false; + return true; +} + +static bool hl_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_HL_info *info = par->targinfo; + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", + info->mode); + return false; + } + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { + printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + "make sense with value 0\n"); + return false; + } + return true; +} + +static struct xt_target hl_tg_reg[] __read_mostly = { + { + .name = "TTL", + .revision = 0, + .family = NFPROTO_IPV4, + .target = ttl_tg, + .targetsize = sizeof(struct ipt_TTL_info), + .table = "mangle", + .checkentry = ttl_tg_check, + .me = THIS_MODULE, + }, + { + .name = "HL", + .revision = 0, + .family = NFPROTO_IPV6, + .target = hl_tg6, + .targetsize = sizeof(struct ip6t_HL_info), + .table = "mangle", + .checkentry = hl_tg6_check, + .me = THIS_MODULE, + }, +}; + +static int __init hl_tg_init(void) +{ + return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); +} + +static void __exit hl_tg_exit(void) +{ + xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); +} + +module_init(hl_tg_init); +module_exit(hl_tg_exit); +MODULE_ALIAS("ipt_TTL"); +MODULE_ALIAS("ip6t_HL"); -- cgit v1.2.3 From cfac5ef7b92a2d504563989ecd0beb563920444b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 18:39:31 +0100 Subject: netfilter: Combine ipt_ttl and ip6t_hl source Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 9 ---- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_ttl.c | 63 ------------------------- net/ipv6/netfilter/Kconfig | 9 ---- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_hl.c | 68 --------------------------- net/netfilter/Kconfig | 8 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_hl.c | 108 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 117 insertions(+), 151 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_ttl.c delete mode 100644 net/ipv6/netfilter/ip6t_hl.c create mode 100644 net/netfilter/xt_hl.c diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3ad9f43b4c45..40ad41f19b72 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -92,15 +92,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_TTL - tristate '"ttl" match support' - depends on NETFILTER_ADVANCED - help - This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user - to match packets by their TTL value. - - To compile it as a module, choose M here. If unsure, say N. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 20b0c37155fb..48111594ee9b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c deleted file mode 100644 index 297f1cbf4ff5..000000000000 --- a/net/ipv4/netfilter/ipt_ttl.c +++ /dev/null @@ -1,63 +0,0 @@ -/* IP tables module for matching the value of the TTL - * - * (C) 2000,2001 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); -MODULE_LICENSE("GPL"); - -static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_ttl_info *info = par->matchinfo; - const u8 ttl = ip_hdr(skb)->ttl; - - switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; - default: - printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match ttl_mt_reg __read_mostly = { - .name = "ttl", - .family = NFPROTO_IPV4, - .match = ttl_mt, - .matchsize = sizeof(struct ipt_ttl_info), - .me = THIS_MODULE, -}; - -static int __init ttl_mt_init(void) -{ - return xt_register_match(&ttl_mt_reg); -} - -static void __exit ttl_mt_exit(void) -{ - xt_unregister_match(&ttl_mt_reg); -} - -module_init(ttl_mt_init); -module_exit(ttl_mt_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6a42a968c498..4a8d7ecd6d09 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -94,15 +94,6 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_HL - tristate '"hl" match support' - depends on NETFILTER_ADVANCED - help - HL matching allows you to match packets based on the hop - limit of the packet. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 61a4570d0ede..aafbba30c899 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o -obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c deleted file mode 100644 index c964dca1132d..000000000000 --- a/net/ipv6/netfilter/ip6t_hl.c +++ /dev/null @@ -1,68 +0,0 @@ -/* Hop Limit matching module */ - -/* (C) 2001-2002 Maciej Soltysiak - * Based on HW's ttl module - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); -MODULE_LICENSE("GPL"); - -static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ip6t_hl_info *info = par->matchinfo; - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - - switch (info->mode) { - case IP6T_HL_EQ: - return ip6h->hop_limit == info->hop_limit; - break; - case IP6T_HL_NE: - return ip6h->hop_limit != info->hop_limit; - break; - case IP6T_HL_LT: - return ip6h->hop_limit < info->hop_limit; - break; - case IP6T_HL_GT: - return ip6h->hop_limit > info->hop_limit; - break; - default: - printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match hl_mt6_reg __read_mostly = { - .name = "hl", - .family = NFPROTO_IPV6, - .match = hl_mt6, - .matchsize = sizeof(struct ip6t_hl_info), - .me = THIS_MODULE, -}; - -static int __init hl_mt6_init(void) -{ - return xt_register_match(&hl_mt6_reg); -} - -static void __exit hl_mt6_exit(void) -{ - xt_unregister_match(&hl_mt6_reg); -} - -module_init(hl_mt6_init); -module_exit(hl_mt6_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d99f29b7b980..0eb98b4fbf44 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -620,6 +620,14 @@ config NETFILTER_XT_MATCH_HELPER To compile it as a module, choose M here. If unsure, say Y. +config NETFILTER_XT_MATCH_HL + tristate '"hl" hoplimit/TTL match support' + depends on NETFILTER_ADVANCED + ---help--- + HL matching allows you to match packets based on the hoplimit + in the IPv6 header, or the time-to-live field in the IPv4 + header of the packet. + config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6ebe0482265b..da73ed25701c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o +obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c new file mode 100644 index 000000000000..7726154c87b2 --- /dev/null +++ b/net/netfilter/xt_hl.c @@ -0,0 +1,108 @@ +/* + * IP tables module for matching the value of the TTL + * (C) 2000,2001 by Harald Welte + * + * Hop Limit matching module + * (C) 2001-2002 Maciej Soltysiak + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ttl"); +MODULE_ALIAS("ip6t_hl"); + +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ipt_ttl_info *info = par->matchinfo; + const u8 ttl = ip_hdr(skb)->ttl; + + switch (info->mode) { + case IPT_TTL_EQ: + return ttl == info->ttl; + case IPT_TTL_NE: + return ttl != info->ttl; + case IPT_TTL_LT: + return ttl < info->ttl; + case IPT_TTL_GT: + return ttl > info->ttl; + default: + printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", + info->mode); + return false; + } + + return false; +} + +static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ip6t_hl_info *info = par->matchinfo; + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + switch (info->mode) { + case IP6T_HL_EQ: + return ip6h->hop_limit == info->hop_limit; + break; + case IP6T_HL_NE: + return ip6h->hop_limit != info->hop_limit; + break; + case IP6T_HL_LT: + return ip6h->hop_limit < info->hop_limit; + break; + case IP6T_HL_GT: + return ip6h->hop_limit > info->hop_limit; + break; + default: + printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", + info->mode); + return false; + } + + return false; +} + +static struct xt_match hl_mt_reg[] __read_mostly = { + { + .name = "ttl", + .revision = 0, + .family = NFPROTO_IPV4, + .match = ttl_mt, + .matchsize = sizeof(struct ipt_ttl_info), + .me = THIS_MODULE, + }, + { + .name = "hl", + .revision = 0, + .family = NFPROTO_IPV6, + .match = hl_mt6, + .matchsize = sizeof(struct ip6t_hl_info), + .me = THIS_MODULE, + }, +}; + +static int __init hl_mt_init(void) +{ + return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); +} + +static void __exit hl_mt_exit(void) +{ + xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); +} + +module_init(hl_mt_init); +module_exit(hl_mt_exit); -- cgit v1.2.3 From 4f1c3b7e7ee4d841c8af3a074dc361d6a7a77803 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2009 19:11:39 +0100 Subject: netfilter: xt_physdev fixes 1) physdev_mt() incorrectly assumes nulldevname[] is aligned on an int 2) It also uses word comparisons, while it could use long word ones. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_physdev.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 1bcdfc12cf59..4b13ef7ce145 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -24,9 +24,9 @@ static bool physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { int i; - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; - bool ret; + unsigned long ret; const char *indev, *outdev; const struct nf_bridge_info *nf_bridge; @@ -68,10 +68,10 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)indev)[i] - ^ ((const unsigned int *)info->physindev)[i]) - & ((const unsigned int *)info->in_mask)[i]; + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)indev)[i] + ^ ((const unsigned long *)info->physindev)[i]) + & ((const unsigned long *)info->in_mask)[i]; } if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) @@ -82,13 +82,12 @@ match_outdev: return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)outdev)[i] - ^ ((const unsigned int *)info->physoutdev)[i]) - & ((const unsigned int *)info->out_mask)[i]; + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)outdev)[i] + ^ ((const unsigned long *)info->physoutdev)[i]) + & ((const unsigned long *)info->out_mask)[i]; } - - return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); + return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } static bool physdev_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3 From 4323362e49bd10b8ff3fe5cf183fdd52662ff4a3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 19 Feb 2009 11:16:03 +0100 Subject: netfilter: xtables: add backward-compat options Concern has been expressed about the changing Kconfig options. Provide the old options that forward-select. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 18 ++++++++++++++++++ net/ipv6/netfilter/Kconfig | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40ad41f19b72..f8d6180938d5 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -92,6 +92,15 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_TTL + tristate '"ttl" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_MATCH_HL. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" @@ -313,6 +322,15 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_TTL + tristate '"TTL" target support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_TARGET_HL. + # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4a8d7ecd6d09..625353a5fe18 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -94,6 +94,15 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_HL + tristate '"hl" hoplimit match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_MATCH_HL. + config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' default m if NETFILTER_ADVANCED=n @@ -121,6 +130,15 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. # The targets +config IP6_NF_TARGET_HL + tristate '"HL" hoplimit target support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_TARGET_HL. + config IP6_NF_TARGET_LOG tristate "LOG target support" default m if NETFILTER_ADVANCED=n -- cgit v1.2.3 From eacc17fb64f03b6c268aaf6cea320100d19d8af5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Feb 2009 11:17:17 +0100 Subject: netfilter: xt_physdev: unfold two loops in physdev_mt() xt_physdev netfilter module can use an ifname_compare() helper so that two loops are unfolded. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_physdev.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 4b13ef7ce145..44a234ef4439 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -20,10 +20,27 @@ MODULE_DESCRIPTION("Xtables: Bridge physical device match"); MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + static bool physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - int i; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; unsigned long ret; @@ -68,11 +85,7 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)info->physindev)[i]) - & ((const unsigned long *)info->in_mask)[i]; - } + ret = ifname_compare(indev, info->physindev, info->in_mask); if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) return false; @@ -82,11 +95,8 @@ match_outdev: return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)info->physoutdev)[i]) - & ((const unsigned long *)info->out_mask)[i]; - } + ret = ifname_compare(outdev, info->physoutdev, info->out_mask); + return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -- cgit v1.2.3 From 323dbf96382f057d035afce0237f08e18571ac1d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Feb 2009 11:18:23 +0100 Subject: netfilter: ip6_tables: unfold two loops in ip6_packet_match() ip6_tables netfilter module can use an ifname_compare() helper so that two loops are unfolded. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_tables.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a33485dc81cb..d64594b6c061 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -89,6 +89,25 @@ ip6t_ext_hdr(u8 nexthdr) (nexthdr == IPPROTO_DSTOPTS) ); } +static unsigned long ifname_compare(const char *_a, const char *_b, + const unsigned char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -99,7 +118,6 @@ ip6_packet_match(const struct sk_buff *skb, unsigned int *protoff, int *fragoff, bool *hotdrop) { - size_t i; unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); @@ -120,12 +138,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ip6info->iniface)[i]) - & ((const unsigned long *)ip6info->iniface_mask)[i]; - } + ret = ifname_compare(indev, ip6info->iniface, ip6info->iniface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -134,11 +147,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ip6info->outiface)[i]) - & ((const unsigned long *)ip6info->outiface_mask)[i]; - } + ret = ifname_compare(outdev, ip6info->outiface, ip6info->outiface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", -- cgit v1.2.3 From 784544739a25c30637397ace5489eeb6e15d7d49 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Feb 2009 10:35:32 +0100 Subject: netfilter: iptables: lock free counters The reader/writer lock in ip_tables is acquired in the critical path of processing packets and is one of the reasons just loading iptables can cause a 20% performance loss. The rwlock serves two functions: 1) it prevents changes to table state (xt_replace) while table is in use. This is now handled by doing rcu on the xt_table. When table is replaced, the new table(s) are put in and the old one table(s) are freed after RCU period. 2) it provides synchronization when accesing the counter values. This is now handled by swapping in new table_info entries for each cpu then summing the old values, and putting the result back onto one cpu. On a busy system it may cause sampling to occur at different times on each cpu, but no packet/byte counts are lost in the process. Signed-off-by: Stephen Hemminger Sucessfully tested on my dual quad core machine too, but iptables only (no ipv6 here) BTW, my new "tbench 8" result is 2450 MB/s, (it was 2150 MB/s not so long ago) Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 +- net/ipv4/netfilter/arp_tables.c | 115 ++++++++++++++++++++++++++--------- net/ipv4/netfilter/ip_tables.c | 120 +++++++++++++++++++++++++++---------- net/ipv6/netfilter/ip6_tables.c | 119 +++++++++++++++++++++++++----------- net/netfilter/x_tables.c | 26 ++++++-- 5 files changed, 284 insertions(+), 102 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9fac88fc0e72..e8e08d036752 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -353,7 +353,7 @@ struct xt_table unsigned int valid_hooks; /* Lock for the curtain */ - rwlock_t lock; + struct mutex lock; /* Man behind the curtain... */ struct xt_table_info *private; @@ -385,7 +385,7 @@ struct xt_table_info /* ipt_entry tables: one per CPU */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - char *entries[1]; + void *entries[1]; }; #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ @@ -432,6 +432,8 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); +extern void xt_table_entry_swap_rcu(struct xt_table_info *old, + struct xt_table_info *new); #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b5db46342614..64a7c6ce0b98 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -261,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - read_lock_bh(&table->lock); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -335,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - read_unlock_bh(&table->lock); + + rcu_read_unlock(); if (hotdrop) return NF_DROP; @@ -738,11 +740,65 @@ static void get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters *alloc_counters(struct xt_table *table) + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + ARPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + +static inline int +zero_entry_counter(struct arpt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } +} + +static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -752,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1099,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. - */ -static inline int add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { @@ -1172,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[smp_processor_id()]; @@ -1187,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); + xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef8b6ca068b2..08cde5bd70a5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb, mtpar.family = tgpar.family = NFPROTO_IPV4; tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ @@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb, } } while (!hotdrop); - read_unlock_bh(&table->lock); + rcu_read_unlock(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -924,13 +926,68 @@ get_counters(const struct xt_table_info *t, counters, &i); } + +} + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + IPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + + +static inline int +zero_entry_counter(struct ipt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } } static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -939,14 +996,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; + + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int @@ -1312,27 +1385,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) @@ -1393,13 +1445,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; @@ -1408,8 +1461,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat add_counter_to_