From c51d39010a1bccc9c1294e2d7c00005aefeb2b5c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:25 +0100 Subject: netfilter: conntrack: built-in support for DCCP CONFIG_NF_CT_PROTO_DCCP is no more a tristate. When set to y, connection tracking support for DCCP protocol is built-in into nf_conntrack.ko. footprint test: $ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \ net/ipv4/netfilter/nf_conntrack_ipv4.ko \ net/ipv6/netfilter/nf_conntrack_ipv6.ko (builtin)|| dccp | ipv4 | ipv6 | nf_conntrack ---------++--------+--------+--------+-------------- none || 469140 | 828755 | 828676 | 6141434 DCCP || - | 830566 | 829935 | 6533526 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h index 40dcc82058d1..ff721d7325cf 100644 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -25,7 +25,7 @@ enum ct_dccp_roles { #define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) #ifdef __KERNEL__ -#include +#include struct nf_ct_dccp { u_int8_t role[IP_CT_DIR_MAX]; -- cgit v1.2.3 From 0aa8c57a04907a5d02068ff9f917629be97ea78d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 15 Nov 2016 17:48:44 -0500 Subject: netfilter: introduce accessor functions for hook entries This allows easier future refactoring. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 69230140215b..575aa198097e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -79,6 +79,33 @@ struct nf_hook_entry { const struct nf_hook_ops *orig_ops; }; +static inline void +nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) +{ + entry->next = NULL; + entry->ops = *ops; + entry->orig_ops = ops; +} + +static inline int +nf_hook_entry_priority(const struct nf_hook_entry *entry) +{ + return entry->ops.priority; +} + +static inline int +nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, + struct nf_hook_state *state) +{ + return entry->ops.hook(entry->ops.priv, skb, state); +} + +static inline const struct nf_hook_ops * +nf_hook_entry_ops(const struct nf_hook_entry *entry) +{ + return entry->orig_ops; +} + static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, -- cgit v1.2.3 From d415b9eb76fc55c03ef5451691170aa5771dcea3 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 15 Nov 2016 17:48:45 -0500 Subject: netfilter: decouple nf_hook_entry and nf_hook_ops During nfhook traversal we only need a very small subset of nf_hook_ops members. We need: - next element - hook function to call - hook function priv argument Bridge netfilter also needs 'thresh'; can be obtained via ->orig_ops. nf_hook_entry struct is now 32 bytes on x86_64. A followup patch will turn the run-time list into an array that only stores hook functions plus their priv arguments, eliminating the ->next element. Suggested-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 575aa198097e..a4b97be30b28 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -75,7 +75,8 @@ struct nf_hook_ops { struct nf_hook_entry { struct nf_hook_entry __rcu *next; - struct nf_hook_ops ops; + nf_hookfn *hook; + void *priv; const struct nf_hook_ops *orig_ops; }; @@ -83,21 +84,22 @@ static inline void nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) { entry->next = NULL; - entry->ops = *ops; + entry->hook = ops->hook; + entry->priv = ops->priv; entry->orig_ops = ops; } static inline int nf_hook_entry_priority(const struct nf_hook_entry *entry) { - return entry->ops.priority; + return entry->orig_ops->priority; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { - return entry->ops.hook(entry->ops.priv, skb, state); + return entry->hook(entry->priv, skb, state); } static inline const struct nf_hook_ops * -- cgit v1.2.3 From 4d31eef5176df06f218201bc9c0ce40babb41660 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:17 +0100 Subject: netfilter: x_tables: pass xt_counters struct instead of packet counter On SMP we overload the packet counter (unsigned long) to contain percpu offset. Hide this from callers and pass xt_counters address instead. Preparation patch to allocate the percpu counters in page-sized batch chunks. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index cd4eaf8df445..6e61edeb68e3 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void) return 0; } -static inline void xt_percpu_counter_free(u64 pcnt) -{ - if (nr_cpu_ids > 1) - free_percpu((void __percpu *) (unsigned long) pcnt); -} +void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) -- cgit v1.2.3 From f28e15bacedd444608e25421c72eb2cf4527c9ca Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:18 +0100 Subject: netfilter: x_tables: pass xt_counters struct to counter allocator Keeps some noise away from a followup patch. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6e61edeb68e3..05a94bd32c55 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -404,32 +404,7 @@ static inline unsigned long ifname_compare_aligned(const char *_a, } -/* On SMP, ip(6)t_entry->counters.pcnt holds address of the - * real (percpu) counter. On !SMP, its just the packet count, - * so nothing needs to be done there. - * - * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. We force an alignment of 16 bytes - * so that bytes/packets share a common cache line. - * - * Hence caller must use IS_ERR_VALUE to check for error, this - * allows us to return 0 for single core systems without forcing - * callers to deal with SMP vs. NONSMP issues. - */ -static inline unsigned long xt_percpu_counter_alloc(void) -{ - if (nr_cpu_ids > 1) { - void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), - sizeof(struct xt_counters)); - - if (res == NULL) - return -ENOMEM; - - return (__force unsigned long) res; - } - - return 0; -} +bool xt_percpu_counter_alloc(struct xt_counters *counters); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * -- cgit v1.2.3 From ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:19 +0100 Subject: netfilter: x_tables: pack percpu counter allocations instead of allocating each xt_counter individually, allocate 4k chunks and then use these for counter allocation requests. This should speed up rule evaluation by increasing data locality, also speeds up ruleset loading because we reduce calls to the percpu allocator. As Eric points out we can't use PAGE_SIZE, page_allocator would fail on arches with 64k page size. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 05a94bd32c55..5117e4d2ddfa 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } +struct xt_percpu_counter_alloc_state { + unsigned int off; + const char __percpu *mem; +}; -bool xt_percpu_counter_alloc(struct xt_counters *counters); +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * -- cgit v1.2.3 From df122f58b834b24c27d7e2ac02a4910d3e56f6ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Nov 2016 11:40:05 +0100 Subject: netfilter: ingress: translate 0 nf_hook_slow retval to -1 The caller assumes that < 0 means that skb was stolen (or free'd). All other return values continue skb processing. nf_hook_slow returns 3 different return value types: A) a (negative) errno value: the skb was dropped (NF_DROP, e.g. by iptables '-j DROP' rule). B) 0. The skb was stolen by the hook or queued to userspace. C) 1. all hooks returned NF_ACCEPT so the caller should invoke the okfn so packet processing can continue. nft ingress facility currently doesn't have the 'okfn' that the NF_HOOK() macros use; there is no nfqueue support either. So 1 means that nf_hook_ingress() caller should go on processing the skb. In order to allow use of NF_STOLEN from ingress we need to translate this to an errno number, else we'd crash because we continue with already-free'd (or about to be free-d) skb. The errno value isn't checked, its just important that its less than 0, so return -1. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 2dc3b49b804a..59476061de86 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; + int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. @@ -29,7 +30,11 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e); + if (ret == 0) + return -1; + + return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) -- cgit v1.2.3