From 958de668197651bbf2b4b9528f204ab5a0f1af65 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Oct 2019 21:07:31 +0200 Subject: module: Remove set_all_modules_text_*() Now that there are no users of set_all_modules_text_*() left, remove it. While it appears nds32 uses it, it does not have STRICT_MODULE_RWX and therefore ends up with the NOP stubs. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Greentime Hu Cc: H. Peter Anvin Cc: Jessica Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Chen Link: https://lkml.kernel.org/r/20191111132458.284298307@infradead.org Signed-off-by: Ingo Molnar --- include/linux/module.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 6d20895e7739..daae84705040 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -846,13 +846,9 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) #ifdef CONFIG_STRICT_MODULE_RWX -extern void set_all_modules_text_rw(void); -extern void set_all_modules_text_ro(void); extern void module_enable_ro(const struct module *mod, bool after_init); extern void module_disable_ro(const struct module *mod); #else -static inline void set_all_modules_text_rw(void) { } -static inline void set_all_modules_text_ro(void) { } static inline void module_enable_ro(const struct module *mod, bool after_init) { } static inline void module_disable_ro(const struct module *mod) { } #endif -- cgit v1.2.3 From 04ae87a52074e2d448fc66143f1bd2c7d694d2b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Oct 2019 22:26:59 +0200 Subject: ftrace: Rework event_create_dir() Rework event_create_dir() to use an array of static data instead of function pointers where possible. The problem is that it would call the function pointer on module load before parse_args(), possibly even before jump_labels were initialized. Luckily the generated functions don't use jump_labels but it still seems fragile. It also gets in the way of changing when we make the module map executable. The generated function are basically calling trace_define_field() with a bunch of static arguments. So instead of a function, capture these arguments in a static array, avoiding the function call. Now there are a number of cases where the fields are dynamic (syscall arguments, kprobes and uprobes), in which case a static array does not work, for these we preserve the function call. Luckily all these cases are not related to modules and so we can retain the function call for them. Also fix up all broken tracepoint definitions that now generate a compile error. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191111132458.342979914@infradead.org Signed-off-by: Ingo Molnar --- include/linux/trace_events.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 30a8cdcfd4a4..a379255c14a9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -187,6 +187,22 @@ enum trace_reg { struct trace_event_call; +#define TRACE_FUNCTION_TYPE ((const char *)~0UL) + +struct trace_event_fields { + const char *type; + union { + struct { + const char *name; + const int size; + const int align; + const int is_signed; + const int filter_type; + }; + int (*define_fields)(struct trace_event_call *); + }; +}; + struct trace_event_class { const char *system; void *probe; @@ -195,7 +211,7 @@ struct trace_event_class { #endif int (*reg)(struct trace_event_call *event, enum trace_reg type, void *data); - int (*define_fields)(struct trace_event_call *); + struct trace_event_fields *fields_array; struct list_head *(*get_fields)(struct trace_event_call *); struct list_head fields; int (*raw_init)(struct trace_event_call *); -- cgit v1.2.3 From 2496396fcb44404ead24b578c583d5286886e857 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Oct 2019 21:18:10 +0200 Subject: sched/rt, fs: Use CONFIG_PREEMPTION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same functionality which today depends on CONFIG_PREEMPT. Switch the i_size() and part_nr_sects_…() code over to use CONFIG_PREEMPTION. Update the comment for fsstack_copy_inode_size() also to refer to CONFIG_PREEMPTION. [bigeasy: +PREEMPT comments] Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Alexander Viro Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20191015191821.11479-24-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/fs.h | 4 ++-- include/linux/genhd.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..dddfcbb140a7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -855,7 +855,7 @@ static inline loff_t i_size_read(const struct inode *inode) i_size = inode->i_size; } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); return i_size; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) loff_t i_size; preempt_disable(); @@ -880,7 +880,7 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); preempt_enable(); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); inode->i_size = i_size; preempt_enable(); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 8bb63027e4d6..a927829bb73a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -718,7 +718,7 @@ static inline void hd_free_part(struct hd_struct *part) * accessor function. * * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption + * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption * on. */ static inline sector_t part_nr_sects_read(struct hd_struct *part) @@ -731,7 +731,7 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) nr_sects = part->nr_sects; } while (read_seqcount_retry(&part->nr_sects_seq, seq)); return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) sector_t nr_sects; preempt_disable(); @@ -754,7 +754,7 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); part->nr_sects = size; preempt_enable(); -- cgit v1.2.3 From b50b0580d27bc45a0637aefc8bac4d31aa85771a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Nov 2019 14:48:57 +0100 Subject: net: add queue argument to __skb_wait_for_more_packets and __skb_{,try_}recv_datagram This will be used by ESP over TCP to handle the queue of IKE messages. Signed-off-by: Sabrina Dubroca Acked-by: David S. Miller Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9133bcf0544..49a10f9cc538 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3459,7 +3459,8 @@ static inline void skb_frag_list_init(struct sk_buff *skb) for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) -int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, +int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, + int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, @@ -3468,12 +3469,16 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff *skb), int *off, int *err, struct sk_buff **last); -struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err, struct sk_buff **last); -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, +struct sk_buff *__skb_recv_datagram(struct sock *sk, + struct sk_buff_head *sk_queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err); -- cgit v1.2.3 From df1e849ae4559544ff00ff5052eefe2479750539 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Nov 2019 16:36:45 -0800 Subject: rcu: Enable tick for nohz_full CPUs slow to provide expedited QS An expedited grace period can be stalled by a nohz_full CPU looping in kernel context. This possibility is currently handled by some carefully crafted checks in rcu_read_unlock_special() that enlist help from ksoftirqd when permitted by the scheduler. However, it is exactly these checks that require the scheduler avoid holding any of its rq or pi locks across rcu_read_unlock() without also having held them across the entire RCU read-side critical section. It would therefore be very nice if expedited grace periods could handle nohz_full CPUs looping in kernel context without such checks. This commit therefore adds code to the expedited grace period's wait and cleanup code that forces the scheduler-clock interrupt on for CPUs that fail to quickly supply a quiescent state. "Quickly" is currently a hard-coded single-jiffy delay. Signed-off-by: Paul E. McKenney --- include/linux/tick.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7896f792d3b0..7340613c7eff 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -109,8 +109,10 @@ enum tick_dep_bits { TICK_DEP_BIT_PERF_EVENTS = 1, TICK_DEP_BIT_SCHED = 2, TICK_DEP_BIT_CLOCK_UNSTABLE = 3, - TICK_DEP_BIT_RCU = 4 + TICK_DEP_BIT_RCU = 4, + TICK_DEP_BIT_RCU_EXP = 5 }; +#define TICK_DEP_BIT_MAX TICK_DEP_BIT_RCU_EXP #define TICK_DEP_MASK_NONE 0 #define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) @@ -118,6 +120,7 @@ enum tick_dep_bits { #define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) #define TICK_DEP_MASK_RCU (1 << TICK_DEP_BIT_RCU) +#define TICK_DEP_MASK_RCU_EXP (1 << TICK_DEP_BIT_RCU_EXP) #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; -- cgit v1.2.3 From f452ee096d95482892b101bde4fd037fa025d3cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 4 Oct 2019 23:54:02 +0200 Subject: rculist: Describe variadic macro argument in a Sphinx-compatible way MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch, Sphinx shows "variable arguments" as the description of the cond argument, rather than the intended description, and prints the following warnings: ./include/linux/rculist.h:374: warning: Excess function parameter 'cond' description in 'list_for_each_entry_rcu' ./include/linux/rculist.h:651: warning: Excess function parameter 'cond' description in 'hlist_for_each_entry_rcu' Signed-off-by: Jonathan Neuschäfer Acked-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4158b7212936..61c6728a71f7 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -361,7 +361,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. - * @cond: optional lockdep expression if called from non-RCU protection. + * @cond...: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() @@ -636,7 +636,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. - * @cond: optional lockdep expression if called from non-RCU protection. + * @cond...: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() -- cgit v1.2.3 From c54a2744497db4b6887b9c905ef7aa0b3620c956 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 11:37:37 -0800 Subject: list: Add hlist_unhashed_lockless() We would like to use hlist_unhashed() from timer_pending(), which runs without protection of a lock. Note that other callers might also want to use this variant. Instead of forcing a READ_ONCE() for all hlist_unhashed() callers, add a new helper with an explicit _lockless suffix in the name to better document what is going on. Also add various WRITE_ONCE() in __hlist_del(), hlist_add_head() and hlist_add_before()/hlist_add_behind() to pair with the READ_ONCE(). Signed-off-by: Eric Dumazet Cc: Thomas Gleixner [ paulmck: Also add WRITE_ONCE() to rculist.h. ] Signed-off-by: Paul E. McKenney --- include/linux/list.h | 32 +++++++++++++++++++++----------- include/linux/rculist.h | 24 ++++++++++++------------ 2 files changed, 33 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 85c92555e31f..61f5aaf96192 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -749,6 +749,16 @@ static inline int hlist_unhashed(const struct hlist_node *h) return !h->pprev; } +/* This variant of hlist_unhashed() must be used in lockless contexts + * to avoid potential load-tearing. + * The READ_ONCE() is paired with the various WRITE_ONCE() in hlist + * helpers that are defined below. + */ +static inline int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !READ_ONCE(h->pprev); +} + static inline int hlist_empty(const struct hlist_head *h) { return !READ_ONCE(h->first); @@ -761,7 +771,7 @@ static inline void __hlist_del(struct hlist_node *n) WRITE_ONCE(*pprev, next); if (next) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_del(struct hlist_node *n) @@ -782,32 +792,32 @@ static inline void hlist_del_init(struct hlist_node *n) static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; - n->next = first; + WRITE_ONCE(n->next, first); if (first) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); WRITE_ONCE(h->first, n); - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); } /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) { - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; + WRITE_ONCE(n->pprev, next->pprev); + WRITE_ONCE(n->next, next); + WRITE_ONCE(next->pprev, &n->next); WRITE_ONCE(*(n->pprev), n); } static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { - n->next = prev->next; - prev->next = n; - n->pprev = &prev->next; + WRITE_ONCE(n->next, prev->next); + WRITE_ONCE(prev->next, n); + WRITE_ONCE(n->pprev, &prev->next); if (n->next) - n->next->pprev = &n->next; + WRITE_ONCE(n->next->pprev, &n->next); } /* after that we'll appear to be on some hlist and hlist_del will work */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 61c6728a71f7..4b7ae1bf50b3 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -173,7 +173,7 @@ static inline void hlist_del_init_rcu(struct hlist_node *n) { if (!hlist_unhashed(n)) { __hlist_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -473,7 +473,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, static inline void hlist_del_rcu(struct hlist_node *n) { __hlist_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -489,11 +489,11 @@ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *next = old->next; new->next = next; - new->pprev = old->pprev; + WRITE_ONCE(new->pprev, old->pprev); rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) - new->next->pprev = &new->next; - old->pprev = LIST_POISON2; + WRITE_ONCE(new->next->pprev, &new->next); + WRITE_ONCE(old->pprev, LIST_POISON2); } /* @@ -528,10 +528,10 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_first_rcu(h), n); if (first) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** @@ -564,7 +564,7 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, if (last) { n->next = last->next; - n->pprev = &last->next; + WRITE_ONCE(n->pprev, &last->next); rcu_assign_pointer(hlist_next_rcu(last), n); } else { hlist_add_head_rcu(n, h); @@ -592,10 +592,10 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { - n->pprev = next->pprev; + WRITE_ONCE(n->pprev, next->pprev); n->next = next; rcu_assign_pointer(hlist_pprev_rcu(n), n); - next->pprev = &n->next; + WRITE_ONCE(next->pprev, &n->next); } /** @@ -620,10 +620,10 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; - n->pprev = &prev->next; + WRITE_ONCE(n->pprev, &prev->next); rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) - n->next->pprev = &n->next; + WRITE_ONCE(n->next->pprev, &n->next); } #define __hlist_for_each_rcu(pos, head) \ -- cgit v1.2.3 From b3e627d3d5092a87fc9b9e37e341610cfecfbfdc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Oct 2019 02:55:57 +0000 Subject: rcu: Make PREEMPT_RCU be a modifier to TREE_RCU Currently PREEMPT_RCU and TREE_RCU are mutually exclusive Kconfig options. But PREEMPT_RCU actually specifies a kind of TREE_RCU, namely a preemptible TREE_RCU. This commit therefore makes PREEMPT_RCU be a modifer to the TREE_RCU Kconfig option. This has the benefit of simplifying several of the #if expressions that formerly needed to check both, but now need only check one or the other. Signed-off-by: Lai Jiangshan Signed-off-by: Lai Jiangshan Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0b7506330c87..70a41cd8f58d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -167,7 +167,7 @@ do { \ * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) #include #elif defined(CONFIG_TINY_RCU) #include @@ -601,7 +601,7 @@ do { \ * read-side critical section that would block in a !PREEMPT kernel. * But if you want the full story, read on! * - * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), + * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, -- cgit v1.2.3 From 90326f0521a88004194f88f1b597b54347482b5c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Oct 2019 21:18:14 +0200 Subject: rcu: Use CONFIG_PREEMPTION where appropriate The config option `CONFIG_PREEMPT' is used for the preemption model "Low-Latency Desktop". The config option `CONFIG_PREEMPTION' is enabled when kernel preemption is enabled which is true for the preemption model `CONFIG_PREEMPT' and `CONFIG_PREEMPT_RT'. Use `CONFIG_PREEMPTION' if it applies to both preemption models and not just to `CONFIG_PREEMPT'. Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Cc: Joel Fernandes Cc: Davidlohr Bueso Cc: rcu@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 70a41cd8f58d..eb32fff81c30 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -154,7 +154,7 @@ static inline void exit_tasks_rcu_finish(void) { } * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() - * machinery were to be shut off, as some advocate for PREEMPT kernels. + * machinery were to be shut off, as some advocate for PREEMPTION kernels. */ #define cond_resched_tasks_rcu_qs() \ do { \ @@ -598,7 +598,7 @@ do { \ * * You can avoid reading and understanding the next paragraph by * following this rule: don't put anything in an rcu_read_lock() RCU - * read-side critical section that would block in a !PREEMPT kernel. + * read-side critical section that would block in a !PREEMPTION kernel. * But if you want the full story, read on! * * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), -- cgit v1.2.3 From 1f059dfdf5d170dccbac92193be2fee3c1763384 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Nov 2019 08:19:36 +0100 Subject: mm/vmalloc: Add empty headers and use them from In the x86 MM code we'd like to untangle various types of historic header dependency spaghetti, but for this we'd need to pass to the generic vmalloc code various vmalloc related defines that customarily come via the low level arch header. Signed-off-by: Ingo Molnar --- include/linux/vmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a4b241102771..ec3813236699 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,6 +10,8 @@ #include #include +#include + struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ -- cgit v1.2.3 From 186525bd6b83efc592672e2d6185e4d7c810d2b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Nov 2019 08:17:25 +0100 Subject: mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions - Untangle the somewhat incestous way of how VMALLOC_START is used all across the kernel, but is, on x86, defined deep inside one of the lowest level page table headers. It doesn't help that vmalloc.h only includes a single asm header: #include /* pgprot_t */ So there was no existing cross-arch way to decouple address layout definitions from page.h details. I used this: #ifndef VMALLOC_START # include #endif This way every architecture that wants to simplify page.h can do so. - Also on x86 we had a couple of LDT related inline functions that used the late-stage address space layout positions - but these could be uninlined without real trouble - the end result is cleaner this way as well. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Linus Torvalds Cc: Andrew Morton Cc: Rik van Riel Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c97ea3b694e6..fb8f9412e2cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ -static inline bool is_vmalloc_addr(const void *x) -{ -#ifdef CONFIG_MMU - unsigned long addr = (unsigned long)x; - - return addr >= VMALLOC_START && addr < VMALLOC_END; -#else - return false; -#endif -} #ifndef is_ioremap_addr #define is_ioremap_addr(x) is_vmalloc_addr(x) #endif #ifdef CONFIG_MMU +extern bool is_vmalloc_addr(const void *x); extern int is_vmalloc_or_module_addr(const void *x); #else +static inline bool is_vmalloc_addr(const void *x) +{ + return false; +} static inline int is_vmalloc_or_module_addr(const void *x) { return 0; -- cgit v1.2.3 From bbefa1dd6a6d53537c11624752219e39959d04fb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 Nov 2019 15:58:45 +0800 Subject: crypto: pcrypt - Avoid deadlock by using per-instance padata queues If the pcrypt template is used multiple times in an algorithm, then a deadlock occurs because all pcrypt instances share the same padata_instance, which completes requests in the order submitted. That is, the inner pcrypt request waits for the outer pcrypt request while the outer request is already waiting for the inner. This patch fixes this by allocating a set of queues for each pcrypt instance instead of using two global queues. In order to maintain the existing user-space interface, the pinst structure remains global so any sysfs modifications will apply to every pcrypt instance. Note that when an update occurs we have to allocate memory for every pcrypt instance. Should one of the allocations fail we will abort the update without rolling back changes already made. The new per-instance data structure is called padata_shell and is essentially a wrapper around parallel_data. Reproducer: #include #include #include int main() { struct sockaddr_alg addr = { .salg_type = "aead", .salg_name = "pcrypt(pcrypt(rfc4106-gcm-aesni))" }; int algfd, reqfd; char buf[32] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (void *)&addr, sizeof(addr)); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, buf, 20); reqfd = accept(algfd, 0, 0); write(reqfd, buf, 32); read(reqfd, buf, 16); } Reported-by: syzbot+56c7151cad94eec37c521f0e47d2eee53f9361c4@syzkaller.appspotmail.com Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto parallelization wrapper") Signed-off-by: Herbert Xu Tested-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/padata.h | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 23717eeaad23..cccab7a59787 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -9,6 +9,7 @@ #ifndef PADATA_H #define PADATA_H +#include #include #include #include @@ -98,7 +99,7 @@ struct padata_cpumask { * struct parallel_data - Internal control structure, covers everything * that depends on the cpumask in use. * - * @pinst: padata instance. + * @sh: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. * @reorder_objects: Number of objects waiting in the reorder queues. @@ -111,7 +112,7 @@ struct padata_cpumask { * @lock: Reorder lock. */ struct parallel_data { - struct padata_instance *pinst; + struct padata_shell *ps; struct padata_parallel_queue __percpu *pqueue; struct padata_serial_queue __percpu *squeue; atomic_t reorder_objects; @@ -124,14 +125,33 @@ struct parallel_data { spinlock_t lock ____cacheline_aligned; }; +/** + * struct padata_shell - Wrapper around struct parallel_data, its + * purpose is to allow the underlying control structure to be replaced + * on the fly using RCU. + * + * @pinst: padat instance. + * @pd: Actual parallel_data structure which may be substituted on the fly. + * @opd: Pointer to old pd to be freed by padata_replace. + * @list: List entry in padata_instance list. + */ +struct padata_shell { + struct padata_instance *pinst; + struct parallel_data __rcu *pd; + struct parallel_data *opd; + struct list_head list; +}; + /** * struct padata_instance - The overall control structure. * * @cpu_notifier: cpu hotplug notifier. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. - * @pd: The internal control structure. + * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. + * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. + * @omask: Temporary storage used to compute the notification mask. * @cpumask_change_notifier: Notifiers chain for user-defined notify * callbacks that will be called when either @pcpu or @cbcpu * or both cpumasks change. @@ -143,8 +163,10 @@ struct padata_instance { struct hlist_node node; struct workqueue_struct *parallel_wq; struct workqueue_struct *serial_wq; - struct parallel_data *pd; + struct list_head pslist; struct padata_cpumask cpumask; + struct padata_cpumask rcpumask; + cpumask_var_t omask; struct blocking_notifier_head cpumask_change_notifier; struct kobject kobj; struct mutex lock; @@ -156,7 +178,9 @@ struct padata_instance { extern struct padata_instance *padata_alloc_possible(const char *name); extern void padata_free(struct padata_instance *pinst); -extern int padata_do_parallel(struct padata_instance *pinst, +extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); +extern void padata_free_shell(struct padata_shell *ps); +extern int padata_do_parallel(struct padata_shell *ps, struct padata_priv *padata, int *cb_cpu); extern void padata_do_serial(struct padata_priv *padata); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, -- cgit v1.2.3 From c441a909c68618ff64aa70394d0b270b0665a229 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Dec 2019 13:42:29 -0800 Subject: crypto: compress - remove crt_u.compress (struct compress_tfm) crt_u.compress (struct compress_tfm) is pointless because its two fields, ->cot_compress() and ->cot_decompress(), always point to crypto_compress() and crypto_decompress(). Remove this pointless indirection, and just make crypto_comp_compress() and crypto_comp_decompress() be direct calls to what used to be crypto_compress() and crypto_decompress(). Also remove the unused function crypto_comp_cast(). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 43 ++++++------------------------------------- 1 file changed, 6 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 23365a9d062e..8f708564b98b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -606,17 +606,7 @@ struct cipher_tfm { void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct compress_tfm { - int (*cot_compress)(struct crypto_tfm *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); - int (*cot_decompress)(struct crypto_tfm *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); -}; - #define crt_cipher crt_u.cipher -#define crt_compress crt_u.compress struct crypto_tfm { @@ -624,7 +614,6 @@ struct crypto_tfm { union { struct cipher_tfm cipher; - struct compress_tfm compress; } crt_u; void (*exit)(struct crypto_tfm *tfm); @@ -928,13 +917,6 @@ static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) return (struct crypto_comp *)tfm; } -static inline struct crypto_comp *crypto_comp_cast(struct crypto_tfm *tfm) -{ - BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_COMPRESS) & - CRYPTO_ALG_TYPE_MASK); - return __crypto_comp_cast(tfm); -} - static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, u32 type, u32 mask) { @@ -969,26 +951,13 @@ static inline const char *crypto_comp_name(struct crypto_comp *tfm) return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); } -static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) -{ - return &crypto_comp_tfm(tfm)->crt_compress; -} - -static inline int crypto_comp_compress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen) -{ - return crypto_comp_crt(tfm)->cot_compress(crypto_comp_tfm(tfm), - src, slen, dst, dlen); -} +int crypto_comp_compress(struct crypto_comp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen); -static inline int crypto_comp_decompress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen) -{ - return crypto_comp_crt(tfm)->cot_decompress(crypto_comp_tfm(tfm), - src, slen, dst, dlen); -} +int crypto_comp_decompress(struct crypto_comp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen); #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3 From e8cfed5e4e2b5929371955f476a52a4c3398ead3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Dec 2019 13:42:30 -0800 Subject: crypto: cipher - remove crt_u.cipher (struct cipher_tfm) Of the three fields in crt_u.cipher (struct cipher_tfm), ->cit_setkey() is pointless because it always points to setkey() in crypto/cipher.c. ->cit_decrypt_one() and ->cit_encrypt_one() are slightly less pointless, since if the algorithm doesn't have an alignmask, they are set directly to ->cia_encrypt() and ->cia_decrypt(). However, this "optimization" isn't worthwhile because: - The "cipher" algorithm type is the only algorithm still using crt_u, so it's bloating every struct crypto_tfm for every algorithm type. - If the algorithm has an alignmask, this "optimization" actually makes things slower, as it causes 2 indirect calls per block rather than 1. - It adds extra code complexity. - Some templates already call ->cia_encrypt()/->cia_decrypt() directly instead of going through ->cit_encrypt_one()/->cit_decrypt_one(). - The "cipher" algorithm type never gives optimal performance anyway. For that, a higher-level type such as skcipher needs to be used. Therefore, just remove the extra indirection, and make crypto_cipher_setkey(), crypto_cipher_encrypt_one(), and crypto_cipher_decrypt_one() be direct calls into crypto/cipher.c. Also remove the unused function crypto_cipher_cast(). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 48 ++++++------------------------------------------ 1 file changed, 6 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8f708564b98b..c23f1eed7970 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -599,23 +599,10 @@ int crypto_has_alg(const char *name, u32 type, u32 mask); * crypto_free_*(), as well as the various helpers below. */ -struct cipher_tfm { - int (*cit_setkey)(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen); - void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -}; - -#define crt_cipher crt_u.cipher - struct crypto_tfm { u32 crt_flags; - union { - struct cipher_tfm cipher; - } crt_u; - void (*exit)(struct crypto_tfm *tfm); struct crypto_alg *__crt_alg; @@ -752,12 +739,6 @@ static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) return (struct crypto_cipher *)tfm; } -static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - return __crypto_cipher_cast(tfm); -} - /** * crypto_alloc_cipher() - allocate single block cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the @@ -815,11 +796,6 @@ static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) return crypto_has_alg(alg_name, type, mask); } -static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) -{ - return &crypto_cipher_tfm(tfm)->crt_cipher; -} - /** * crypto_cipher_blocksize() - obtain block size for cipher * @tfm: cipher handle @@ -873,12 +849,8 @@ static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ -static inline int crypto_cipher_setkey(struct crypto_cipher *tfm, - const u8 *key, unsigned int keylen) -{ - return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm), - key, keylen); -} +int crypto_cipher_setkey(struct crypto_cipher *tfm, + const u8 *key, unsigned int keylen); /** * crypto_cipher_encrypt_one() - encrypt one block of plaintext @@ -889,12 +861,8 @@ static inline int crypto_cipher_setkey(struct crypto_cipher *tfm, * Invoke the encryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ -static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, - u8 *dst, const u8 *src) -{ - crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm), - dst, src); -} +void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src); /** * crypto_cipher_decrypt_one() - decrypt one block of ciphertext @@ -905,12 +873,8 @@ static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, * Invoke the decryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ -static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, - u8 *dst, const u8 *src) -{ - crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm), - dst, src); -} +void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src); static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) { -- cgit v1.2.3 From 894c9ef9780c5cf2f143415e867ee39a33ecb75d Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:10 -0500 Subject: padata: validate cpumask without removed CPU during offline Configuring an instance's parallel mask without any online CPUs... echo 2 > /sys/kernel/pcrypt/pencrypt/parallel_cpumask echo 0 > /sys/devices/system/cpu/cpu1/online ...makes tcrypt mode=215 crash like this: divide error: 0000 [#1] SMP PTI CPU: 4 PID: 283 Comm: modprobe Not tainted 5.4.0-rc8-padata-doc-v2+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20191013_105130-anatol 04/01/2014 RIP: 0010:padata_do_parallel+0x114/0x300 Call Trace: pcrypt_aead_encrypt+0xc0/0xd0 [pcrypt] crypto_aead_encrypt+0x1f/0x30 do_mult_aead_op+0x4e/0xdf [tcrypt] test_mb_aead_speed.constprop.0.cold+0x226/0x564 [tcrypt] do_test+0x28c2/0x4d49 [tcrypt] tcrypt_mod_init+0x55/0x1000 [tcrypt] ... cpumask_weight() in padata_cpu_hash() returns 0 because the mask has no CPUs. The problem is __padata_remove_cpu() checks for valid masks too early and so doesn't mark the instance PADATA_INVALID as expected, which would have made padata_do_parallel() return error before doing the division. Fix by introducing a second padata CPU hotplug state before CPUHP_BRINGUP_CPU so that __padata_remove_cpu() sees the online mask without @cpu. No need for the second argument to padata_replace() since @cpu is now already missing from the online mask. Fixes: 33e54450683c ("padata: Handle empty padata cpumasks") Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Sebastian Andrzej Siewior Cc: Steffen Klassert Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e51ee772b9f5..def48a583670 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -59,6 +59,7 @@ enum cpuhp_state { CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, + CPUHP_PADATA_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 91a71d612128f84f725022d7b7c5d5a741f6fdc7 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:12 -0500 Subject: padata: remove cpumask change notifier Since commit 63d3578892dc ("crypto: pcrypt - remove padata cpumask notifier") this feature is unused, so get rid of it. Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Jonathan Corbet Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/padata.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index cccab7a59787..178d5cc6b494 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #define PADATA_CPU_SERIAL 0x01 @@ -151,10 +150,6 @@ struct padata_shell { * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. - * @omask: Temporary storage used to compute the notification mask. - * @cpumask_change_notifier: Notifiers chain for user-defined notify - * callbacks that will be called when either @pcpu or @cbcpu - * or both cpumasks change. * @kobj: padata instance kernel object. * @lock: padata instance lock. * @flags: padata flags. @@ -166,8 +161,6 @@ struct padata_instance { struct list_head pslist; struct padata_cpumask cpumask; struct padata_cpumask rcpumask; - cpumask_var_t omask; - struct blocking_notifier_head cpumask_change_notifier; struct kobject kobj; struct mutex lock; u8 flags; @@ -187,8 +180,4 @@ extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, cpumask_var_t cpumask); extern int padata_start(struct padata_instance *pinst); extern void padata_stop(struct padata_instance *pinst); -extern int padata_register_cpumask_notifier(struct padata_instance *pinst, - struct notifier_block *nblock); -extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst, - struct notifier_block *nblock); #endif -- cgit v1.2.3 From 3facced7aeed131c1002b724e488d68ebe59c56f Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:13 -0500 Subject: padata: remove reorder_objects reorder_objects is unused since the rework of padata's flushing, so remove it. Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/padata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 178d5cc6b494..faa2e36832f8 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -101,7 +101,6 @@ struct padata_cpumask { * @sh: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. - * @reorder_objects: Number of objects waiting in the reorder queues. * @refcnt: Number of objects holding a reference on this parallel_data. * @max_seq_nr: Maximal used sequence number. * @processed: Number of already processed objects. @@ -114,7 +113,6 @@ struct parallel_data { struct padata_shell *ps; struct padata_parallel_queue __percpu *pqueue; struct padata_serial_queue __percpu *squeue; - atomic_t reorder_objects; atomic_t refcnt; atomic_t seq_nr; unsigned int processed; -- cgit v1.2.3 From bfcdcef8c8e3469f4d6c082a1da27a6ef77e5715 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:14 -0500 Subject: padata: update documentation Remove references to unused functions, standardize language, update to reflect new functionality, migrate to rst format, and fix all kernel-doc warnings. Fixes: 815613da6a67 ("kernel/padata.c: removed unused code") Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Jonathan Corbet Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Daniel Jordan Signed-off-by: Herbert Xu --- include/linux/padata.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index faa2e36832f8..a0d8b41850b2 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -19,7 +19,7 @@ #define PADATA_CPU_PARALLEL 0x02 /** - * struct padata_priv - Embedded to the users data structure. + * struct padata_priv - Represents one job * * @list: List entry, to attach to the padata lists. * @pd: Pointer to the internal control structure. @@ -42,7 +42,7 @@ struct padata_priv { }; /** - * struct padata_list + * struct padata_list - one per work type per CPU * * @list: List head. * @lock: List lock. @@ -70,9 +70,6 @@ struct padata_serial_queue { * * @parallel: List to wait for parallelization. * @reorder: List to wait for reordering after parallel processing. - * @serial: List to wait for serialization after reordering. - * @pwork: work struct for parallelization. - * @swork: work struct for serialization. * @work: work struct for parallelization. * @num_obj: Number of objects that are processed by this cpu. */ @@ -98,11 +95,11 @@ struct padata_cpumask { * struct parallel_data - Internal control structure, covers everything * that depends on the cpumask in use. * - * @sh: padata_shell object. + * @ps: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. * @refcnt: Number of objects holding a reference on this parallel_data. - * @max_seq_nr: Maximal used sequence number. + * @seq_nr: Sequence number of the parallelized data object. * @processed: Number of already processed objects. * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. @@ -119,7 +116,7 @@ struct parallel_data { int cpu; struct padata_cpumask cpumask; struct work_struct reorder_work; - spinlock_t lock ____cacheline_aligned; + spinlock_t ____cacheline_aligned lock; }; /** @@ -142,7 +139,7 @@ struct padata_shell { /** * struct padata_instance - The overall control structure. * - * @cpu_notifier: cpu hotplug notifier. + * @node: Used by CPU hotplug. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. * @pslist: List of padata_shell objects attached to this instance. -- cgit v1.2.3 From a4516c7053b96fed98a0439a9226983b5275474b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:55:59 +0000 Subject: net: sfp: derive interface mode from ethtool link modes We don't need the EEPROM ID to derive the phy interface mode as we can derive it merely from the ethtool link modes. Remove the EEPROM ID argument to sfp_select_interface(). Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 487fd9412d10..8d7b98c214d7 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -504,7 +504,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); @@ -532,7 +531,6 @@ static inline void sfp_parse_support(struct sfp_bus *bus, } static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes) { return PHY_INTERFACE_MODE_NA; -- cgit v1.2.3 From 0fbd26a9fb6875b98fcfff523831fec47bc5e9a2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:56:04 +0000 Subject: net: sfp: add more extended compliance codes SFF-8024 is used to define various constants re-used in several SFF SFP-related specifications. Split these constants from the enum, and rename them to indicate that they're defined by SFF-8024. Add and use updated SFF-8024 extended compliance code definitions for 10GBASE-T, 5GBASE-T and 2.5GBASE-T modules. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 82 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 8d7b98c214d7..373d8b67ea86 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -275,6 +275,61 @@ struct sfp_diag { __be16 cal_v_offset; } __packed; +/* SFF8024 defined constants */ +enum { + SFF8024_ID_UNK = 0x00, + SFF8024_ID_SFF_8472 = 0x02, + SFF8024_ID_SFP = 0x03, + SFF8024_ID_DWDM_SFP = 0x0b, + SFF8024_ID_QSFP_8438 = 0x0c, + SFF8024_ID_QSFP_8436_8636 = 0x0d, + SFF8024_ID_QSFP28_8636 = 0x11, + + SFF8024_ENCODING_UNSPEC = 0x00, + SFF8024_ENCODING_8B10B = 0x01, + SFF8024_ENCODING_4B5B = 0x02, + SFF8024_ENCODING_NRZ = 0x03, + SFF8024_ENCODING_8472_MANCHESTER= 0x04, + SFF8024_ENCODING_8472_SONET = 0x05, + SFF8024_ENCODING_8472_64B66B = 0x06, + SFF8024_ENCODING_8436_MANCHESTER= 0x06, + SFF8024_ENCODING_8436_SONET = 0x04, + SFF8024_ENCODING_8436_64B66B = 0x05, + SFF8024_ENCODING_256B257B = 0x07, + SFF8024_ENCODING_PAM4 = 0x08, + + SFF8024_CONNECTOR_UNSPEC = 0x00, + /* codes 01-05 not supportable on SFP, but some modules have single SC */ + SFF8024_CONNECTOR_SC = 0x01, + SFF8024_CONNECTOR_FIBERJACK = 0x06, + SFF8024_CONNECTOR_LC = 0x07, + SFF8024_CONNECTOR_MT_RJ = 0x08, + SFF8024_CONNECTOR_MU = 0x09, + SFF8024_CONNECTOR_SG = 0x0a, + SFF8024_CONNECTOR_OPTICAL_PIGTAIL= 0x0b, + SFF8024_CONNECTOR_MPO_1X12 = 0x0c, + SFF8024_CONNECTOR_MPO_2X16 = 0x0d, + SFF8024_CONNECTOR_HSSDC_II = 0x20, + SFF8024_CONNECTOR_COPPER_PIGTAIL= 0x21, + SFF8024_CONNECTOR_RJ45 = 0x22, + SFF8024_CONNECTOR_NOSEPARATE = 0x23, + SFF8024_CONNECTOR_MXC_2X16 = 0x24, + + SFF8024_ECC_UNSPEC = 0x00, + SFF8024_ECC_100G_25GAUI_C2M_AOC = 0x01, + SFF8024_ECC_100GBASE_SR4_25GBASE_SR = 0x02, + SFF8024_ECC_100GBASE_LR4_25GBASE_LR = 0x03, + SFF8024_ECC_100GBASE_ER4_25GBASE_ER = 0x04, + SFF8024_ECC_100GBASE_SR10 = 0x05, + SFF8024_ECC_100GBASE_CR4 = 0x0b, + SFF8024_ECC_25GBASE_CR_S = 0x0c, + SFF8024_ECC_25GBASE_CR_N = 0x0d, + SFF8024_ECC_10GBASE_T_SFI = 0x16, + SFF8024_ECC_10GBASE_T_SR = 0x1c, + SFF8024_ECC_5GBASE_T = 0x1d, + SFF8024_ECC_2_5GBASE_T = 0x1e, +}; + /* SFP EEPROM registers */ enum { SFP_PHYS_ID = 0x00, @@ -309,34 +364,7 @@ enum { SFP_SFF8472_COMPLIANCE = 0x5e, SFP_CC_EXT = 0x5f, - SFP_PHYS_ID_SFF = 0x02, - SFP_PHYS_ID_SFP = 0x03, SFP_PHYS_EXT_ID_SFP = 0x04, - SFP_CONNECTOR_UNSPEC = 0x00, - /* codes 01-05 not supportable on SFP, but some modules have single SC */ - SFP_CONNECTOR_SC = 0x01, - SFP_CONNECTOR_FIBERJACK = 0x06, - SFP_CONNECTOR_LC = 0x07, - SFP_CONNECTOR_MT_RJ = 0x08, - SFP_CONNECTOR_MU = 0x09, - SFP_CONNECTOR_SG = 0x0a, - SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b, - SFP_CONNECTOR_MPO_1X12 = 0x0c, - SFP_CONNECTOR_MPO_2X16 = 0x0d, - SFP_CONNECTOR_HSSDC_II = 0x20, - SFP_CONNECTOR_COPPER_PIGTAIL = 0x21, - SFP_CONNECTOR_RJ45 = 0x22, - SFP_CONNECTOR_NOSEPARATE = 0x23, - SFP_CONNECTOR_MXC_2X16 = 0x24, - SFP_ENCODING_UNSPEC = 0x00, - SFP_ENCODING_8B10B = 0x01, - SFP_ENCODING_4B5B = 0x02, - SFP_ENCODING_NRZ = 0x03, - SFP_ENCODING_8472_MANCHESTER = 0x04, - SFP_ENCODING_8472_SONET = 0x05, - SFP_ENCODING_8472_64B66B = 0x06, - SFP_ENCODING_256B257B = 0x07, - SFP_ENCODING_PAM4 = 0x08, SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), SFP_OPTIONS_PAGING_A2 = BIT(12), SFP_OPTIONS_RETIMER = BIT(11), -- cgit v1.2.3 From 74c551ca5a0edcc9cf66a3b73fd95b9a8615bfd0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:56:09 +0000 Subject: net: sfp: add module start/stop upstream notifications When dealing with some copper modules, we can't positively know the module capabilities are until we have probed the PHY. Without the full capabilities, we may end up failing a module that we could otherwise drive with a restricted set of capabilities. An example of this would be a module with a NBASE-T PHY plugged into a host that supports phy interface modes 2500BASE-X and SGMII. The PHY supports 10GBASE-R, 5000BASE-X, 2500BASE-X, SGMII interface modes, which means a subset of the capabilities are compatible with the host. However, reading the module EEPROM leads us to believe that the module only supports ethtool link mode 10GBASE-T, which is incompatible with the host - and thus results in the module being rejected. This patch adds an extra notification which are triggered after the SFP module's PHY probe, and a corresponding notification just before the PHY is removed. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 373d8b67ea86..66a56396e8e3 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -507,6 +507,8 @@ struct sfp_bus; * @module_insert: called after a module has been detected to determine * whether the module is supported for the upstream device. * @module_remove: called after the module has been removed. + * @module_start: called after the PHY probe step + * @module_stop: called before the PHY is removed * @link_down: called when the link is non-operational for whatever * reason. * @link_up: called when the link is operational. @@ -520,6 +522,8 @@ struct sfp_upstream_ops { void (*detach)(void *priv, struct sfp_bus *bus); int (*module_insert)(void *priv, const struct sfp_eeprom_id *id); void (*module_remove)(void *priv); + int (*module_start)(void *priv); + void (*module_stop)(void *priv); void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); -- cgit v1.2.3 From 52c956003a9d5bcae1f445f9dfd42b624adb6e87 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:56:45 +0000 Subject: net: phylink: delay MAC configuration for copper SFP modules Knowing whether we need to delay the MAC configuration because a module may have a PHY is useful to phylink to allow NBASE-T modules to work on systems supporting no more than 2.5G speeds. This commit allows us to delay such configuration until after the PHY has been probed by recording the parsed capabilities, and if the module may have a PHY, doing no more until the module_start() notification is called. At that point, we either have a PHY, or we don't. We move the PHY-based setup a little later, and use the PHYs support capabilities rather than the EEPROM parsed capabilities to determine whether we can support the PHY. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 66a56396e8e3..38893e4dd0f0 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -533,6 +533,7 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); +bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); phy_interface_t sfp_select_interface(struct sfp_bus *bus, @@ -556,6 +557,12 @@ static inline int sfp_parse_port(struct sfp_bus *bus, return PORT_OTHER; } +static inline bool sfp_may_have_phy(struct sfp_bus *bus, + const struct sfp_eeprom_id *id) +{ + return false; +} + static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) -- cgit v1.2.3 From 528be501b7d4a64e04672a38ebfc9e19c555e770 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:44:57 -0600 Subject: soundwire: sdw_slave: add probe_complete structure and new fields When a Slave device becomes synchronized with the bus, it may report its presence in PING frames, as well as optionally asserting an in-band PREQ signal. The bus driver will detect a new Device0, start the enumeration process and assign it a non-zero device number. The SoundWire enumeration provides an arbitration to deal with multiple Slaves reporting ATTACHED at the same time. The bus driver will also invoke the driver .probe() callback associated with this device. The probe() depends on the Linux device core, which handles the match operations and may result in modules being loaded. Once the non-zero device number is programmed, the Slave will report its new status in PING frames and the Master hardware will typically report this status change with an interrupt. At this point, the .update_status() callback of the codec driver will be invoked (usually from an interrupt thread or workqueue scheduled from the interrupt thread). The first race condition which can happen is between the .probe(), which allocates the resources, and .update_status() where initializations are typically handled. The .probe() is only called once during the initial boot, while .update_status() will be called for every bus hardware reset and if the Slave device loses synchronization (an unlikely event but with non-zero probability). The time difference between the end of the enumeration process and a change of status reported by the hardware may be as small as one SoundWire PING frame. The scheduling of the interrupt thread, which invokes .update_status() is not deterministic, but can be small enough to create a race condition. With a 48 kHz frame rate and ideal scheduling cases, the .probe() may be pre-empted within double-digit microseconds. Since there is no guarantee that the .probe() completes by the time .update_status() is invoked as a result of an interrupt, it's not unusual for the .update_status() to rely on data structures that have not been allocated yet, leading to kernel oopses. This patch adds a probe_complete utility, which is used in the sdw_update_slave_status() routine. The codec driver does not need to do anything and can safely assume all resources are allocated in its update_status() callback. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertio