diff options
51 files changed, 1089 insertions, 719 deletions
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index 28f8ad16db25..78404625bad2 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -254,17 +254,6 @@ period (in this case 2603), the grace-period sequence number (7075), and an estimate of the total number of RCU callbacks queued across all CPUs (625 in this case). -In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed -for each CPU:: - - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1 - -The "last_accelerate:" prints the low-order 16 bits (in hex) of the -jiffies counter when this CPU last invoked rcu_try_advance_all_cbs() -from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from -rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle -processing is enabled. - If the grace period ends just as the stall warning starts printing, there will be a spurious stall-warning message, which will include the following:: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9725c546a0d4..3483ac7f019d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4343,19 +4343,30 @@ Disable the Correctable Errors Collector, see CONFIG_RAS_CEC help text. - rcu_nocbs= [KNL] - The argument is a cpu list, as described above. - - In kernels built with CONFIG_RCU_NOCB_CPU=y, set - the specified list of CPUs to be no-callback CPUs. - Invocation of these CPUs' RCU callbacks will be - offloaded to "rcuox/N" kthreads created for that - purpose, where "x" is "p" for RCU-preempt, and - "s" for RCU-sched, and "N" is the CPU number. - This reduces OS jitter on the offloaded CPUs, - which can be useful for HPC and real-time - workloads. It can also improve energy efficiency - for asymmetric multiprocessors. + rcu_nocbs[=cpu-list] + [KNL] The optional argument is a cpu list, + as described above. + + In kernels built with CONFIG_RCU_NOCB_CPU=y, + enable the no-callback CPU mode, which prevents + such CPUs' callbacks from being invoked in + softirq context. Invocation of such CPUs' RCU + callbacks will instead be offloaded to "rcuox/N" + kthreads created for that purpose, where "x" is + "p" for RCU-preempt, "s" for RCU-sched, and "g" + for the kthreads that mediate grace periods; and + "N" is the CPU number. This reduces OS jitter on + the offloaded CPUs, which can be useful for HPC + and real-time workloads. It can also improve + energy efficiency for asymmetric multiprocessors. + + If a cpulist is passed as an argument, the specified + list of CPUs is set to no-callback mode from boot. + + Otherwise, if the '=' sign and the cpulist + arguments are omitted, no CPU will be set to + no-callback mode from boot but the mode may be + toggled at runtime via cpusets. rcu_nocb_poll [KNL] Rather than requiring that offloaded CPUs @@ -4489,10 +4500,6 @@ on rcutree.qhimark at boot time and to zero to disable more aggressive help enlistment. - rcutree.rcu_idle_gp_delay= [KNL] - Set wakeup interval for idle CPUs that have - RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.rcu_kick_kthreads= [KNL] Cause the grace-period kthread to get an extra wake_up() if it sleeps three times longer than @@ -4603,8 +4610,12 @@ in seconds. rcutorture.fwd_progress= [KNL] - Enable RCU grace-period forward-progress testing + Specifies the number of kthreads to be used + for RCU grace-period forward-progress testing for the types of RCU supporting this notion. + Defaults to 1 kthread, values less than zero or + greater than the number of CPUs cause the number + of CPUs to be used. rcutorture.fwd_progress_div= [KNL] Specify the fraction of a CPU-stall-warning @@ -4805,6 +4816,29 @@ period to instead use normal non-expedited grace-period processing. + rcupdate.rcu_task_collapse_lim= [KNL] + Set the maximum number of callbacks present + at the beginning of a grace period that allows + the RCU Tasks flavors to collapse back to using + a single callback queue. This switching only + occurs when rcupdate.rcu_task_enqueue_lim is + set to the default value of -1. + + rcupdate.rcu_task_contend_lim= [KNL] + Set the minimum number of callback-queuing-time + lock-contention events per jiffy required to + cause the RCU Tasks flavors to switch to per-CPU + callback queuing. This switching only occurs + when rcupdate.rcu_task_enqueue_lim is set to + the default value of -1. + + rcupdate.rcu_task_enqueue_lim= [KNL] + Set the number of callback queues to use for the + RCU Tasks family of RCU flavors. The default + of -1 allows this to be automatically (and + dynamically) adjusted. This parameter is intended + for use in testing. + rcupdate.rcu_task_ipi_delay= [KNL] Set time in jiffies during which RCU tasks will avoid sending IPIs, starting with the beginning diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index 20ad23a6c618..f8786be15183 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -184,16 +184,12 @@ There are situations in which idle CPUs cannot be permitted to enter either dyntick-idle mode or adaptive-tick mode, the most common being when that CPU has RCU callbacks pending. -The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs -to enter dyntick-idle mode or adaptive-tick mode anyway. In this case, -a timer will awaken these CPUs every four jiffies in order to ensure -that the RCU callbacks are processed in a timely fashion. - -Another approach is to offload RCU callback processing to "rcuo" kthreads +Avoid this by offloading RCU callback processing to "rcuo" kthreads using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to offload may be selected using The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated list of CPUs and CPU ranges, for example, -"1,3-5" selects CPUs 1, 3, 4, and 5. +"1,3-5" selects CPUs 1, 3, 4, and 5. Note that CPUs specified by +the "nohz_full" kernel boot parameter are also offloaded. The offloaded CPUs will never queue RCU callbacks, and therefore RCU never prevents offloaded CPUs from entering either dyntick-idle mode diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 3db96c4f45fd..659d13a7ddaa 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -69,7 +69,7 @@ struct rcu_cblist { * * * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | @@ -77,7 +77,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_OFFLOADED | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads, | @@ -89,7 +89,9 @@ struct rcu_cblist { * | | * v v * --------------------------------------- ----------------------------------| - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | * | | | | * | | | | @@ -104,9 +106,10 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | - * | SEGCBLIST_KTHREAD_GP | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | | + * | SEGCBLIST_KTHREAD_CB | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -120,7 +123,8 @@ struct rcu_cblist { * * * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -130,6 +134,22 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_CB | | + * | SEGCBLIST_KTHREAD_GP | + * | | + * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | + * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | Invoke RCU core so we make sure not to preempt it in the middle with | + * | leaving some urgent work unattended within a jiffy. | + * ---------------------------------------------------------------------------- + * | + * v + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -143,7 +163,9 @@ struct rcu_cblist { * | | * v v * ---------------------------------------------------------------------------| - * | | + * | | | + * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | * | | | * | GP kthread woke up and | CB kthread woke up and | @@ -159,7 +181,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | 0 | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | @@ -168,17 +190,18 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_SOFTIRQ_ONLY BIT(1) -#define SEGCBLIST_KTHREAD_CB BIT(2) -#define SEGCBLIST_KTHREAD_GP BIT(3) -#define SEGCBLIST_OFFLOADED BIT(4) +#define SEGCBLIST_RCU_CORE BIT(1) +#define SEGCBLIST_LOCKING BIT(2) +#define SEGCBLIST_KTHREAD_CB BIT(3) +#define SEGCBLIST_KTHREAD_GP BIT(4) +#define SEGCBLIST_OFFLOADED BIT(5) struct rcu_segcblist { struct rcu_head *head; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e0beb5c5659..88b42eb46406 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -364,6 +364,12 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ +#define __unrcu_pointer(p, local) \ +({ \ + typeof(*p) *local = (typeof(*p) *__force)(p); \ + rcu_check_sparse(p, __rcu); \ + ((typeof(*p) __force __kernel *)(local)); \ +}) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property @@ -371,39 +377,35 @@ static inline void rcu_preempt_sleep_check(void) { } * Converts @p from an __rcu pointer to a __kernel pointer. * This allows an __rcu pointer to be used with xchg() and friends. */ -#define unrcu_pointer(p) \ -({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \ - rcu_check_sparse(p, __rcu); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ -}) +#define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu)) -#define __rcu_access_pointer(p, space) \ +#define __rcu_access_pointer(p, local, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_check(p, c, space) \ +#define __rcu_dereference_check(p, local, c, space) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_protected(p, c, space) \ +#define __rcu_dereference_protected(p, local, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) -#define rcu_dereference_raw(p) \ +#define __rcu_dereference_raw(p, local) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(p) ________p1 = READ_ONCE(p); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + typeof(p) local = READ_ONCE(p); \ + ((typeof(*p) __force __kernel *)(local)); \ }) +#define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu)) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable @@ -490,7 +492,7 @@ do { \ * when tearing down multi-linked structures after a grace period * has elapsed. */ -#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) +#define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking @@ -526,7 +528,8 @@ do { \ * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking @@ -541,7 +544,8 @@ do { \ * rcu_read_lock() but also rcu_read_lock_bh() into account. */ #define rcu_dereference_bh_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking @@ -556,7 +560,8 @@ do { \ * only rcu_read_lock() but also rcu_read_lock_sched() into account. */ #define rcu_dereference_sched_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_sched_held(), \ __rcu) /* @@ -566,7 +571,8 @@ do { \ * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ -#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) +#define rcu_dereference_raw_check(p) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented @@ -585,7 +591,7 @@ do { \ * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) + __rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu) /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9be015305f9f..858f4d429946 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) + (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e6011a9975af..01226e4d960a 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -117,7 +117,8 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * lockdep_is_held() calls. */ #define srcu_dereference_check(p, ssp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing diff --git a/include/linux/torture.h b/include/linux/torture.h index 24f58e50a94b..63fa4196e51c 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -38,13 +38,8 @@ do { \ pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); \ } \ } while (0) -#define VERBOSE_TOROUT_ERRSTRING(s) \ -do { \ - if (verbose) { \ - verbose_torout_sleep(); \ - pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); \ - } \ -} while (0) +#define TOROUT_ERRSTRING(s) \ + pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s) void verbose_torout_sleep(void); #define torture_init_error(firsterr) \ diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 397ac13d2ef7..9c2fb613a55d 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -1047,7 +1047,7 @@ static int __init lock_torture_init(void) sizeof(writer_tasks[0]), GFP_KERNEL); if (writer_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); + TOROUT_ERRSTRING("writer_tasks: Out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -1058,7 +1058,7 @@ static int __init lock_torture_init(void) sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); + TOROUT_ERRSTRING("reader_tasks: Out of memory"); kfree(writer_tasks); writer_tasks = NULL; firsterr = -ENOMEM; diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 3128b7cf8e1f..bf8e341e75b4 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -112,7 +112,7 @@ config RCU_STALL_COMMON making these warnings mandatory for the tree variants. config RCU_NEED_SEGCBLIST - def_bool ( TREE_RCU || TREE_SRCU ) + def_bool ( TREE_RCU || TREE_SRCU || TASKS_RCU_GENERIC ) config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" @@ -169,24 +169,6 @@ config RCU_FANOUT_LEAF Take the default if unsure. -config RCU_FAST_NO_HZ - bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ_COMMON && SMP && RCU_EXPERT - default n - help - This option permits CPUs to enter dynticks-idle state even if - they have RCU callbacks queued, and prevents RCU from waking - these CPUs up more than roughly once every four jiffies (by - default, you can adjust this using the rcutree.rcu_idle_gp_delay - parameter), thus improving energy efficiency. On the other - hand, this option increases the duration of RCU grace periods, - for example, slowing down synchronize_rcu(). - - Say Y if energy efficiency is critically important, and you - don't care about increased grace-period durations. - - Say N if you are unsure. - config RCU_BOOST bool "Enable RCU priority boosting" depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index aaa111237b60..81145c3ece25 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -261,16 +261,14 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) } /* - * Mark the specified rcu_segcblist structure as offloaded. + * Mark the specified rcu_segcblist structure as offloaded (or not) */ void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload) { - if (offload) { - rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY); - rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED); - } else { + if (offload) + rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED); + else rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED); - } } /* |
