diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-15 15:25:27 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-15 15:25:27 -0700 |
| commit | 9855e873285f1395b9a04613c56101f04a5ec9fb (patch) | |
| tree | f4208bc03ebcc3011feec1bf7534b5a85d7183fd | |
| parent | 253e1e98180a124475327f2ce7b6f15f2e4d0d45 (diff) | |
| parent | 02219caa92b5b0ed97f8d8b9cf580f6f34a9be31 (diff) | |
| download | linux-9855e873285f1395b9a04613c56101f04a5ec9fb.tar.gz linux-9855e873285f1395b9a04613c56101f04a5ec9fb.tar.bz2 linux-9855e873285f1395b9a04613c56101f04a5ec9fb.zip | |
Merge tag 'rcu.2024.07.12a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu
Pull RCU updates from Paul McKenney:
- Update Tasks RCU and Tasks Rude RCU description in Requirements.rst
and clarify rcu_assign_pointer() and rcu_dereference() ordering
properties
- Add lockdep assertions for RCU readers, limit inline wakeups for
callback-bypass synchronize_rcu(), add an
rcutree.nohz_full_patience_delay to reduce nohz_full OS jitter, add
Uladzislau Rezki as RCU maintainer, and fix a subtle
callback-migration memory-ordering issue
- Remove a number of redundant memory barriers
- Remove unnecessary bypass-list lock-contention mitigation, use
parking API instead of open-coded ad-hoc equivalent, and upgrade
obsolete comments
- Revert avoidance of a deadlock that can no longer occur and properly
synchronize Tasks Trace RCU checking of runqueues
- Add tests for handling of double-call_rcu() bug, add missing
MODULE_DESCRIPTION, and add a script that histograms the number of
calls to RCU updaters
- Fill out SRCU polled-grace-period API
* tag 'rcu.2024.07.12a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (29 commits)
rcu: Fix rcu_barrier() VS post CPUHP_TEARDOWN_CPU invocation
rcu: Eliminate lockless accesses to rcu_sync->gp_count
MAINTAINERS: Add Uladzislau Rezki as RCU maintainer
rcu: Add rcutree.nohz_full_patience_delay to reduce nohz_full OS jitter
rcu/exp: Remove redundant full memory barrier at the end of GP
rcu: Remove full memory barrier on RCU stall printout
rcu: Remove full memory barrier on boot time eqs sanity check
rcu/exp: Remove superfluous full memory barrier upon first EQS snapshot
rcu: Remove superfluous full memory barrier upon first EQS snapshot
rcu: Remove full ordering on second EQS snapshot
srcu: Fill out polled grace-period APIs
srcu: Update cleanup_srcu_struct() comment
srcu: Add NUM_ACTIVE_SRCU_POLL_OLDSTATE
srcu: Disable interrupts directly in srcu_gp_end()
rcu: Disable interrupts directly in rcu_gp_init()
rcu/tree: Reduce wake up for synchronize_rcu() common case
rcu/tasks: Fix stale task snaphot for Tasks Trace
tools/rcu: Add rcu-updaters.sh script
rcutorture: Add missing MODULE_DESCRIPTION() macros
rcutorture: Fix rcu_torture_fwd_cb_cr() data race
...
| -rw-r--r-- | Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 6 | ||||
| -rw-r--r-- | Documentation/RCU/Design/Requirements/Requirements.rst | 16 | ||||
| -rw-r--r-- | Documentation/RCU/whatisRCU.rst | 30 | ||||
| -rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 8 | ||||
| -rw-r--r-- | MAINTAINERS | 1 | ||||
| -rw-r--r-- | include/linux/rcu_segcblist.h | 88 | ||||
| -rw-r--r-- | include/linux/rcupdate.h | 62 | ||||
| -rw-r--r-- | include/linux/srcu.h | 35 | ||||
| -rw-r--r-- | kernel/pid_namespace.c | 17 | ||||
| -rw-r--r-- | kernel/rcu/rcuscale.c | 1 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 49 | ||||
| -rw-r--r-- | kernel/rcu/refscale.c | 1 | ||||
| -rw-r--r-- | kernel/rcu/srcutiny.c | 3 | ||||
| -rw-r--r-- | kernel/rcu/srcutree.c | 13 | ||||
| -rw-r--r-- | kernel/rcu/sync.c | 12 | ||||
| -rw-r--r-- | kernel/rcu/tasks.h | 26 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 92 | ||||
| -rw-r--r-- | kernel/rcu/tree.h | 2 | ||||
| -rw-r--r-- | kernel/rcu/tree_exp.h | 24 | ||||
| -rw-r--r-- | kernel/rcu/tree_nocb.h | 147 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 14 | ||||
| -rw-r--r-- | kernel/rcu/tree_stall.h | 4 | ||||
| -rw-r--r-- | kernel/sched/core.c | 14 | ||||
| -rwxr-xr-x | tools/rcu/rcu-updaters.sh | 52 |
24 files changed, 439 insertions, 278 deletions
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst index 5750f125361b..728b1e690c64 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst @@ -149,9 +149,9 @@ This case is handled by calls to the strongly ordered ``atomic_add_return()`` read-modify-write atomic operation that is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time. -The grace-period kthread invokes ``rcu_dynticks_snap()`` and -``rcu_dynticks_in_eqs_since()`` (both of which invoke -an ``atomic_add_return()`` of zero) to detect idle CPUs. +The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()`` +(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()`` +(both of which rely on acquire semantics) to detect idle CPUs. +-----------------------------------------------------------------------+ | **Quick Quiz**: | diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index cccafdaa1f84..f511476b4550 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -2357,6 +2357,7 @@ section. #. `Sched Flavor (Historical)`_ #. `Sleepable RCU`_ #. `Tasks RCU`_ +#. `Tasks Trace RCU`_ Bottom-Half Flavor (Historical) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2610,6 +2611,16 @@ critical sections that are delimited by voluntary context switches, that is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In addition, transitions to and from userspace execution also delimit tasks-RCU read-side critical sections. +Idle tasks are ignored by Tasks RCU, and Tasks Rude RCU may be used to +interact with them. + +Note well that involuntary context switches are *not* Tasks-RCU quiescent +states. After all, in preemptible kernels, a task executing code in a +trampoline might be preempted. In this case, the Tasks-RCU grace period +clearly cannot end until that task resumes and its execution leaves that +trampoline. This means, among other things, that cond_resched() does +not provide a Tasks RCU quiescent state. (Instead, use rcu_softirq_qs() +from softirq or rcu_tasks_classic_qs() otherwise.) The tasks-RCU API is quite compact, consisting only of call_rcu_tasks(), synchronize_rcu_tasks(), and @@ -2632,6 +2643,11 @@ moniker. And this operation is considered to be quite rude by real-time workloads that don't want their ``nohz_full`` CPUs receiving IPIs and by battery-powered systems that don't want their idle CPUs to be awakened. +Once kernel entry/exit and deep-idle functions have been properly tagged +``noinstr``, Tasks RCU can start paying attention to idle tasks (except +those that are idle from RCU's perspective) and then Tasks Rude RCU can +be removed from the kernel. + The tasks-rude-RCU API is also reader-marking-free and thus quite compact, consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(), and rcu_barrier_tasks_rude(). diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 94838c65c7d9..d585a5490aee 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -250,21 +250,25 @@ rcu_assign_pointer() ^^^^^^^^^^^^^^^^^^^^ void rcu_assign_pointer(p, typeof(p) v); - Yes, rcu_assign_pointer() **is** implemented as a macro, though it - would be cool to be able to declare a function in this manner. - (Compiler experts will no doubt disagree.) + Yes, rcu_assign_pointer() **is** implemented as a macro, though + it would be cool to be able to declare a function in this manner. + (And there has been some discussion of adding overloaded functions + to the C language, so who knows?) The updater uses this spatial macro to assign a new value to an RCU-protected pointer, in order to safely communicate the change in value from the updater to the reader. This is a spatial (as opposed to temporal) macro. It does not evaluate to an rvalue, - but it does execute any memory-barrier instructions required - for a given CPU architecture. Its ordering properties are that - of a store-release operation. - - Perhaps just as important, it serves to document (1) which - pointers are protected by RCU and (2) the point at which a - given structure becomes accessible to other CPUs. That said, + but it does provide any compiler directives and memory-barrier + instructions required for a given compile or CPU architecture. + Its ordering properties are that of a store-release operation, + that is, any prior loads and stores required to initialize the + structure are ordered before the store that publishes the pointer + to that structure. + + Perhaps just as important, rcu_assign_pointer() serves to document + (1) which pointers are protected by RCU and (2) the point at which + a given structure becomes accessible to other CPUs. That said, rcu_assign_pointer() is most frequently used indirectly, via the _rcu list-manipulation primitives such as list_add_rcu(). @@ -283,7 +287,11 @@ rcu_dereference() executes any needed memory-barrier instructions for a given CPU architecture. Currently, only Alpha needs memory barriers within rcu_dereference() -- on other CPUs, it compiles to a - volatile load. + volatile load. However, no mainstream C compilers respect + address dependencies, so rcu_dereference() uses volatile casts, + which, in combination with the coding guidelines listed in + rcu_dereference.rst, prevent current compilers from breaking + these dependencies. Common coding practice uses rcu_dereference() to copy an RCU-protected pointer to a local variable, then dereferences diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 27ec49af1bf2..fc01da74cf99 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5015,6 +5015,14 @@ the ->nocb_bypass queue. The definition of "too many" is supplied by this kernel boot parameter. + rcutree.nohz_full_patience_delay= [KNL] + On callback-offloaded (rcu_nocbs) CPUs, avoid + disturbing RCU unless the grace period has + reached the specified age in milliseconds. + Defaults to zero. Large values will be capped + at five seconds. All values will be rounded down + to the nearest value representable by jiffies. + rcutree.qhimark= [KNL] Set threshold of queued RCU callbacks beyond which batch limiting is disabled. diff --git a/MAINTAINERS b/MAINTAINERS index 7507671fd4e9..b1ce34379868 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18863,6 +18863,7 @@ M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h) M: Joel Fernandes <joel@joelfernandes.org> M: Josh Triplett <josh@joshtriplett.org> M: Boqun Feng <boqun.feng@gmail.com> +M: Uladzislau Rezki <urezki@gmail.com> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> R: Lai Jiangshan <jiangshanlai@gmail.com> diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 659d13a7ddaa..ba95c06675e1 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -80,36 +80,35 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads, | - * | allowing nocb_timer to be armed. | + * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads. | * ---------------------------------------------------------------------------- * | * v - * ----------------------------------- - * | | - * v v - * --------------------------------------- ----------------------------------| - * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | - * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | - * | | | | - * | | | | - * | CB kthread woke up and | | GP kthread woke up and | - * | acknowledged SEGCBLIST_OFFLOADED. | | acknowledged SEGCBLIST_OFFLOADED| - * | Processes callbacks concurrently | | | - * | with rcu_core(), holding | | | - * | nocb_lock. | | | - * --------------------------------------- ----------------------------------- - * | | - * ----------------------------------- + * ---------------------------------------------------------------------------- + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | + * | + unparked CB kthread | + * | | + * | CB kthread got unparked and processes callbacks concurrently with | + * | rcu_core(), holding nocb_lock. | + * --------------------------------------------------------------------------- + * | + * v + * ---------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | + * | | + * | GP kthread woke up and acknowledged nocb_lock. | + * ---------------------------------------- ----------------------------------- * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_LOCKING | | - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_GP | | - * | SEGCBLIST_KTHREAD_CB | + * | + unparked CB kthread | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -125,8 +124,8 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_LOCKING | | * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | * | ignores callbacks. Bypass enqueue is enabled. | @@ -137,11 +136,11 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | - * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | handles callbacks concurrently. Bypass enqueue is disabled. | * | Invoke RCU core so we make sure not to preempt it in the middle with | * | leaving some urgent work unattended within a jiffy. | * ---------------------------------------------------------------------------- @@ -150,42 +149,31 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable | - * | bypass enqueue. | + * | holding nocb_lock. Wake up GP kthread if necessary. | * ---------------------------------------------------------------------------- * | * v - * ----------------------------------- - * | | - * v v - * ---------------------------------------------------------------------------| - * | | | - * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | - * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | - * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | - * | | | - * | GP kthread woke up and | CB kthread woke up and | - * | acknowledged the fact that | acknowledged the fact that | - * | SEGCBLIST_OFFLOADED got cleared. | SEGCBLIST_OFFLOADED got cleared. | - * | | The CB kthread goes to sleep | - * | The callbacks from the target CPU | until it ever gets re-offloaded. | - * | will be ignored from the GP kthread | | - * | loop. | | + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | + unparked CB kthread | + * | | + * | GP kthread woke up and acknowledged the fact that SEGCBLIST_OFFLOADED | + * | got cleared. The callbacks from the target CPU will be ignored from the| + * | GP kthread loop. | * ---------------------------------------------------------------------------- - * | | - * ----------------------------------- * | * v * ---------------------------------------------------------------------------- * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | + * | + parked CB kthread | * | | - * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | - * | Flush pending nocb_timer. Flush nocb bypass callbacks. | + * | CB kthread is parked. Callbacks processed by rcu_core() from softirqs or | + * | local rcuc kthread, while holding nocb_lock. | * ---------------------------------------------------------------------------- * | * v diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..be450a3477be 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void); #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); -void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) @@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void); #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } -static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ @@ -421,11 +419,71 @@ static inline void rcu_preempt_sleep_check(void) { } "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) +// See RCU_LOCKDEP_WARN() for an explanation of the double call to +// debug_lockdep_rcu_enabled(). +static inline bool lockdep_assert_rcu_helper(bool c) +{ + return debug_lockdep_rcu_enabled() && + (c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) && + debug_lockdep_rcu_enabled(); +} + +/** + * lockdep_assert_in_rcu_read_lock - WARN if not protected by rcu_read_lock() + * + * Splats if lockdep is enabled and there is no rcu_read_lock() in effect. + */ +#define lockdep_assert_in_rcu_read_lock() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map))) + +/** + * lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh() + * + * Splats if lockdep is enabled and there is no rcu_read_lock_bh() in effect. + * Note that local_bh_disable() and friends do not suffice here, instead an + * actual rcu_read_lock_bh() is required. + */ +#define lockdep_assert_in_rcu_read_lock_bh() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map))) + +/** + * lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched() + * + * Splats if lockdep is enabled and there is no rcu_read_lock_sched() + * in effect. Note that preempt_disable() and friends do not suffice here, + * instead an actual rcu_read_lock_sched() is required. + */ +#define lockdep_assert_in_rcu_read_lock_sched() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map))) + +/** + * lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader + * + * Splats if lockdep is enabled and there is no RCU reader of any + * type in effect. Note that regions of code protected by things like + * preempt_disable, local_bh_disable(), and local_irq_disable() all qualify + * as RCU readers. + * + * Note that this will never trigger in PREEMPT_NONE or PREEMPT_VOLUNTARY + * kernels that are not also built with PREEMPT_COUNT. But if you have + * lockdep enabled, you might as well also enable PREEMPT_COUNT. + */ +#define lockdep_assert_in_rcu_reader() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) && \ + !lock_is_held(&rcu_bh_lock_map) && \ + !lock_is_held(&rcu_sched_lock_map) && \ + preemptible())) + #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) +#define lockdep_assert_in_rcu_read_lock() do { } while (0) +#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0) +#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0) +#define lockdep_assert_in_rcu_reader() do { } while (0) + #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 236610e4a8fa..6f6cb5fc1242 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -57,10 +57,45 @@ void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); + +#define SRCU_GET_STATE_COMPLETED 0x1 + +/** + * get_completed_synchronize_srcu - Return a pre-completed polled state cookie + * + * Returns a value that poll_state_synchronize_srcu() will always treat + * as a cookie whose grace period has already completed. + */ +static inline unsigned long get_completed_synchronize_srcu(void) +{ + return SRCU_GET_STATE_COMPLETED; +} + unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); +// Maximum number of unsigned long values corresponding to +// not-yet-completed SRCU grace periods. +#define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2 + +/** + * same_state_synchronize_srcu - Are two old-state values identical? + * @oldstate1: First old-state value. + * @oldstate2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_srcu(), start_poll_synchronize_srcu(), or + * get_completed_synchronize_srcu(). Returns @true if the two values are + * identical and @false otherwise. This allows structures whose lifetimes + * are tracked by old-state values to push these values to a list header, + * allowing those structures to be slightly smaller. + */ +static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned long oldstate2) +{ + return oldstate1 == oldstate2; +} + #ifdef CONFIG_NEED_SRCU_NMI_SAFE int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 25f3cf679b35..bdf0087d6442 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -249,24 +249,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) set_current_state(TASK_INTERRUPTIBLE); if (pid_ns->pid_allocated == init_pids) break; - /* - * Release tasks_rcu_exit_srcu to avoid following deadlock: - * - * 1) TASK A unshare(CLONE_NEWPID) - * 2) TASK A fork() twice -> TASK B (child reaper for new ns) - * and TASK C - * 3) TASK B exits, kills TASK C, waits for TASK A to reap it - * 4) TASK A calls synchronize_rcu_tasks() - * -> synchronize_srcu(tasks_rcu_exit_srcu) - * 5) *DEADLOCK* - * - * It is considered safe to release tasks_rcu_exit_srcu here - * because we assume the current task can not be concurrently - * reaped at this point. - */ - exit_tasks_rcu_stop(); schedule(); - exit_tasks_rcu_start(); } __set_current_state(TASK_RUNNING); diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 8db4fedaaa1e..b53a9e8f5904 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -42,6 +42,7 @@ #include "rcu.h" +MODULE_DESCRIPTION("Read-Copy Update module-based scalability-test facility"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 807fbf6123a7..08bf7c669dd3 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -51,6 +51,7 @@ #include "rcu.h" +MODULE_DESCRIPTION("Read-Copy Update module-based torture test facility"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); @@ -390,6 +391,7 @@ struct rcu_torture_ops { int extendables; int slow_gps; int no_pi_lock; + int debug_objects; const char *name; }; @@ -577,6 +579,7 @@ static struct rcu_torture_ops rcu_ops = { .irq_capable = 1, .can_boost = IS_ENABLED(CONFIG_RCU_BOOST), .extendables = RCUTORTURE_MAX_EXTEND, + .debug_objects = 1, .name = "rcu" }; @@ -747,6 +750,7 @@ static struct rcu_torture_ops srcu_ops = { .cbflood_max = 50000, .irq_capable = 1, .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), + .debug_objects = 1, .name = "srcu" }; @@ -786,6 +790,7 @@ static struct rcu_torture_ops srcud_ops = { .cbflood_max = 50000, .irq_capable = 1, .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), + .debug_objects = 1, .name = "srcud" }; @@ -2626,7 +2631,7 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp) spin_lock_irqsave(&rfp->rcu_fwd_lock, flags); rfcpp = rfp->rcu_fwd_cb_tail; rfp->rcu_fwd_cb_tail = &rfcp->rfc_next; - WRITE_ONCE(*rfcpp, rfcp); + smp_store_release(rfcpp, rfcp); WRITE_ONCE(rfp->n_launders_cb, rfp->n_launders_cb + 1); i = ((jiffies - rfp->rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV)); if (i >= ARRAY_SIZE(rfp->n_launders_hist)) @@ -3455,7 +3460,6 @@ rcu_torture_cleanup(void) cur_ops->gp_slow_unregister(NULL); } -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD static void rcu_torture_leak_cb(struct rcu_head *rhp) { } @@ -3473,7 +3477,6 @@ static void rcu_torture_err_cb(struct rcu_head *rhp) */ pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME); } -#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ /* * Verify that double-free causes debug-objects to complain, but only @@ -3482,39 +3485,43 @@ static void rcu_torture_err_cb(struct rcu_head *rhp) */ static void rcu_test_debug_objects(void) { -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD struct rcu_head rh1; struct rcu_head rh2; + int idx; + + if (!IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) { + pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_%s()\n", + KBUILD_MODNAME, cur_ops->name); + return; + } + + if (WARN_ON_ONCE(cur_ops->debug_objects && + (!cur_ops->call || !cur_ops->cb_barrier))) + return; + struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); init_rcu_head_on_stack(&rh1); init_rcu_head_on_stack(&rh2); - pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME); + pr_alert("%s: WARN: Duplicate call_%s() test starting.\n", KBUILD_MODNAME, cur_ops->name); /* Try to queue the rh2 pair of callbacks for the same grace period. */ - preempt_disable(); /* Prevent preemption from interrupting test. */ - rcu_read_lock(); /* Make it impossible to finish a grace period. */ - call_rcu_hurry(&rh1, rcu_torture_leak_cb); /* Start grace period. */ - local_irq_disable(); /* Make it harder to start a new grace period. */ - call_rcu_hurry(&rh2, rcu_torture_leak_cb); - call_rcu_hurry(&rh2, rcu_torture_err_cb); /* Duplicate callback. */ + idx = cur_ops->readlock(); /* Make it impossible to finish a grace period. */ + cur_ops->call(&rh1, rcu_torture_leak_cb); /* Start grace period. */ + cur_ops->call(&rh2, rcu_torture_leak_cb); + cur_ops->call(&rh2, rcu_torture_err_cb); /* Duplicate callback. */ if (rhp) { - call_rcu_hurry(rhp, rcu_torture_leak_cb); - call_rcu_hurry(rhp, rcu_torture_err_cb); /* Another duplicate callback. */ + cur_ops->call(rhp, rcu_torture_leak_cb); + cur_ops->call(rhp, rcu_torture_err_cb); /* Another duplicate callback. */ } - local_irq_enable(); - rcu_read_unlock(); - preempt_enable(); + cur_ops->readunlock(idx); /* Wait for them all to get done so we can safely return. */ - rcu_barrier(); - pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME); + cur_ops->cb_barrier(); + pr_alert("%s: WARN: Duplicate call_%s() test complete.\n", KBUILD_MODNAME, cur_ops->name); destroy_rcu_head_on_stack(&rh1); destroy_rcu_head_on_stack(&rh2); kfree(rhp); -#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ - pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME); -#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ } static void rcutorture_sync(void) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 2c2648a3ad30..f4ea5b1ec068 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -63,6 +63,7 @@ do { \ #define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x) +MODULE_DESCRIPTION("Scalability test for object reference mechanisms"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>"); |
