From b934b7ff5ea755473d9737d79ab303722ceba22e Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 12 Jul 2023 23:15:56 +0800 Subject: rcu: Delete a redundant check in rcu_check_gp_kthread_starvation() The rcu_check_gp_kthread_starvation() function uses task_cpu() to sample the last CPU that the grace-period kthread ran on, and task_cpu() samples the thread_info structure's ->cpu field. But this field will always contain a number corresponding to a CPU that was online some time in the past, thus never a negative number. This invariant is checked by a WARN_ON_ONCE() in set_task_cpu(). This means that if the grace-period kthread exists, that is, if the "gpk" local variable is non-NULL, the "cpu" local variable will be non-negative. This in turn means that the existing check for non-negative "cpu" is redundant with the enclosing check for non-NULL "gpk". This commit threefore removes the redundant check of "cpu". Signed-off-by: Zhen Lei Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree_stall.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6f06dc12904a..b5ce0580074e 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -537,13 +537,11 @@ static void rcu_check_gp_kthread_starvation(void) pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); pr_err("RCU grace-period kthread stack dump:\n"); sched_show_task(gpk); - if (cpu >= 0) { - if (cpu_is_offline(cpu)) { - pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); - } else { - pr_err("Stack dump where RCU GP kthread last ran:\n"); - dump_cpu_task(cpu); - } + if (cpu_is_offline(cpu)) { + pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); + } else { + pr_err("Stack dump where RCU GP kthread last ran:\n"); + dump_cpu_task(cpu); } wake_up_process(gpk); } -- cgit v1.2.3 From f3efe02fd56e2498ad8da4c4f444a34aa1456a46 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 12 Jul 2023 23:15:57 +0800 Subject: rcu: Don't redump the stalled CPU where RCU GP kthread last ran The stacks of all stalled CPUs will be dumped in rcu_dump_cpu_stacks(). If the CPU on where RCU GP kthread last ran is stalled, its stack does not need to be dumped again. We can search the corresponding backtrace based on the printed CPU ID. For example: [ 87.328275] rcu: rcu_sched kthread starved for ... ->cpu=3 <--------| ... ... | [ 89.385007] NMI backtrace for cpu 3 <--------| [ 89.385179] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.10.0+ #22 <--| [ 89.385188] Hardware name: linux,dummy-virt (DT) [ 89.385196] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--) [ 89.385204] pc : arch_cpu_idle+0x40/0xc0 [ 89.385211] lr : arch_cpu_idle+0x2c/0xc0 ... ... [ 89.385566] Call trace: [ 89.385574] arch_cpu_idle+0x40/0xc0 [ 89.385581] default_idle_call+0x100/0x450 [ 89.385589] cpuidle_idle_call+0x2f8/0x460 [ 89.385596] do_idle+0x1dc/0x3d0 [ 89.385604] cpu_startup_entry+0x5c/0xb0 [ 89.385613] secondary_start_kernel+0x35c/0x520 Signed-off-by: Zhen Lei Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree_stall.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index b5ce0580074e..fc04a8d7ce96 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -534,12 +534,14 @@ static void rcu_check_gp_kthread_starvation(void) data_race(READ_ONCE(rcu_state.gp_state)), gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu); if (gpk) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); pr_err("RCU grace-period kthread stack dump:\n"); sched_show_task(gpk); if (cpu_is_offline(cpu)) { pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); - } else { + } else if (!(data_race(READ_ONCE(rdp->mynode->qsmask)) & rdp->grpmask)) { pr_err("Stack dump where RCU GP kthread last ran:\n"); dump_cpu_task(cpu); } -- cgit v1.2.3 From 243d5ab34446854ceca55146fc0d837655657f8e Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 24 Jul 2023 10:26:51 +0800 Subject: rcu: Eliminate check_cpu_stall() duplicate code The code and comments of self-detected and other-detected RCU CPU stall warnings are identical except the output function. This commit therefore refactors so as to consolidate the duplicate code. Signed-off-by: Zhen Lei Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree_stall.h | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index fc04a8d7ce96..2443d1d4a6dc 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -711,7 +711,7 @@ static void print_cpu_stall(unsigned long gps) static void check_cpu_stall(struct rcu_data *rdp) { - bool didstall = false; + bool self_detected; unsigned long gs1; unsigned long gs2; unsigned long gps; @@ -758,10 +758,10 @@ static void check_cpu_stall(struct rcu_data *rdp) return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; jn = jiffies + ULONG_MAX / 2; + self_detected = READ_ONCE(rnp->qsmask) & rdp->grpmask; if (rcu_gp_in_progress() && - (READ_ONCE(rnp->qsmask) & rdp->grpmask) && + (self_detected || ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { - /* * If a virtual machine is stopped by the host it can look to * the watchdog like an RCU stall. Check to see if the host @@ -770,33 +770,21 @@ static void check_cpu_stall(struct rcu_data *rdp) if (kvm_check_and_clear_guest_paused()) return; - /* We haven't checked in, so go dump stack. */ - print_cpu_stall(gps); - if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) - rcu_ftrace_dump(DUMP_ALL); - didstall = true; - - } else if (rcu_gp_in_progress() && - ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && - cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { - - /* - * If a virtual machine is stopped by the host it can look to - * the watchdog like an RCU stall. Check to see if the host - * stopped the vm. - */ - if (kvm_check_and_clear_guest_paused()) - return; + if (self_detected) { + /* We haven't checked in, so go dump stack. */ + print_cpu_stall(gps); + } else { + /* They had a few time units to dump stack, so complain. */ + print_other_cpu_stall(gs2, gps); + } - /* They had a few time units to dump stack, so complain. */ - print_other_cpu_stall(gs2, gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); - didstall = true; - } - if (didstall && READ_ONCE(rcu_state.jiffies_stall) == jn) { - jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; - WRITE_ONCE(rcu_state.jiffies_stall, jn); + + if (READ_ONCE(rcu_state.jiffies_stall) == jn) { + jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + WRITE_ONCE(rcu_state.jiffies_stall, jn); + } } } -- cgit v1.2.3 From 5b404fdabacf4bee92d8c66013402a85f18a6a10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Aug 2023 15:46:32 -0700 Subject: rcu: Add RCU CPU stall notifier It is sometimes helpful to have a way for the subsystem causing the stall to dump its state when an RCU CPU stall occurs. This commit therefore bases rcu_stall_chain_notifier_register() and rcu_stall_chain_notifier_unregister() on atomic notifiers in order to provide this functionality. Signed-off-by: Paul E. McKenney Cc: Steven Rostedt Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcu.h | 6 +++++ kernel/rcu/tree_exp.h | 6 ++++- kernel/rcu/tree_stall.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 98e13be411af..ef3bab977407 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -654,4 +654,10 @@ static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; } bool rcu_cpu_beenfullyonline(int cpu); #endif +#ifdef CONFIG_RCU_STALL_COMMON +int rcu_stall_notifier_call_chain(unsigned long val, void *v); +#else // #ifdef CONFIG_RCU_STALL_COMMON +static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } +#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8239b39d945b..6d7cea5d591f 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -621,10 +621,14 @@ static void synchronize_rcu_expedited_wait(void) } for (;;) { + unsigned long j; + if (synchronize_rcu_expedited_wait_once(jiffies_stall)) return; if (rcu_stall_is_suppressed()) continue; + j = jiffies; + rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start)); trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall")); pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", rcu_state.name); @@ -647,7 +651,7 @@ static void synchronize_rcu_expedited_wait(void) } } pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", - jiffies - jiffies_start, rcu_state.expedited_sequence, + j - jiffies_start, rcu_state.expedited_sequence, data_race(rnp_root->expmask), ".T"[!!data_race(rnp_root->exp_tasks)]); if (ndetected) { diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 2443d1d4a6dc..49544f932279 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -8,6 +8,7 @@ */ #include +#include ////////////////////////////////////////////////////////////////////////////// // @@ -770,6 +771,7 @@ static void check_cpu_stall(struct rcu_data *rdp) if (kvm_check_and_clear_guest_paused()) return; + rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps); if (self_detected) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(gps); @@ -790,7 +792,7 @@ static void check_cpu_stall(struct rcu_data *rdp) ////////////////////////////////////////////////////////////////////////////// // -// RCU forward-progress mechanisms, including of callback invocation. +// RCU forward-progress mechanisms, including for callback invocation. /* @@ -1042,3 +1044,58 @@ static int __init rcu_sysrq_init(void) return 0; } early_initcall(rcu_sysrq_init); + + +////////////////////////////////////////////////////////////////////////////// +// +// RCU CPU stall-warning notifiers + +static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list); + +/** + * rcu_stall_chain_notifier_register - Add an RCU CPU stall notifier + * @n: Entry to add. + * + * Adds an RCU CPU stall notifier to an atomic notifier chain. + * The @action passed to a notifier will be @RCU_STALL_NOTIFY_NORM or + * friends. The @data will be the duration of the stalled grace period, + * in jiffies, coerced to a void* pointer. + * + * Returns 0 on success, %-EEXIST on error. + */ +int rcu_stall_chain_notifier_register(struct notifier_block *n) +{ + return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n); +} +EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register); + +/** + * rcu_stall_chain_notifier_unregister - Remove an RCU CPU stall notifier + * @n: Entry to add. + * + * Removes an RCU CPU stall notifier from an atomic notifier chain. + * + * Returns zero on success, %-ENOENT on failure. + */ +int rcu_stall_chain_notifier_unregister(struct notifier_block *n) +{ + return atomic_notifier_chain_unregister(&rcu_cpu_stall_notifier_list, n); +} +EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_unregister); + +/* + * rcu_stall_notifier_call_chain - Call functions in an RCU CPU stall notifier chain + * @val: Value passed unmodified to notifier function + * @v: Pointer passed unmodified to notifier function + * + * Calls each function in the RCU CPU stall notifier chain in turn, which + * is an atomic call chain. See atomic_notifier_call_chain() for more + * information. + * + * This is for use within RCU, hence the omission of the extra asterisk + * to indicate a non-kerneldoc format header comment. + */ +int rcu_stall_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v); +} -- cgit v1.2.3 From 7c1b3e0c988f2902695ef6175ab8ad00c0e8b65f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Aug 2023 15:53:32 -0700 Subject: rcutorture: Add test of RCU CPU stall notifiers This commit registers an RCU CPU stall notifier when testing RCU CPU stalls. The notifier logs a message similar to the following: rcu_torture_stall_nf: v=1, duration=21001. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcutorture.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ade42d6a9d9b..1830cacac01d 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2428,6 +2429,16 @@ static int rcutorture_booster_init(unsigned int cpu) return 0; } +static int rcu_torture_stall_nf(struct notifier_block *nb, unsigned long v, void *ptr) +{ + pr_info("%s: v=%lu, duration=%lu.\n", __func__, v, (unsigned long)ptr); + return NOTIFY_OK; +} + +static struct notifier_block rcu_torture_stall_block = { + .notifier_call = rcu_torture_stall_nf, +}; + /* * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then * induces a CPU stall for the time specified by stall_cpu. @@ -2435,9 +2446,14 @@ static int rcutorture_booster_init(unsigned int cpu) static int rcu_torture_stall(void *args) { int idx; + int ret; unsigned long stop_at; VERBOSE_TOROUT_STRING("rcu_torture_stall task started"); + ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block); + if (ret) + pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n", + __func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : ""); if (stall_cpu_holdoff > 0) { VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff"); schedule_timeout_interruptible(stall_cpu_holdoff * HZ); @@ -2481,6 +2497,11 @@ static int rcu_torture_stall(void *args) cur_ops->readunlock(idx); } pr_alert("%s end.\n", __func__); + if (!ret) { + ret = rcu_stall_chain_notifier_unregister(&rcu_torture_stall_block); + if (ret) + pr_info("%s: rcu_stall_chain_notifier_unregister() returned %d.\n", __func__, ret); + } torture_shutdown_absorb("rcu_torture_stall"); while (!kthread_should_stop()) schedule_timeout_interruptible(10 * HZ); -- cgit v1.2.3 From b96e7a5fa0ba9cda32888e04f8f4bac42d49a7f8 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 5 Sep 2023 00:02:11 +0000 Subject: rcu/tree: Defer setting of jiffies during stall reset There are instances where rcu_cpu_stall_reset() is called when jiffies did not get a chance to update for a long time. Before jiffies is updated, the CPU stall detector can go off triggering false-positives where a just-started grace period appears to be ages old. In the past, we disabled stall detection in rcu_cpu_stall_reset() however this got changed [1]. This is resulting in false-positives in KGDB usecase [2]. Fix this by deferring the update of jiffies to the third run of the FQS loop. This is more robust, as, even if rcu_cpu_stall_reset() is called just before jiffies is read, we would end up pushing out the jiffies read by 3 more FQS loops. Meanwhile the CPU stall detection will be delayed and we will not get any false positives. [1] https://lore.kernel.org/all/20210521155624.174524-2-senozhatsky@chromium.org/ [2] https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/ Tested with rcutorture.cpu_stall option as well to verify stall behavior with/without patch. Tested-by: Huacai Chen Reported-by: Binbin Zhou Closes: https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/ Suggested-by: Paul McKenney Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: a80be428fbc1 ("rcu: Do not disable GP stall detection in rcu_cpu_stall_reset()") Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree.c | 12 ++++++++++++ kernel/rcu/tree.h | 4 ++++ kernel/rcu/tree_stall.h | 20 ++++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb1caefa8bd0..d85779f67aea 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1556,10 +1556,22 @@ static bool rcu_gp_fqs_check_wake(int *gfp) */ static void rcu_gp_fqs(bool first_time) { + int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall); struct rcu_node *rnp = rcu_get_root(); WRITE_ONCE(rcu_state.gp_activity, jiffies); WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1); + + WARN_ON_ONCE(nr_fqs > 3); + /* Only countdown nr_fqs for stall purposes if jiffies moves. */ + if (nr_fqs) { + if (nr_fqs == 1) { + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + rcu_jiffies_till_stall_check()); + } + WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs); + } + if (first_time) { /* Collect dyntick-idle snapshots. */ force_qs_rnp(dyntick_save_progress_counter); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 192536916f9a..e9821a8422db 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -386,6 +386,10 @@ struct rcu_state { /* in jiffies. */ unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ + int nr_fqs_jiffies_stall; /* Number of fqs loops after + * which read jiffies and set + * jiffies_stall. Stall + * warnings disabled if !0. */ unsigned long jiffies_resched; /* Time at which to resched */ /* a reluctant CPU. */ unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 49544f932279..ac8e86babe44 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -150,12 +150,17 @@ static void panic_on_rcu_stall(void) /** * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period * + * To perform the reset request from the caller, disable stall detection until + * 3 fqs loops have passed. This is required to ensure a fresh jiffies is + * loaded. It should be safe to do from the fqs loop as enough timer + * interrupts and context switches should have passed. + * * The caller must disable hard irqs. */ void rcu_cpu_stall_reset(void) { - WRITE_ONCE(rcu_state.jiffies_stall, - jiffies + rcu_jiffies_till_stall_check()); + WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3); + WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX); } ////////////////////////////////////////////////////////////////////////////// @@ -171,6 +176,7 @@ static void record_gp_stall_check_time(void) WRITE_ONCE(rcu_state.gp_start, j); j1 = rcu_jiffies_till_stall_check(); smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq. + WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0); WRITE_ONCE(rcu_state.jiffies_stall, j + j1); rcu_state.jiffies_resched = j + j1 / 2; rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); @@ -726,6 +732,16 @@ static void check_cpu_stall(struct rcu_data *rdp) !rcu_gp_in_progress()) return; rcu_stall_kick_kthreads(); + + /* + * Check if it was requested (via rcu_cpu_stall_reset()) that the FQS + * loop has to set jiffies to ensure a non-stale jiffies value. This + * is required to have good jiffies value after coming out of long + * breaks of jiffies updates. Not doing so can cause false positives. + */ + if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0) + return; + j = jiffies; /* -- cgit v1.2.3 From 92a708dc1fb8c33b0017ad77dc7ff6e434f96ee2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Jul 2023 13:13:46 -0700 Subject: rcu-tasks: Add printk()s to localize boot-time self-test hang Currently, rcu_tasks_initiate_self_tests() prints a message and then initiates self tests on up to three different RCU Tasks flavors. If one of the flavors has a grace-period hang, it is not easy to work out which of the three hung. This commit therefore prints a message prior to each individual test. Reported-by: Guenter Roeck Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tasks.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 8d65f7d576a3..83049a893de5 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1979,20 +1979,22 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp) static void rcu_tasks_initiate_self_tests(void) { - pr_info("Running RCU-tasks wait API self tests\n"); #ifdef CONFIG_TASKS_RCU + pr_info("Running RCU Tasks wait API self tests\n"); tests[0].runstart = jiffies; synchronize_rcu_tasks(); call_rcu_tasks(&tests[0].rh, test_rcu_tasks_callback); #endif #ifdef CONFIG_TASKS_RUDE_RCU + pr_info("Running RCU Tasks Rude wait API self tests\n"); tests[1].runstart = jiffies; synchronize_rcu_tasks_rude(); call_rcu_tasks_rude(&tests[1].rh, test_rcu_tasks_callback); #endif #ifdef CONFIG_TASKS_TRACE_RCU + pr_info("Running RCU Tasks Trace wait API self tests\n"); tests[2].runstart = jiffies; synchronize_rcu_tasks_trace(); call_rcu_tasks_trace(&tests[2].rh, test_rcu_tasks_callback); -- cgit v1.2.3 From e62d8ae4620865411d1b2347980aa28ccf891a3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Aug 2023 13:42:00 -0700 Subject: rcu-tasks: Pull sampling of ->percpu_dequeue_lim out of loop The rcu_tasks_need_gpcb() samples ->percpu_dequeue_lim as part of the condition clause of a "for" loop, which is a bit confusing. This commit therefore hoists this sampling out of the loop, using the result loaded in the condition clause. So why does this work in the face of a concurrent switch from single-CPU queueing to per-CPU queueing? o The call_rcu_tasks_generic() that makes the change has already enqueued its callback, which means that all of the other CPU's callback queues are empty. o For the call_rcu_tasks_generic() that first notices the switch to per-CPU queues, the smp_store_release() used to update ->percpu_enqueue_lim pairs with the raw_spin_trylock_rcu_node()'s full barrier that is between the READ_ONCE(rtp->percpu_enqueue_shift) and the rcu_segcblist_enqueue() that enqueues the callback. o Because this CPU's queue is empty (unless it happens to be the original single queue, in which case there is no need for synchronization), this call_rcu_tasks_generic() will do an irq_work_queue() to schedule a handler for the needed rcuwait_wake_up() call. This call will be ordered after the first call_rcu_tasks_generic() function's change to ->percpu_dequeue_lim. o This rcuwait_wake_up() will either happen before or after the set_current_state() in rcuwait_wait_event(). If it happens before, the "condition" argument's call to rcu_tasks_need_gpcb() will be ordered after the original change, and all callbacks on all CPUs will be visible. Otherwise, if it happens after, then the grace-period kthread's state will be set back to running, which will result in a later call to rcuwait_wait_event() and thus to rcu_tasks_need_gpcb(), which will again see the change. So it all works out. Suggested-by: Linus Torvalds Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tasks.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 83049a893de5..94bb5abdbb37 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -432,6 +432,7 @@ static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp) static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) { int cpu; + int dequeue_limit; unsigned long flags; bool gpdone = poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq); long n; @@ -439,7 +440,8 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) long ncbsnz = 0; int needgpcb = 0; - for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) { + dequeue_limit = smp_load_acquire(&rtp->percpu_dequeue_lim); + for (cpu = 0; cpu < dequeue_limit; cpu++) { struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); /* Advance and accelerate any new callbacks. */ -- cgit v1.2.3 From 0325e8a1282dfd30c3e44928c6384bb978649c63 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 3 Aug 2023 16:06:59 +0800 Subject: rcu-tasks: Make rcu_tasks_lazy_ms static The rcu_tasks_lazy_ms variable is not used outside the file tasks.h, so this commit marks it static. kernel/rcu/tasks.h:1085:5: warning: symbol 'rcu_tasks_lazy_ms' was not declared. Should it be static? Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6086 Signed-off-by: Jiapeng Chong Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 94bb5abdbb37..018f03f20629 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1086,7 +1086,7 @@ void rcu_barrier_tasks(void) } EXPORT_SYMBOL_GPL(rcu_barrier_tasks); -int rcu_tasks_lazy_ms = -1; +static int rcu_tasks_lazy_ms = -1; module_param(rcu_tasks_lazy_ms, int, 0444); static int __init rcu_spawn_tasks_kthread(void) -- cgit v1.2.3 From 730c3ed4ba30feadfb20b4d4d18e869bc00348f6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Aug 2023 09:30:18 -0700 Subject: refscale: Fix misplaced data re-read This commit fixes a misplaced data re-read in the typesafe code. The reason that this was not noticed is that this is a performance test with no writers, so a mismatch could not occur. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/refscale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 91a0fd0d4d9a..750a63e99539 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -655,12 +655,12 @@ retry: goto retry; } un_delay(udl, ndl); + b = READ_ONCE(rtsp->a); // Remember, seqlock read-side release can fail. if (!rts_release(rtsp, start)) { rcu_read_unlock(); goto retry; } - b = READ_ONCE(rtsp->a); WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b); b = rtsp->b; rcu_read_unlock(); -- cgit v1.2.3 From d6fea1dde2064c8f298dbe872587c7ace7701b3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Aug 2023 11:27:26 -0700 Subject: refscale: Print out additional module parameters The refscale.verbose_batched and refscale.lookup_instances module parameters are omitted from the ref_scale_print_module_parms() beginning-of-test output. This commit therefore adds them. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/refscale.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 750a63e99539..2c2648a3ad30 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -1025,8 +1025,8 @@ static void ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, - verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay); + "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, + verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); } static void -- cgit v1.2.3 From f0a31b26be1f725e3f56beed76486c1b034120b3 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 29 Jul 2023 14:27:32 +0000 Subject: srcu: Fix error handling in init_srcu_struct_fields() The current error handling in init_srcu_struct_fields() is a bit inconsistent. If init_srcu_struct_nodes() fails, the function either returns -ENOMEM or 0 depending on whether ssp->sda_is_static is true or false. This can make init_srcu_struct_fields() return 0 even if memory allocation failed! Simplify the error handling by always returning -ENOMEM if either init_srcu_struct_nodes() or the per-CPU allocation fails. This makes the control flow easier to follow and avoids the inconsistent return values. Add goto labels to avoid duplicating the error cleanup code. Link: https://lore.kernel.org/r/20230404003508.GA254019@google.com Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/srcutree.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 20d7a238d675..f1a905200fc2 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -255,29 +255,31 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->sda_is_static = is_static; if (!is_static) ssp->sda = alloc_percpu(struct srcu_data); - if (!ssp->sda) { - if (!is_static) - kfree(ssp->srcu_sup); - return -ENOMEM; - } + if (!ssp->sda) + goto err_free_sup; init_srcu_struct_data(ssp); ssp->srcu_sup->srcu_gp_seq_needed_exp = 0; ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { - if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { - if (!ssp->srcu_sup->sda_is_static) { - free_percpu(ssp->sda); - ssp->sda = NULL; - kfree(ssp->srcu_sup); - return -ENOMEM; - } - } else { - WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); - } + if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) + goto err_free_sda; + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } ssp->srcu_sup->srcu_ssp = ssp; smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */ return 0; + +err_free_sda: + if (!is_static) { + free_percpu(ssp->sda); + ssp->sda = NULL; + } +err_free_sup: + if (!is_static) { + kfree(ssp->srcu_sup); + ssp->srcu_sup = NULL; + } + return -ENOMEM; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.3 From 4502138acc8f4139c94ce0ec0eee926f8805fbbc Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 29 Jul 2023 14:27:36 +0000 Subject: rcu/tree: Remove superfluous return from void call_rcu* functions The return keyword is not needed here. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb1caefa8bd0..7c79480bfaa0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2713,7 +2713,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) */ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) { - return __call_rcu_common(head, func, false); + __call_rcu_common(head, func, false); } EXPORT_SYMBOL_GPL(call_rcu_hurry); #endif @@ -2764,7 +2764,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry); */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); + __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); } EXPORT_SYMBOL_GPL(call_rcu); -- cgit v1.2.3 From 16128b1f8c823438dcd3f3b4a57cbe7267bcf82f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Aug 2023 17:15:25 -0700 Subject: rcu: Add sysfs to provide throttled access to rcu_barrier() When running a series of stress tests all making heavy use of RCU, it is all too possible to OOM the system when the prior test's RCU callbacks don't get invoked until after the subsequent test starts. One way of handling this is just a timed wait, but this fails when a given CPU has so many callbacks queued that they take longer to invoke than allowed for by that timed wait. This commit therefore adds an rcutree.do_rcu_barrier module parameter that is accessible from sysfs. Writing one of the many synonyms for boolean "true" will cause an rcu_barrier() to be invoked, but will guarantee that no more than one rcu_barrier() will be invoked per sixteenth of a second via this mechanism. The flip side is that a given request might wait a second or three longer than absolutely necessary, but only when there are multiple uses of rcutree.do_rcu_barrier within a one-second time interval. This commit unnecessarily serializes the rcu_barrier() machinery, given that serialization is already provided by procfs. This has the advantage of allowing throttled rcu_barrier() from other sources within the kernel. Reported-by: Johannes Weiner Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7c79480bfaa0..3c7281fc25a7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4083,6 +4083,82 @@ retry: } EXPORT_SYMBOL_GPL(rcu_barrier); +static unsigned long rcu_barrier_last_throttle; + +/** + * rcu_barrier_throttled - Do rcu_barrier(), but limit to one per second + * + * This can be thought of as guard rails around rcu_barrier() that + * permits unrestricted userspace use, at least assuming the hardware's + * try_cmpxchg() is robust. There will be at most one call per second to + * rcu_barrier() system-wide from use of this function, which means that + * callers might needlessly wait a second or three. + * + * This is intended for use by test suites to avoid OOM by flushing RCU + * callbacks from the previous test before starting the next. See the + * rcutree.do_rcu_barrier module parameter for more information. + * + * Why not simply make rcu_barrier() more scalable? That might be + * the eventual endpoint, but let's keep it simple for the time being. + * Note that the module parameter infrastructure serializes calls to a + * given .set() function, but should concurrent .set() invocation ever be + * possible, we are ready! + */ +static void rcu_barrier_throttled(void) +{ + unsigned long j = jiffies; + unsigned long old = READ_ONCE(rcu_barrier_last_throttle); + unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence); + + while (time_in_range(j, old, old + HZ / 16) || + !try_cmpxchg(&rcu_barrier_last_throttle, &old, j)) { + schedule_timeout_idle(HZ / 16); + if (rcu_seq_done(&rcu_state.barrier_sequence, s)) { + smp_mb(); /* caller's subsequent code after above check. */ + return; + } + j = jiffies; + old = READ_ONCE(rcu_barrier_last_throttle); + } + rcu_barrier(); +} + +/* + * Invoke rcu_barrier_throttled() when a rcutree.do_rcu_barrier + * request arrives. We insist on a true value to allow for possible + * future expansion. + */ +static int param_set_do_rcu_barrier(const char *val, const struct kernel_param *kp) +{ + bool b; + int ret; + + if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) + return -EAGAIN; + ret = kstrtobool(val, &b); + if (!ret && b) { + atomic_inc((atomic_t *)kp->arg); + rcu_barrier_throttled(); + atomic_dec((atomic_t *)kp->arg); + } + return ret; +} + +/* + * Output the number of outstanding rcutree.do_rcu_barrier requests. + */ +static int param_get_do_rcu_barrier(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%d\n", atomic_read((atomic_t *)kp->arg)); +} + +static const struct kernel_param_ops do_rcu_barrier_ops = { + .set = param_set_do_rcu_barrier, + .get = param_get_do_rcu_barrier, +}; +static atomic_t do_rcu_barrier; +module_param_cb(do_rcu_barrier, &do_rcu_barrier_ops, &do_rcu_barrier, 0644); + /* * Compute the mask of online CPUs for the specified rcu_node structure. * This will not be stable unless the rcu_node structure's ->lock is -- cgit v1.2.3 From 2cbc482d325ee58001472c4359b311958c4efdd1 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 5 Aug 2023 11:17:26 +0800 Subject: rcu: Dump memory object info if callback function is invalid When a structure containing an RCU callback rhp is (incorrectly) freed and reallocated after rhp is passed to call_rcu(), it is not unusual for rhp->func to be set to NULL. This defeats the debugging prints used by __call_rcu_common() in kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the offending code using the identity of this function. And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things are even worse, as can be seen from this splat: Unable to handle kernel NULL pointer dereference at virtual address 0 ... ... PC is at 0x0 LR is at rcu_do_batch+0x1c0/0x3b8 ... ... (rcu_do_batch) from (rcu_core+0x1d4/0x284) (rcu_core) from (__do_softirq+0x24c/0x344) (__do_softirq) from (__irq_exit_rcu+0x64/0x108) (__irq_exit_rcu) from (irq_exit+0x8/0x10) (irq_exit) from (__handle_domain_irq+0x74/0x9c) (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98) (gic_handle_irq) from (__irq_svc+0x5c/0x94) (__irq_svc) from (arch_cpu_idle+0x20/0x3c) (arch_cpu_idle) from (default_idle_call+0x4c/0x78) (default_idle_call) from (do_idle+0xf8/0x150) (do_idle) from (cpu_startup_entry+0x18/0x20) (cpu_startup_entry) from (0xc01530) This commit therefore adds calls to mem_dump_obj(rhp) to output some information, for example: slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256 This provides the rough size of the memory block and the offset of the rcu_head structure, which as least provides at least a few clues to help locate the problem. If the problem is reproducible, additional slab debugging can be enabled, for example, CONFIG_DEBUG_SLAB=y, which can provide significantly more information. Signed-off-by: Zhen Lei Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcu.h | 7 +++++++ kernel/rcu/srcutiny.c | 1 + kernel/rcu/srcutree.c | 1 + kernel/rcu/tasks.h | 1 + kernel/rcu/tiny.c | 1 + kernel/rcu/tree.c | 1 + 6 files changed, 12 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 98e13be411af..d612731feea4 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -10,6 +10,7 @@ #ifndef __LINUX_RCU_H #define __LINUX_RCU_H +#include #include /* @@ -248,6 +249,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void debug_rcu_head_callback(struct rcu_head *rhp) +{ + if (unlikely(!rhp->func)) + kmem_dump_obj(rhp); +} + extern int rcu_cpu_stall_suppress_at_boot; static inline bool rcu_stall_is_suppressed_at_boot(void) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 336af24e0fe3..c38e5933a5d6 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp) while (lh) { rhp = lh; lh = lh->next; + debug_rcu_head_callback(rhp); local_bh_disable(); rhp->func(rhp); local_bh_enable(); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index f1a905200fc2..833a8f848a90 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1710,6 +1710,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); + debug_rcu_head_callback(rhp); local_bh_disable(); rhp->func(rhp); local_bh_enable(); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 8d65f7d576a3..7c845532a50a 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -538,6 +538,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); len = rcl.len; for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) { + debug_rcu_head_callback(rhp); local_bh_disable(); rhp->func(rhp); local_bh_enable(); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 42f7589e51e0..fec804b79080 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head) trace_rcu_invoke_callback("", head); f = head->func; + debug_rcu_head_callback(head); WRITE_ONCE(head->func, (rcu_callback_t)0L); f(head); rcu_lock_release(&rcu_callback_map); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3c7281fc25a7..aae515071ffd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2135,6 +2135,7 @@ static void rcu_do_batch(struct rcu_data *rdp) trace_rcu_invoke_callback(rcu_state.name, rhp); f = rhp->func; + debug_rcu_head_callback(rhp); WRITE_ONCE(rhp->func, (rcu_callback_t)0L); f(rhp); -- cgit v1.2.3 From 0ae9942f03d0d034fdb0a4f44fc99f62a3107987 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Aug 2023 08:53:58 -0700 Subject: rcu: Eliminate rcu_gp_slow_unregister() false positive When using rcutorture as a module, there are a number of conditions that can abort the modprobe operation, for example, when attempting to run both RCU CPU stall warning tests and forward-progress tests. This can cause rcu_torture_cleanup() to be invoked on the unwind path out of rcu_rcu_torture_init(), which will mean that rcu_gp_slow_unregister() is invoked without a matching rcu_gp_slow_register(). This will cause a splat because rcu_gp_slow_unregister() is passed rcu_fwd_cb_nodelay, which does not match a NULL pointer. This commit therefore forgives a mismatch involving a NULL pointer, thus avoiding this false-positive splat. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index aae515071ffd..a83ecab77917 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1260,7 +1260,7 @@ EXPORT_SYMBOL_GPL(rcu_gp_slow_register); /* Unregister a counter, with NULL for not caring which. */ void rcu_gp_slow_unregister(atomic_t *rgssp) { - WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress); + WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress && rcu_gp_slow_suppress != NULL); WRITE_ONCE(rcu_gp_slow_suppress, NULL); } -- cgit v1.2.3 From d0b654e19a83840e11d6ba87ea0be30e5fae343a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Jul 2023 12:03:20 -0700 Subject: torture: Share torture_random_state with torture_shuffle_tasks() Both torture_shuffle_tasks() and its caller torture_shuffle() define a torture_random_state structure. This is suboptimal given that torture_shuffle_tasks() runs for a very short period of time. This commit therefore causes torture_shuffle() to pass a pointer to its torture_random_state structure down to torture_shuffle_tasks(). Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/torture.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/torture.c b/kernel/torture.c index b28b05bbef02..68dba4ecab5c 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -520,9 +520,8 @@ static void torture_shuffle_task_unregister_all(void) * A special case is when shuffle_idle_cpu = -1, in which case we allow * the tasks to run on all CPUs. */ -static void torture_shuffle_tasks(void) +static void torture_shuffle_tasks(struct torture_random_state *trp) { - DEFINE_TORTURE_RANDOM(rand); struct shuffle_task *stp; cpumask_setall(shuffle_tmp_mask); @@ -543,7 +542,7 @@ static void torture_shuffle_tasks(void) mutex_lock(&shuffle_task_mutex); list_for_each_entry(stp, &shuffle_task_list, st_l) { - if (!random_shuffle || torture_random(&rand) & 0x1) + if (!random_shuffle || torture_random(trp) & 0x1) set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask); } mutex_unlock(&shuffle_task_mutex); @@ -562,7 +561,7 @@ static int torture_shuffle(void *arg) VERBOSE_TOROUT_STRING("torture_shuffle task started"); do { torture_hrtimeout_jiffies(shuffle_interval, &rand); - torture_shuffle_tasks(); + torture_shuffle_tasks(&rand); torture_shutdown_absorb("torture_shuffle"); } while (!torture_must_stop()); torture_kthread_stopping("torture_shuffle"); @@ -673,7 +672,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void)) if (ssecs > 0) { shutdown_time = ktime_add(ktime_get(), ktime_set(ssecs, 0)); return torture_create_kthread(torture_shutdown, NULL, - shutdown_task); + shutdown_task); } return 0; } -- cgit v1.2.3 From a741deac787f0d2d7068638c067db20af9e63752 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 26 Jul 2023 13:57:03 -0700 Subject: torture: Make torture_hrtimeout_ns() take an hrtimer mode parameter The current torture-test sleeps are waiting for a duration, but there are situations where it is better to wait for an absolute time, for example, when ending a stutter interval. This commit therefore adds an hrtimer mode parameter to torture_hrtimeout_ns(). Why not also the other torture_hrtimeout_*() functions? The theory is that most absolute times will be in nanoseconds, especially not (say) jiffies. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/torture.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/torture.c b/kernel/torture.c index 68dba4ecab5c..6ba62e5993e7 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -87,14 +87,15 @@ EXPORT_SYMBOL_GPL(verbose_torout_sleep); * nanosecond random fuzz. This function and its friends desynchronize * testing from the timer wheel. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp) +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp) { ktime_t hto = baset_ns; if (trsp) hto += torture_random(trsp) % fuzzt_ns; set_current_state(TASK_IDLE); - return schedule_hrtimeout(&hto, HRTIMER_MODE_REL); + return schedule_hrtimeout(&hto, mode); } EXPORT_SYMBOL_GPL(torture_hrtimeout_ns); @@ -106,7 +107,7 @@ int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state { ktime_t baset_ns = baset_us * NSEC_PER_USEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_us); @@ -123,7 +124,7 @@ int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state fuzzt_ns = (u32)~0U; else fuzzt_ns = fuzzt_us * NSEC_PER_USEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_ms); @@ -136,7 +137,7 @@ int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp) { ktime_t baset_ns = jiffies_to_nsecs(baset_j); - return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), trsp); + return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_jiffies); @@ -153,7 +154,7 @@ int torture_hrtimeout_s(u32 baset_s, u32 fuzzt_ms, struct torture_random_state * fuzzt_ns = (u32)~0U; else fuzzt_ns = fuzzt_ms * NSEC_PER_MSEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_s); -- cgit v1.2.3 From 3853a720f8bce9c5facb593e817f97659cc0512a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Aug 2023 16:40:11 +0200 Subject: rcu: Include torture_sched_setaffinity() declaration The prototype for torture_sched_setaffinity() will be moved to a different header, which will need to be included from update.c to avoid this W=1 warning: kernel/rcu/update.c:529:6: error: no previous prototype for 'torture_sched_setaffinity' [-Werror=missing-prototypes] 529 | long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) Signed-off-by: Arnd Bergmann Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/update.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 19bf6fa3ee6a..9d3c2e6ba667 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 0cfecd7d754f2ef5d7e6b56ee656a8544ade920a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 31 Jul 2023 13:10:34 -0700 Subject: torture: Move rcutorture_sched_setaffinity() out of rcutorture The rcutorture_sched_setaffinity() function is needed by locktorture, so move its declaration from rcu.h to torture.h and rename it to the more generic torture_sched_setaffinity() name. Please note that use of this function is still restricted to torture tests, and of those, currently only rcutorture and locktorture. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcu.h | 4 ---- kernel/rcu/rcutorture.c | 2 +- kernel/rcu/update.c | 8 ++++---- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 98e13be411af..567bd3d72e39 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -568,10 +568,6 @@ void do_trace_rcu_torture_read(const char *rcutorturename, static inline void rcu_gp_set_torture_wait(int duration) { } #endif -#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) -long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); -#endif - #ifdef CONFIG_TINY_SRCU static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ade42d6a9d9b..7e82fb887d09 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -810,7 +810,7 @@ static void synchronize_rcu_trivial(void) int cpu; for_each_online_cpu(cpu) { - rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu)); + torture_sched_setaffinity(current->pid, cpumask_of(cpu)); WARN_ON_ONCE(raw_smp_processor_id() != cpu); } } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9d3c2e6ba667..c534d6806d3d 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -525,17 +525,17 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); do { } while (0) #endif -#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) /* Get rcutorture access to sched_setaffinity(). */ -long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) { int ret; ret = sched_setaffinity(pid, in_mask); - WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret); + WARN_ONCE(ret, "%s: sched_setaffinity(%d) returned %d\n", __func__, pid, ret); return ret; } -EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity); +EXPORT_SYMBOL_GPL(torture_sched_setaffinity); #endif #ifdef CONFIG_RCU_STALL_COMMON -- cgit v1.2.3 From 73e3412424832f519004dd2176226982be35d34e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Jul 2023 20:04:06 -0700 Subject: locktorture: Add readers_bind and writers_bind module parameters This commit adds readers_bind and writers_bind module parameters to locktorture in order to skew tests across socket boundaries. This skewing is intended to provide additional variable-latency stress on the primitive under test. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/locking/locktorture.c | 64 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 270c7f80ce84..441866259278 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -56,6 +56,55 @@ module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)"); +static cpumask_var_t readers_bind; // Bind the readers to the specified set of CPUs. +static cpumask_var_t writers_bind; // Bind the writers to the specified set of CPUs. + +// Parse a cpumask kernel parameter. If there are more users later on, +// this might need to got to a more central location. +static int param_set_cpumask(const char *val, const struct kernel_param *kp) +{ + cpumask_var_t *cm_bind = kp->arg; + int ret; + char *s; + + if (!alloc_cpumask_var(cm_bind, GFP_KERNEL)) { + s = "Out of memory"; + ret = -ENOMEM; + goto out_err; + } + ret = cpulist_parse(val, *cm_bind); + if (!ret) + return ret; + s = "Bad CPU range"; +out_err: + pr_warn("%s: %s, all CPUs set\n", kp->name, s); + cpumask_setall(*cm_bind); + return ret; +} + +// Output a cpumask kernel parameter. +static int param_get_cpumask(char *buffer, const struct kernel_param *kp) +{ + cpumask_var_t *cm_bind = kp->arg; + + return sprintf(buffer, "%*pbl", cpumask_pr_args(*cm_bind)); +} + +static bool cpumask_nonempty(cpumask_var_t mask) +{ + return cpumask_available(mask) && !cpumask_empty(mask); +} + +static const struct kernel_param_ops lt_bind_ops = { + .set = param_set_cpumask, + .get = param_get_cpumask, +}; + +module_param_cb(readers_bind, <_bind_ops, &readers_bind, 0644); +module_param_cb(writers_bind, <_bind_ops, &writers_bind, 0644); + +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); + static struct task_struct *stats_task; static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; @@ -986,16 +1035,23 @@ static int lock_torture_stats(void *arg) return 0; } + static inline void lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, const char *tag) { + static cpumask_t cpumask_all; + cpumask_t *rcmp = cpumask_nonempty(readers_bind) ? readers_bind : &cpumask_all; + cpumask_t *wcmp = cpumask_nonempty(writers_bind) ? writers_bind : &cpumask_all; + + cpumask_setall(&cpumask_all); pr_alert("%s" TORTURE_FLAG - "--- %s%s: nwriters_stress=%d nreaders_stress=%d nested_locks=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", + "--- %s%s: nwriters_stress=%d nreaders_stress=%d nested_locks=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d readers_bind=%*pbl writers_bind=%*pbl\n", torture_type, tag, cxt.debug_lock ? " [debug]": "", cxt.nrealwriters_stress, cxt.nrealreaders_stress, nested_locks, stat_interval, verbose, shuffle_interval, - stutter, shutdown_secs, onoff_interval, onoff_holdoff); + stutter, shutdown_secs, onoff_interval, onoff_holdoff, + cpumask_pr_args(rcmp), cpumask_pr_args(wcmp)); } static void lock_torture_cleanup(void) @@ -1250,6 +1306,8 @@ static int __init lock_torture_init(void) writer_fifo ? sched_set_fifo : NULL); if (torture_init_error(firsterr)) goto unwind; + if (cpumask_nonempty(writers_bind)) + torture_sched_setaffinity(writer_tasks[i]->pid, writers_bind); create_reader: if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress)) @@ -1259,6 +1317,8 @@ static int __init lock_torture_init(void) reader_tasks[j]); if (torture_init_error(firsterr)) goto unwind; + if (cpumask_nonempty(readers_bind)) + torture_sched_setaffinity(reader_tasks[j]->pid, readers_bind); } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, -- cgit v1.2.3 From cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 29 Jul 2023 14:27:31 +0000 Subject: rcutorture: Fix stuttering races and other issues The stuttering code isn't functioning as expected. Ideally, it should pause the torture threads for a designated period before resuming. Yet, it fails to halt the test for the correct duration. Additionally, a race condition exists, potentially causing the stuttering code to pause for an extended period if the 'spt' variable is non-zero due to the stutter orchestration thread's inadequate CPU time. Moreover, over-stuttering can hinder RCU's progress on TREE07 kernels. This happens as the stuttering code may run within a softirq due to RCU callbacks. Consequently, ksoftirqd keeps a CPU busy for several seconds, thus obstructing RCU's progress. This situation triggers a warning message in the logs: [ 2169.481783] rcu_torture_writer: rtort_pipe_count: 9 This warning suggests that an RCU torture object, although invisible to RCU readers, couldn't make it past the pipe array and be freed -- a strong indication that there weren't enough grace periods during the stutter interval. To address these issues, this patch sets the "stutter end" time to an absolute point in the future set by the main stutter thread. This is then used for waiting in stutter_wait(). While the stutter thread still defines this absolute time, the waiters' waiting logic doesn't rely on the stutter thread receiving sufficient CPU time to halt the stuttering as the halting is now self-controlled. Cc: stable@vger.kernel.org Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/torture.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/torture.c b/kernel/torture.c index 6ba62e5993e7..fd353f98162f 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -720,7 +720,7 @@ static void torture_shutdown_cleanup(void) * suddenly applied to or removed from the system. */ static struct task_struct *stutter_task; -static int stutter_pause_test; +static ktime_t stutter_till_abs_time; static int stutter; static int stutter_gap; @@ -730,30 +730,16 @@ static int stutter_gap; */ bool stutter_wait(const char *title) { - unsigned int i = 0; bool ret = false; - int spt; + ktime_t till_ns; cond_resched_tasks_rcu_qs(); - spt = READ_ONCE(stutter_pause_test); - for (; spt; spt = READ_ONCE(stutter_pause_test)) { - if (!ret && !rt_task(current)) { - sched_set_normal(current, MAX_NICE); - ret = true; - } - if (spt == 1) { - torture_hrtimeout_jiffies(1, NULL); - } else if (spt == 2) { - while (READ_ONCE(stutter_pause_test)) { - if (!(i++ & 0xffff)) - torture_hrtimeout_us(10, 0, NULL); - cond_resched(); - } - } else { - torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL); - } - torture_shutdown_absorb(title); + till_ns = READ_ONCE(stutter_till_abs_time); + if (till_ns && ktime_before(ktime_get(), till_ns)) { + torture_hrtimeout_ns(till_ns, 0, HRTIMER_MODE_ABS, NULL); + ret = true; } + torture_shutdown_absorb(title); return ret; } EXPORT_SYMBOL_GPL(stutter_wait); @@ -764,23 +750,16 @@ EXPORT_SYMBOL_GPL(stutter_wait); */ static int torture_stutter(void *arg) { - DEFINE_TORTURE_RANDOM(rand); - int wtime; + ktime_t till_ns; VERBOSE_TOROUT_STRING("torture_stutter task started"); do { if (!torture_must_stop() && stutter > 1) { - wtime = stutter; - if (stutter > 2) { - WRITE_ONCE(stutter_pause_test, 1); - wtime = stutter - 3; - torture_hrtimeout_jiffies(wtime, &rand); - wtime = 2; - } - WRITE_ONCE(stutter_pause_test, 2); - torture_hrtimeout_jiffies(wtime, NULL); + till_ns = ktime_add_ns(ktime_get(), + jiffies_to_nsecs(stutter)); + WRITE_ONCE(stutter_till_abs_time, till_ns); + torture_hrtimeout_jiffies(stutter - 1, NULL); } - WRITE_ONCE(stutter_pause_test, 0); if (!torture_must_stop()) torture_hrtimeout_jiffies(stutter_gap, NULL); torture_shutdown_absorb("torture_stutter"); -- cgit v1.2.3 From 31742a56c676b6014fde99a0cf07d3d12b822e9a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Aug 2023 15:30:31 -0700 Subject: locktorture: Alphabetize torture_param() entries There are getting to be too many module parameters for a random list to be comfortable, so this commit alphabetizes the list. Strictly code motion. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- kernel/locking/locktorture.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 441866259278..57ee16cf879d 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -33,21 +33,21 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); -torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads"); -torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads"); torture_param(int, long_hold, 100, "Do occasional long hold of lock (ms), 0=disable"); +torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)"); +torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads"); +torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads"); torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, o