From d0b6e0a8ef24b1b07078ababe5d91bcdf4f4264a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Sep 2017 21:36:55 +0200 Subject: watchdog/hardlockup: Provide interface to stop/restart perf events Provide an interface to stop and restart perf NMI watchdog events on all CPUs. This is only usable during init and especially for handling the perf HT bug on Intel machines. It's safe to use it this way as nothing can start/stop the NMI watchdog in parallel. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.167649596@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog_hld.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'kernel') diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 3a09ea1b1d3d..c9586ebc2e98 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -261,3 +261,44 @@ void watchdog_nmi_disable(unsigned int cpu) firstcpu_err = 0; } } + +/** + * hardlockup_detector_perf_stop - Globally stop watchdog events + * + * Special interface for x86 to handle the perf HT bug. + */ +void __init hardlockup_detector_perf_stop(void) +{ + int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) + perf_event_disable(event); + } +} + +/** + * hardlockup_detector_perf_restart - Globally restart watchdog events + * + * Special interface for x86 to handle the perf HT bug. + */ +void __init hardlockup_detector_perf_restart(void) +{ + int cpu; + + lockdep_assert_cpus_held(); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return; + + for_each_online_cpu(cpu) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) + perf_event_enable(event); + } +} -- cgit v1.2.3 From 6554fd8cf06db86f861bb24d7487b2873ca444c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:57 +0200 Subject: watchdog/core: Provide interface to stop from poweroff() PARISC has a a busy looping power off routine. If the watchdog is enabled the watchdog timer will still fire, but the thread is not running, which causes the softlockup watchdog to trigger. Provide a interface which allows to turn the watchdog off. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Helge Deller Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linux-parisc@vger.kernel.org Link: http://lkml.kernel.org/r/20170912194146.327343752@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f5d52024f6b7..f23e373aa3bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -333,7 +333,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; - if (atomic_read(&watchdog_park_in_progress) != 0) + if (!watchdog_enabled || + atomic_read(&watchdog_park_in_progress) != 0) return HRTIMER_NORESTART; /* kick the hardlockup detector */ @@ -660,6 +661,17 @@ static void set_sample_period(void) } #endif /* SOFTLOCKUP */ +/** + * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) + * + * Special interface for parisc. It prevents lockup detector warnings from + * the default pm_poweroff() function which busy loops forever. + */ +void lockup_detector_soft_poweroff(void) +{ + watchdog_enabled = 0; +} + /* * Suspend the hard and soft lockup detector by parking the watchdog threads. */ -- cgit v1.2.3 From 5490125d77a43016b26f629d4b485e2c62172551 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:59 +0200 Subject: watchdog/core: Remove broken suspend/resume interfaces This interface has several issues: - It's causing recursive locking of the hotplug lock. - It's complete overkill to teardown all threads and then recreate them The same can be achieved with the simple hardlockup_detector_perf_stop / restart() interfaces. The abuse from the busy looping poweroff() loop of PARISC has been solved as well. Remove the cruft. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.487537732@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 89 +------------------------------------------------------ 1 file changed, 1 insertion(+), 88 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f23e373aa3bf..b2d46757917e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -97,19 +97,6 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); * unregistered/stopped, so it is an indicator whether the threads exist. */ static int __read_mostly watchdog_running; -/* - * If a subsystem has a need to deactivate the watchdog temporarily, it - * can use the suspend/resume interface to achieve this. The content of - * the 'watchdog_suspended' variable reflects this state. Existing threads - * are parked/unparked by the lockup_detector_{suspend|resume} functions - * (see comment blocks pertaining to those functions for further details). - * - * 'watchdog_suspended' also prevents threads from being registered/started - * or unregistered/stopped via parameters in /proc/sys/kernel, so the state - * of 'watchdog_running' cannot change while the watchdog is deactivated - * temporarily (see related code in 'proc' handlers). - */ -int __read_mostly watchdog_suspended; /* * These functions can be overridden if an architecture implements its @@ -136,7 +123,6 @@ void __weak watchdog_nmi_disable(unsigned int cpu) * - watchdog_cpumask * - sysctl_hardlockup_all_cpu_backtrace * - hardlockup_panic - * - watchdog_suspended */ void __weak watchdog_nmi_reconfigure(void) { @@ -672,61 +658,6 @@ void lockup_detector_soft_poweroff(void) watchdog_enabled = 0; } -/* - * Suspend the hard and soft lockup detector by parking the watchdog threads. - */ -int lockup_detector_suspend(void) -{ - int ret = 0; - - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); - /* - * Multiple suspend requests can be active in parallel (counted by - * the 'watchdog_suspended' variable). If the watchdog threads are - * running, the first caller takes care that they will be parked. - * The state of 'watchdog_running' cannot change while a suspend - * request is active (see related code in 'proc' handlers). - */ - if (watchdog_running && !watchdog_suspended) - ret = watchdog_park_threads(); - - if (ret == 0) - watchdog_suspended++; - else { - watchdog_disable_all_cpus(); - pr_err("Failed to suspend lockup detectors, disabled\n"); - watchdog_enabled = 0; - } - - watchdog_nmi_reconfigure(); - - mutex_unlock(&watchdog_proc_mutex); - - return ret; -} - -/* - * Resume the hard and soft lockup detector by unparking the watchdog threads. - */ -void lockup_detector_resume(void) -{ - mutex_lock(&watchdog_proc_mutex); - - watchdog_suspended--; - /* - * The watchdog threads are unparked if they were previously running - * and if there is no more active suspend request. - */ - if (watchdog_running && !watchdog_suspended) - watchdog_unpark_threads(); - - watchdog_nmi_reconfigure(); - - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); -} - #ifdef CONFIG_SYSCTL /* @@ -775,12 +706,6 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, get_online_cpus(); mutex_lock(&watchdog_proc_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - /* * If the parameter is being read return the state of the corresponding * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the @@ -872,12 +797,6 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, get_online_cpus(); mutex_lock(&watchdog_proc_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - old = ACCESS_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -917,12 +836,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, get_online_cpus(); mutex_lock(&watchdog_proc_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) { /* Remove impossible cpus to keep sysctl output cleaner. */ @@ -941,7 +854,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, watchdog_nmi_reconfigure(); } -out: + mutex_unlock(&watchdog_proc_mutex); put_online_cpus(); return err; -- cgit v1.2.3 From b7a349819d4b9b5db64e523351e66a79a758eaa5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:00 +0200 Subject: watchdog/core: Rework CPU hotplug locking The watchdog proc interface causes extensive recursive locking of the CPU hotplug percpu rwsem, which is deadlock prone. Replace the get/put_online_cpus() pairs with cpu_hotplug_disable()/enable() calls for now. Later patches will remove that requirement completely. Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.568079057@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b2d46757917e..cd79f644ea34 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -703,7 +703,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, int err, old, new; int *watchdog_param = (int *)table->data; - get_online_cpus(); + cpu_hotplug_disable(); mutex_lock(&watchdog_proc_mutex); /* @@ -752,7 +752,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + cpu_hotplug_enable(); return err; } @@ -794,7 +794,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, { int err, old, new; - get_online_cpus(); + cpu_hotplug_disable(); mutex_lock(&watchdog_proc_mutex); old = ACCESS_ONCE(watchdog_thresh); @@ -818,7 +818,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + cpu_hotplug_enable(); return err; } @@ -833,7 +833,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, { int err; - get_online_cpus(); + cpu_hotplug_disable(); mutex_lock(&watchdog_proc_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); @@ -856,7 +856,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, } mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + cpu_hotplug_enable(); return err; } -- cgit v1.2.3 From 946d197794b23202b8b46c43016747c72fe23393 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:01 +0200 Subject: watchdog/core: Rename watchdog_proc_mutex Following patches will use the mutex for other purposes as well. Rename it as it is not longer a proc specific thing. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.647714850@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cd79f644ea34..7c3a0a76b41b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,8 +29,7 @@ #include #include -/* Watchdog configuration */ -static DEFINE_MUTEX(watchdog_proc_mutex); +static DEFINE_MUTEX(watchdog_mutex); int __read_mostly nmi_watchdog_enabled; @@ -704,7 +703,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, int *watchdog_param = (int *)table->data; cpu_hotplug_disable(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); /* * If the parameter is being read return the state of the corresponding @@ -751,7 +750,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, err = proc_watchdog_update(); } out: - mutex_unlock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } @@ -795,7 +794,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, int err, old, new; cpu_hotplug_disable(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); old = ACCESS_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -817,7 +816,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, set_sample_period(); } out: - mutex_unlock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } @@ -834,7 +833,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, int err; cpu_hotplug_disable(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) { @@ -855,7 +854,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, watchdog_nmi_reconfigure(); } - mutex_unlock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } -- cgit v1.2.3 From 7a3558200739e1378800a7a6d7f63c031115f7a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:02 +0200 Subject: watchdog/core: Mark hardlockup_detector_disable() __init The function is only used by the KVM init code. Mark it __init to prevent creative abuse. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.727134632@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7c3a0a76b41b..1c185d9dd468 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -55,7 +55,7 @@ unsigned int __read_mostly hardlockup_panic = * kernel command line parameters are parsed, because otherwise it is not * possible to override this in hardlockup_panic_setup(). */ -void hardlockup_detector_disable(void) +void __init hardlockup_detector_disable(void) { watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; } -- cgit v1.2.3 From 20d853fd0703b1d73c35a22024c0d4fcbcc57c8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:03 +0200 Subject: watchdog/hardlockup/perf: Remove broken self disable on failure The self disabling feature is broken vs. CPU hotplug locking: CPU 0 CPU 1 cpus_write_lock(); cpu_up(1) wait_for_completion() .... unpark_watchdog() ->unpark() perf_event_create() <- fails watchdog_enable &= ~NMI_WATCHDOG; .... cpus_write_unlock(); CPU 2 cpus_write_lock() cpu_down(2) wait_for_completion() wakeup(watchdog); watchdog() if (!(watchdog_enable & NMI_WATCHDOG)) watchdog_nmi_disable() perf_event_disable() .... cpus_read_lock(); stop_smpboot_threads() park_watchdog(); wait_for_completion(watchdog->parked); Result: End of hotplug and instantaneous full lockup of the machine. There is a similar problem with disabling the watchdog via the user space interface as the sysctl function fiddles with watchdog_enable directly. It's very debatable whether this is required at all. If the watchdog works nicely on N CPUs and it fails to enable on the N + 1 CPU either during hotplug or because the user space interface disabled it via sysctl cpumask and then some perf user grabbed the counter which is then unavailable for the watchdog when the sysctl cpumask gets changed back. There is no real justification for this. One of the reasons WHY this is done is the utter stupidity of the init code of the perf NMI watchdog. Instead of checking upfront at boot whether PERF is available and functional at all, it just does this check at run time over and over when user space fiddles with the sysctl. That's broken beyond repair along with the idiotic error code dependent warn level printks and the even more silly printk rate limiting. If the init code checks whether perf works at boot time, then this mess can be more or less avoided completely. Perf does not come magically into life at runtime. Brain usage while coding is overrated. Remove the cruft and add a temporary safe guard which gets removed later. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.806708429@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 15 --------------- kernel/watchdog_hld.c | 20 +++++++------------- 2 files changed, 7 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 1c185d9dd468..af000956286c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -485,21 +485,6 @@ static void watchdog(unsigned int cpu) __this_cpu_write(soft_lockup_hrtimer_cnt, __this_cpu_read(hrtimer_interrupts)); __touch_watchdog(); - - /* - * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the - * failure path. Check for failures that can occur asynchronously - - * for example, when CPUs are on-lined - and shut down the hardware - * perf event on each CPU accordingly. - * - * The only non-obvious place this bit can be cleared is through - * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a - * pr_info here would be too noisy as it would result in a message - * every few seconds if the hardlockup was disabled but the softlockup - * enabled. - */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - watchdog_nmi_disable(cpu); } static struct smp_hotplug_thread watchdog_threads = { diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index c9586ebc2e98..7b602714ea53 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -23,6 +23,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static unsigned long hardlockup_allcpu_dumped; +static bool hardlockup_detector_disabled; void arch_touch_nmi_watchdog(void) { @@ -178,6 +179,10 @@ int watchdog_nmi_enable(unsigned int cpu) if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) goto out; + /* A failure disabled the hardlockup detector permanently */ + if (hardlockup_detector_disabled) + return -ENODEV; + /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -206,18 +211,6 @@ int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* - * Disable the hard lockup detector if _any_ CPU fails to set up - * set up the hardware perf event. The watchdog() function checks - * the NMI_WATCHDOG_ENABLED bit periodically. - * - * The barriers are for syncing up watchdog_enabled across all the - * cpus, as clear_bit() does not use barriers. - */ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) return PTR_ERR(event); @@ -232,7 +225,8 @@ int watchdog_nmi_enable(unsigned int cpu) pr_err("disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); - pr_info("Shutting down hard lockup detector on all cpus\n"); + pr_info("Disabling hard lockup detector permanently\n"); + hardlockup_detector_disabled = true; return PTR_ERR(event); -- cgit v1.2.3 From 941154bd6937a710ae9193a3c733c0029e5ae7b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:04 +0200 Subject: watchdog/hardlockup/perf: Prevent CPU hotplug deadlock The following deadlock is possible in the watchdog hotplug code: cpus_write_lock() ... takedown_cpu() smpboot_park_threads() smpboot_park_thread() kthread_park() ->park() := watchdog_disable() watchdog_nmi_disable() perf_event_release_kernel(); put_event() _free_event() ->destroy() := hw_perf_event_destroy() x86_release_hardware() release_ds_buffers() get_online_cpus() when a per cpu watchdog perf event is destroyed which drops the last reference to the PMU hardware. The cleanup code there invokes get_online_cpus() which instantly deadlocks because the hotplug percpu rwsem is write locked. To solve this add a deferring mechanism: cpus_write_lock() kthread_park() watchdog_nmi_disable(deferred) perf_event_disable(event); move_event_to_deferred(event); .... cpus_write_unlock() cleaup_deferred_events() perf_event_release_kernel() This is still properly serialized against concurrent hotplug via the cpu_add_remove_lock, which is held by the task which initiated the hotplug event. This is also used to handle event destruction when the watchdog threads are parked via other mechanisms than CPU hotplug. Analyzed-by: Peter Zijlstra Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.884469246@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 6 ++++++ kernel/watchdog.c | 25 +++++++++++++++++++++++++ kernel/watchdog_hld.c | 34 ++++++++++++++++++++++++++++------ 3 files changed, 59 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index acf5308fad51..a96b348591df 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -734,6 +735,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, out: cpus_write_unlock(); + /* + * Do post unplug cleanup. This is still protected against + * concurrent CPU hotplug via cpu_add_remove_lock. + */ + lockup_detector_cleanup(); return ret; } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index af000956286c..dd1fd59683c5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -109,8 +109,10 @@ int __weak watchdog_nmi_enable(unsigned int cpu) { return 0; } + void __weak watchdog_nmi_disable(unsigned int cpu) { + hardlockup_detector_perf_disable(); } /* @@ -193,6 +195,8 @@ __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); #endif #endif +static void __lockup_detector_cleanup(void); + /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally @@ -631,6 +635,24 @@ static void set_sample_period(void) } #endif /* SOFTLOCKUP */ +static void __lockup_detector_cleanup(void) +{ + lockdep_assert_held(&watchdog_mutex); + hardlockup_detector_perf_cleanup(); +} + +/** + * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes + * + * Caller must not hold the cpu hotplug rwsem. + */ +void lockup_detector_cleanup(void) +{ + mutex_lock(&watchdog_mutex); + __lockup_detector_cleanup(); + mutex_unlock(&watchdog_mutex); +} + /** * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) * @@ -665,6 +687,8 @@ static int proc_watchdog_update(void) watchdog_nmi_reconfigure(); + __lockup_detector_cleanup(); + return err; } @@ -837,6 +861,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, } watchdog_nmi_reconfigure(); + __lockup_detector_cleanup(); } mutex_unlock(&watchdog_mutex); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 7b602714ea53..94111ccb09b5 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -21,6 +21,8 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); +static DEFINE_PER_CPU(struct perf_event *, dead_event); +static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; static bool hardlockup_detector_disabled; @@ -239,16 +241,18 @@ out: return 0; } -void watchdog_nmi_disable(unsigned int cpu) +/** + * hardlockup_detector_perf_disable - Disable the local event + */ +void hardlockup_detector_perf_disable(void) { - struct perf_event *event = per_cpu(watchdog_ev, cpu); + struct perf_event *event = this_cpu_read(watchdog_ev); if (event) { perf_event_disable(event); - per_cpu(watchdog_ev, cpu) = NULL; - - /* should be in cleanup, but blocks oprofile */ - perf_event_release_kernel(event); + this_cpu_write(watchdog_ev, NULL); + this_cpu_write(dead_event, event); + cpumask_set_cpu(smp_processor_id(), &dead_events_mask); /* watchdog_nmi_enable() expects this to be zero initially. */ if (atomic_dec_and_test(&watchdog_cpus)) @@ -256,6 +260,24 @@ void watchdog_nmi_disable(unsigned int cpu) } } +/** + * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them + * + * Called from lockup_detector_cleanup(). Serialized by the caller. + */ +void hardlockup_detector_perf_cleanup(void) +{ + int cpu; + + for_each_cpu(cpu, &dead_events_mask) { + struct perf_event *event = per_cpu(dead_event, cpu); + + per_cpu(dead_event, cpu) = NULL; + perf_event_release_kernel(event); + } + cpumask_clear(&dead_events_mask); +} + /** * hardlockup_detector_perf_stop - Globally stop watchdog events * -- cgit v1.2.3 From 01f0a02701cbcf32d22cfc9d1ab9a3f0ff2ba68c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:05 +0200 Subject: watchdog/core: Remove the park_in_progress obfuscation Commit: b94f51183b06 ("kernel/watchdog: prevent false hardlockup on overloaded system") tries to fix the following issue: proc_write() set_sample_period() <--- New sample period becoms visible <----- Broken starts proc_watchdog_update() watchdog_enable_all_cpus() watchdog_hrtimer_fn() update_watchdog_all_cpus() restart_timer(sample_period) watchdog_park_threads() thread->park() disable_nmi() <----- Broken ends The reason why this is broken is that the update of the watchdog threshold becomes immediately effective and visible for the hrtimer function which uses that value to rearm the timer. But the NMI/perf side still uses the old value up to the point where it is disabled. If the rate has been lowered then the NMI can run fast enough to 'detect' a hard lockup because the timer has not fired due to the longer period. The patch 'fixed' this by adding a variable: proc_write() set_sample_period() <----- Broken starts proc_watchdog_update() watchdog_enable_all_cpus() watchdog_hrtimer_fn() update_watchdog_all_cpus() restart_timer(sample_period) watchdog_park_threads() park_in_progress = 1 <----- Broken ends nmi_watchdog() if (park_in_progress) return; The only effect of this variable was to make the window where the breakage can hit small enough that it was not longer observable in testing. From a correctness point of view it is a pointless bandaid which merily papers over the root cause: the unsychronized update of the variable. Looking deeper into the related code pathes unearthed similar problems in the watchdog_start()/stop() functions. watchdog_start() perf_nmi_event_start() hrtimer_start() watchdog_stop() hrtimer_cancel() perf_nmi_event_stop() In both cases the call order is wrong because if the tasks gets preempted or the VM gets scheduled out long enough after the first call, then there is a chance that the next NMI will see a stale hrtimer interrupt count and trigger a false positive hard lockup splat. Get rid of park_in_progress so the code can be gradually deobfuscated and pruned from several layers of duct tape papering over the root cause, which has been either ignored or not understood at all. Once this is removed the underlying problem will be fixed by rewriting the proc interface to do a proper synchronized update. Address the start/stop() ordering problem as well by reverting the call order, so this part is at least correct now. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1709052038270.2393@nanos Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 37 +++++++++++++++++-------------------- kernel/watchdog_hld.c | 7 ++----- 2 files changed, 19 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index dd1fd59683c5..c290135fb415 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -136,8 +136,6 @@ void __weak watchdog_nmi_reconfigure(void) #define for_each_watchdog_cpu(cpu) \ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) -atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); - static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -322,8 +320,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; - if (!watchdog_enabled || - atomic_read(&watchdog_park_in_progress) != 0) + if (!watchdog_enabled) return HRTIMER_NORESTART; /* kick the hardlockup detector */ @@ -437,32 +434,37 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) static void watchdog_enable(unsigned int cpu) { - struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); - /* kick off the timer for the hardlockup detector */ + /* + * Start the timer first to prevent the NMI watchdog triggering + * before the timer has a chance to fire. + */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ns_to_ktime(sample_period), + HRTIMER_MODE_REL_PINNED); + /* Initialize timestamp */ + __touch_watchdog(); /* Enable the perf event */ watchdog_nmi_enable(cpu); - /* done here because hrtimer_start can only pin to smp_processor_id() */ - hrtimer_start(hrtimer, ns_to_ktime(sample_period), - HRTIMER_MODE_REL_PINNED); - - /* initialize timestamp */ watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); - __touch_watchdog(); } static void watchdog_disable(unsigned int cpu) { - struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); watchdog_set_prio(SCHED_NORMAL, 0); - hrtimer_cancel(hrtimer); - /* disable the perf event */ + /* + * Disable the perf event first. That prevents that a large delay + * between disabling the timer and disabling the perf event causes + * the perf NMI to detect a false positive. + */ watchdog_nmi_disable(cpu); + hrtimer_cancel(hrtimer); } static void watchdog_cleanup(unsigned int cpu, bool online) @@ -518,16 +520,11 @@ static int watchdog_park_threads(void) { int cpu, ret = 0; - atomic_set(&watchdog_park_in_progress, 1); - for_each_watchdog_cpu(cpu) { ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); if (ret) break; } - - atomic_set(&watchdog_park_in_progress, 0); - return ret; } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 94111ccb09b5..0aa191ee3d51 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -106,15 +106,12 @@ static struct perf_event_attr wd_hw_attr = { /* Callback function for perf event subsystem */ static void watchdog_overflow_callback(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) + struct perf_sample_data *data, + struct pt_regs *regs) { /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; - if (atomic_read(&watchdog_park_in_progress) != 0) - return; - if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; -- cgit v1.2.3 From 2b9d7f233b835663cbc7b6b3f88dd20f61118d1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:06 +0200 Subject: watchdog/core: Clean up stub functions Having stub functions which take a full page is not helping the readablility of code. Condense them and move the doubled #ifdef variant into the SYSFS section. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.045545271@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 68 ++++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c290135fb415..af37c040436c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -125,10 +125,7 @@ void __weak watchdog_nmi_disable(unsigned int cpu) * - sysctl_hardlockup_all_cpu_backtrace * - hardlockup_panic */ -void __weak watchdog_nmi_reconfigure(void) -{ -} - +void __weak watchdog_nmi_reconfigure(void) { } #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -136,6 +133,11 @@ void __weak watchdog_nmi_reconfigure(void) #define for_each_watchdog_cpu(cpu) \ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) +/* Global variables, exported for sysctl */ +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; +int __read_mostly soft_watchdog_enabled; + static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -149,13 +151,9 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static unsigned long soft_lockup_nmi_warn; -unsigned int __read_mostly softlockup_panic = - CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; - static int __init softlockup_panic_setup(char *str) { softlockup_panic = simple_strtoul(str, NULL, 0); - return 1; } __setup("softlockup_panic=", softlockup_panic_setup); @@ -593,44 +591,13 @@ static void watchdog_disable_all_cpus(void) } } -#ifdef CONFIG_SYSCTL -static int watchdog_update_cpus(void) -{ - return smpboot_update_cpumask_percpu_thread( - &watchdog_threads, &watchdog_cpumask); -} -#endif - -#else /* SOFTLOCKUP */ -static int watchdog_park_threads(void) -{ - return 0; -} - -static void watchdog_unpark_threads(void) -{ -} - -static int watchdog_enable_all_cpus(void) -{ - return 0; -} - -static void watchdog_disable_all_cpus(void) -{ -} - -#ifdef CONFIG_SYSCTL -static int watchdog_update_cpus(void) -{ - return 0; -} -#endif - -static void set_sample_period(void) -{ -} -#endif /* SOFTLOCKUP */ +#else /* CONFIG_SOFTLOCKUP_DETECTOR */ +static inline int watchdog_park_threads(void) { return 0; } +static inline void watchdog_unpark_threads(void) { } +static inline int watchdog_enable_all_cpus(void) { return 0; } +static inline void watchdog_disable_all_cpus(void) { } +static inline void set_sample_period(void) { } +#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) { @@ -827,6 +794,15 @@ out: return err; } +static int watchdog_update_cpus(void) +{ + if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) { + return smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_cpumask); + } + return 0; +} + /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the -- cgit v1.2.3 From 368a7e2ce8ff0ddcdcb37eadb76530b033f6eb2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:07 +0200 Subject: watchdog/core: Clean up the #ifdef maze The #ifdef maze in this file is horrible, group stuff at least a bit so one can figure out what belongs to what. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.139629546@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index af37c040436c..a9bdfde73e4b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,7 +41,6 @@ unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif #ifdef CONFIG_HARDLOCKUP_DETECTOR -/* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: */ @@ -74,19 +73,21 @@ static int __init hardlockup_panic_setup(char *str) } __setup("nmi_watchdog=", hardlockup_panic_setup); -#endif +# ifdef CONFIG_SMP +int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#ifdef CONFIG_SOFTLOCKUP_DETECTOR -int __read_mostly soft_watchdog_enabled; -#endif +static int __init hardlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); +# endif /* CONFIG_SMP */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR */ int __read_mostly watchdog_user_enabled; int __read_mostly watchdog_thresh = 10; -#ifdef CONFIG_SMP -int __read_mostly sysctl_softlockup_all_cpu_backtrace; -int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#endif struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -173,22 +174,14 @@ static int __init nosoftlockup_setup(char *str) __setup("nosoftlockup", nosoftlockup_setup); #ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; + static int __init softlockup_all_cpu_backtrace_setup(char *str) { - sysctl_softlockup_all_cpu_backtrace = - !!simple_strtol(str, NULL, 0); + sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static int __init hardlockup_all_cpu_backtrace_setup(char *str) -{ - sysctl_hardlockup_all_cpu_backtrace = - !!simple_strtol(str, NULL, 0); - return 1; -} -__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); -#endif #endif static void __lockup_detector_cleanup(void); -- cgit v1.2.3 From 05ba3de74a3f499dcaa37b186220aaf174c95a4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:08 +0200 Subject: watchdog/core: Split out cpumask write function Split the write part of the cpumask proc handler out into a separate helper to avoid deep indentation. This also reduces the patch complexity in the following cleanups. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.218075991@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a9bdfde73e4b..cedf45ab4d81 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -792,10 +792,29 @@ static int watchdog_update_cpus(void) if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) { return smpboot_update_cpumask_percpu_thread(&watchdog_threads, &watchdog_cpumask); + __lockup_detector_cleanup(); } return 0; } +static void proc_watchdog_cpumask_update(void) +{ + /* Remove impossible cpus to keep sysctl output clean. */ + cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); + + if (watchdog_running) { + /* + * Failure would be due to being unable to allocate a + * temporary cpumask, so we are likely not in a position to + * do much else to make things better. + */ + if (watchdog_update_cpus() != 0) + pr_err("cpumask update failed\n"); + } + + watchdog_nmi_reconfigure(); +} + /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the @@ -811,30 +830,13 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); - if (!err && write) { - /* Remove impossible cpus to keep sysctl output cleaner. */ - cpumask_and(&watchdog_cpumask, &watchdog_cpumask, - cpu_possible_mask); - - if (watchdog_running) { - /* - * Failure would be due to being unable to allocate - * a temporary cpumask, so we are likely not in a - * position to do much else to make things better. - */ - if (watchdog_update_cpus() != 0) - pr_err("cpumask update failed\n"); - } - - watchdog_nmi_reconfigure(); - __lockup_detector_cleanup(); - } + if (!err && write) + proc_watchdog_cpumask_update(); mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } - #endif /* CONFIG_SYSCTL */ void __init lockup_detector_init(void) -- cgit v1.2.3 From 0d85923c7a81719567311ba0eae8ecb2efd4c8a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:09 +0200 Subject: smpboot/threads, watchdog/core: Avoid runtime allocation smpboot_update_cpumask_threads_percpu() allocates a temporary cpumask at runtime. This is suboptimal because the call site needs more code size for proper error handling than a statically allocated temporary mask requires data size. Add static temporary cpumask. The function is globaly serialized, so no further protection required. Remove the half baken error handling in the watchdog code and get rid of the export as there are no in tree modular users of that function. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.297288838@linutronix.de Signed-off-by: Ingo Molnar --- kernel/smpboot.c | 22 +++++++--------------- kernel/watchdog.c | 21 +++++---------------- 2 files changed, 12 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 1d71c051a951..ed7507b69b48 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -344,39 +344,31 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); * by the client, but only by calling this function. * This function can only be called on a registered smp_hotplug_thread. */ -int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *new) +void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *new) { struct cpumask *old = plug_thread->cpumask; - cpumask_var_t tmp; + static struct cpumask tmp; unsigned int cpu; - if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) - return -ENOMEM; - get_online_cpus(); mutex_lock(&smpboot_threads_lock); /* Park threads that were exclusively enabled on the old mask. */ - cpumask_andnot(tmp, old, new); - for_each_cpu_and(cpu, tmp, cpu_online_mask) + cpumask_andnot(&tmp, old, new); + for_each_cpu_and(cpu, &tmp, cpu_online_mask) smpboot_park_thread(plug_thread, cpu); /* Unpark threads that are exclusively enabled on the new mask. */ - cpumask_andnot(tmp, new, old); - for_each_cpu_and(cpu, tmp, cpu_online_mask) + cpumask_andnot(&tmp, new, old); + for_each_cpu_and(cpu, &tmp, cpu_online_mask) smpboot_unpark_thread(plug_thread, cpu); cpumask_copy(old, new); mutex_unlock(&smpboot_threads_lock); put_online_cpus(); - - free_cpumask_var(tmp); - - return 0; } -EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread); static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cedf45ab4d81..8935a3a4c2fb 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -787,31 +787,20 @@ out: return err; } -static int watchdog_update_cpus(void) +static void watchdog_update_cpus(void) { - if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) { - return smpboot_update_cpumask_percpu_thread(&watchdog_threads, - &watchdog_cpumask); + if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR) && watchdog_running) { + smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_cpumask); __lockup_detector_cleanup(); } - return 0; } static void proc_watchdog_cpumask_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - - if (watchdog_running) { - /* - * Failure would be due to being unable to allocate a - * temporary cpumask, so we are likely not in a position to - * do much else to make things better. - */ - if (watchdog_update_cpus() != 0) - pr_err("cpumask update failed\n"); - } - + watchdog_update_cpus(); watchdog_nmi_reconfigure(); } -- cgit v1.2.3 From 2eb2527f847d1bd8d8fb9db1e8139db5d6eddb36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:10 +0200 Subject: watchdog/core: Create new thread handling infrastructure The lockup detector reconfiguration tears down all watchdog threads when the watchdog is disabled and sets them up again when its enabled. That's a pointless exercise. The watchdog threads are not consuming an insane amount of resources, so it's enough to set them up at init time and keep them in parked position when the watchdog is disabled and unpark them when it is reenabled. The smpboot thread infrastructure takes care of keeping the force parked threads in place even across cpu hotplug. Another horrible mechanism are the open coded park/unpark loops which are used for reconfiguration of the watchdog. The smpboot infrastructure allows exactly the same via smpboot_update_cpumask_thread_percpu(), which is cpu hotplug safe. Using that instead of the open coded loops allows to get rid of the hotplug locking mess in the watchdog code. Implement a clean infrastructure which allows to replace the open coded nonsense. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.377182587@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8935a3a4c2fb..b35518375fb7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -139,6 +139,9 @@ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; int __read_mostly soft_watchdog_enabled; +struct cpumask watchdog_allowed_mask __read_mostly; +static bool softlockup_threads_initialized __read_mostly; + static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -584,12 +587,84 @@ static void watchdog_disable_all_cpus(void) } } +static void softlockup_update_smpboot_threads(void) +{ + lockdep_assert_held(&watchdog_mutex); + + if (!softlockup_threads_initialized) + return; + + smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_allowed_mask); + __lockup_detector_cleanup(); +} + +/* Temporarily park all watchdog threads */ +static void softlockup_park_all_threads(void) +{ + cpumask_clear(&watchdog_allowed_mask); + softlockup_update_smpboot_threads(); +} + +/* + * Park threads which are not longer enabled and unpark threads which have + * been newly enabled. + */ +static void softlockup_update_threads(void) +{ + cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); + softlockup_update_smpboot_threads(); +} + +static void softlockup_reconfigure_threads(bool enabled) +{ + softlockup_park_all_threads(); + set_sample_period(); + if (enabled) + softlockup_update_threads(); +} + +/* + * Create the watchdog thread infrastructure. + * + * The threads are not unparked as watchdog_allowed_mask is empty. When + * the threads are sucessfully initialized, take the proper locks and + * unpark the threads in the watchdog_cpumask if the watchdog is enabled. + */ +static __init void softlockup_init_threads(void) +{ + int ret; + + /* + * If sysctl is off and watchdog got disabled on the command line, + * nothing to do here. + */ + if (!IS_ENABLED(CONFIG_SYSCTL) && + !(watchdog_enabled && watchdog_thresh)) + return; + + ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads, + &watchdog_allowed_mask); + if (ret) { + pr_err("Failed to initialize soft lockup detector threads\n"); + return; + } + + mutex_lock(&watchdog_mutex); + softlockup_threads_initialized = true; + softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); + mutex_unlock(&watchdog_mutex); +} + #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline int watchdog_park_threads(void) { return 0; } static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } static inline void set_sample_period(void) { } +static inline void softlockup_init_threads(void) { } +static inline void softlockup_update_threads(void) { } +static inline void softlockup_reconfigure_threads(bool enabled) { } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) -- cgit v1.2.3 From d57108d4f6791291e89d980e7f7a3566c32ab188 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:11 +0200 Subject: watchdog/core: Get rid of the thread teardown/setup dance The lockup detector reconfiguration tears down all watchdog threads when the watchdog is disabled and sets them up again when its enabled. That's a pointless exercise. The watchdog threads are not consuming an insane amount of resources, so it's enough to set them up at init time and keep them in parked position when the watchdog is disabled and unpark them when it is reenabled. The smpboot thread infrastructure takes care of keeping the force parked threads in place even across cpu hotplug. Aside of that the code implements the park/unpark facility of smp hotplug threads on its own, which is even more pointless. We have functionality in the smpboot thread code to do so. Use the new thread management functions and get rid of the unholy mess. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.470370113@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 190 ++++++------------------------------------------------ 1 file changed, 19 insertions(+), 171 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b35518375fb7..762d3ed82a08 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -91,13 +91,6 @@ int __read_mostly watchdog_thresh = 10; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); -/* - * The 'watchdog_running' variable is set to 1 when the watchdog threads - * are registered/started and is set to 0 when the watchdog threads are - * unregistered/stopped, so it is an indicator whether the threads exist. - */ -static int __read_mostly watchdog_running; - /* * These functions can be overridden if an architecture implements its * own hardlockup detector. @@ -130,10 +123,6 @@ void __weak watchdog_nmi_reconfigure(void) { } #ifdef CONFIG_SOFTLOCKUP_DETECTOR -/* Helper for online, unparked cpus. */ -#define for_each_watchdog_cpu(cpu) \ - for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) - /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -259,11 +248,15 @@ void touch_all_softlockup_watchdogs(void) int cpu; /* - * this is done lockless - * do we care if a 0 races with a timestamp? - * all it means is the softlock check starts one cycle later + * watchdog_mutex cannpt be taken here, as this might be called + * from (soft)interrupt context, so the access to + * watchdog_allowed_cpumask might race with a concurrent update. + * + * The watchdog time stamp can race against a concurrent real + * update as well, the only side effect might be a cycle delay for + * the softlockup check. */ - for_each_watchdog_cpu(cpu) + for_each_cpu(cpu, &watchdog_allowed_mask) per_cpu(watchdog_touch_ts, cpu) = 0; wq_watchdog_touch(-1); } @@ -303,9 +296,6 @@ static void watchdog_interrupt_count(void) __this_cpu_inc(hrtimer_interrupts); } -static int watchdog_enable_all_cpus(void); -static void watchdog_disable_all_cpus(void); - /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { @@ -498,95 +488,6 @@ static struct smp_hotplug_thread watchdog_threads = { .unpark = watchdog_enable, }; -/* - * park all watchdog threads that are specified in 'watchdog_cpumask' - * - * This function returns an error if kthread_park() of a watchdog thread - * fails. In this situation, the watchdog threads of some CPUs can already - * be parked and the watchdog threads of other CPUs can still be runnable. - * Callers are expected to handle this special condition as appropriate in - * their context. - * - * This function may only be called in a context that is protected against - * races with CPU hotplug - for example, via get_online_cpus(). - */ -static int watchdog_park_threads(void) -{ - int cpu, ret = 0; - - for_each_watchdog_cpu(cpu) { - ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); - if (ret) - break; - } - return ret; -} - -/* - * unpark all watchdog threads that are specified in 'watchdog_cpumask' - * - * This function may only be called in a context that is protected against - * races with CPU hotplug - for example, via get_online_cpus(). - */ -static void watchdog_unpark_threads(void) -{ - int cpu; - - for_each_watchdog_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); -} - -static int update_watchdog_all_cpus(void) -{ - int ret; - - ret = watchdog_park_threads(); - if (ret) - return ret; - - watchdog_unpark_threads(); - - return 0; -} - -static int watchdog_enable_all_cpus(void) -{ - int err = 0; - - if (!watchdog_running) { - err = smpboot_register_percpu_thread_cpumask(&watchdog_threads, - &watchdog_cpumask); - if (err) - pr_err("Failed to create watchdog threads, disabled\n"); - else - watchdog_running = 1; - } else { - /* - * Enable/disable the lockup detectors or - * change the sample period 'on the fly'. - */ - err = update_watchdog_all_cpus(); - - if (err) { - watchdog_disable_all_cpus(); - pr_err("Failed to update lockup detectors, disabled\n"); - } - } - - if (err) - watchdog_enabled = 0; - - return err; -} - -static void watchdog_disable_all_cpus(void) -{ - if (watchdog_running) { - watchdog_running = 0; - smpboot_unregister_percpu_thread(&watchdog_threads); - } -} - static void softlockup_update_smpboot_threads(void) { lockdep_assert_held(&watchdog_mutex); @@ -661,7 +562,6 @@ static inline int watchdog_park_threads(void) { return 0; } static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } -static inline void set_sample_period(void) { } static inline void softlockup_init_threads(void) { } static inline void softlockup_update_threads(void) { } static inline void softlockup_reconfigure_threads(bool enabled) { } @@ -701,28 +601,10 @@ void lockup_detector_soft_poweroff(void) /* * Update the run state of the lockup detectors. */ -static int proc_watchdog_update(void) +static void proc_watchdog_update(void) { - int err = 0; - - /* - * Watchdog threads won't be started if they are already active. - * The 'watchdog_running' variable in watchdog_*_all_cpus() takes - * care of this. If those threads are already active, the sample - * period will be updated and the lockup detectors will be enabled - * or disabled 'on the fly'. - */ - if (watchdog_enabled && watchdog_thresh) - err = watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); - + softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); watchdog_nmi_reconfigure(); - - __lockup_detector_cleanup(); - - return err; - } /* @@ -778,17 +660,8 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, new = old & ~which; } while (cmpxchg(&watchdog_enabled, old, new) != old); - /* - * Update the run state of the lockup detectors. There is _no_ - * need to check the value returned by proc_watchdog_update() - * and to restore the previous value of 'watchdog_enabled' as - * both lockup detectors are disabled if proc_watchdog_update() - * returns an error. - */ - if (old == new) - goto out; - - err = proc_watchdog_update(); + if (old != new) + proc_watchdog_update(); } out: mutex_unlock(&watchdog_mutex); @@ -832,50 +705,28 @@ int proc_soft_watchdog(struct ctl_table *table, int write, int proc_watchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int err, old, new; + int err, old; cpu_hotplug_disable(); mutex_lock(&watchdog_mutex); - old = ACCESS_ONCE(watchdog_thresh); + old = READ_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (err || !write) - goto out; + if (!err && write && old != READ_ONCE(watchdog_thresh)) + proc_watchdog_update(); - /* - * Update the sample period. Restore on failure. - */ - new = ACCESS_ONCE(watchdog_thresh); - if (old == new) - goto out; - - set_sample_period(); - err = proc_watchdog_update(); - if (err) { - watchdog_thresh = old; - set_sample_period(); - } -out: mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } -static void watchdog_update_cpus(void) -{ - if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR) && watchdog_running) { - smpboot_update_cpumask_percpu_thread(&watchdog_threads, - &watchdog_cpumask); - __lockup_detector_cleanup(); - } -} - static void proc_watchdog_cpumask_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - watchdog_update_cpus(); + + softlockup_update_threads(); watchdog_nmi_reconfigure(); } @@ -905,8 +756,6 @@ int proc_watchdog_cpumask(struct ctl_table *table,