From 1fb9d6ad2766a1dd70d167552988375049a97f21 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 5 Feb 2010 21:47:04 -0500
Subject: nmi_watchdog: Add new, generic implementation, using perf events

This is a new generic nmi_watchdog implementation using the perf
events infrastructure as suggested by Ingo.

The implementation is simple, just create an in-kernel perf
event and register an overflow handler to check for cpu lockups.

I created a generic implementation that lives in kernel/ and
the hardware specific part that for now lives in arch/x86.

This approach has a number of advantages:

 - It simplifies the x86 PMU implementation in the long run,
   in that it removes the hardcoded low-level PMU implementation
   that was the NMI watchdog before.

 - It allows new NMI watchdog features to be added in a central
   place.

 - It allows other architectures to enable the NMI watchdog,
   as long as they have perf events (that provide NMIs)
   implemented.

 - It also allows for more graceful co-existence of existing
   perf events apps and the NMI watchdog - before these changes
   the relationship was exclusive. (The NMI watchdog will 'spend'
   a perf event when enabled. In later iterations we might be
   able to piggyback from an existing NMI event without having
   to allocate a hardware event for the NMI watchdog - turning
   this into a no-hardware-cost feature.)

As for compatibility, we'll keep the old NMI watchdog code as
well until the new one can 100% replace it on all CPUs, old and
new alike.  That might take some time as the NMI watchdog has
been ported to many CPU models.

I have done light testing to make sure the framework works
correctly and it does.

 v2: Set the correct timeout values based on the old nmi
     watchdog

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/nmi_watchdog.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 kernel/nmi_watchdog.c

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
new file mode 100644
index 000000000000..36817b214d69
--- /dev/null
+++ b/kernel/nmi_watchdog.c
@@ -0,0 +1,191 @@
+/*
+ * Detect Hard Lockups using the NMI
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * this code detects hard lockups: incidents in where on a CPU
+ * the kernel does not respond to anything except NMI.
+ *
+ * Note: Most of this code is borrowed heavily from softlockup.c,
+ * so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * to those contributors as well.
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/lockdep.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
+static DEFINE_PER_CPU(int, nmi_watchdog_touch);
+static DEFINE_PER_CPU(long, alert_counter);
+
+void touch_nmi_watchdog(void)
+{
+	__raw_get_cpu_var(nmi_watchdog_touch) = 1;
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+void touch_all_nmi_watchdog(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		per_cpu(nmi_watchdog_touch, cpu) = 1;
+	touch_softlockup_watchdog();
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int cpu;
+
+	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
+		nmi_watchdog_enabled = 0;
+	else
+		nmi_watchdog_enabled = 1;
+
+	touch_all_nmi_watchdog();
+	proc_dointvec(table, write, buffer, length, ppos);
+	if (nmi_watchdog_enabled)
+		for_each_online_cpu(cpu)
+			perf_event_enable(per_cpu(nmi_watchdog_ev, cpu));
+	else
+		for_each_online_cpu(cpu)
+			perf_event_disable(per_cpu(nmi_watchdog_ev, cpu));
+	return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
+struct perf_event_attr wd_attr = {
+	.type = PERF_TYPE_HARDWARE,
+	.config = PERF_COUNT_HW_CPU_CYCLES,
+	.size = sizeof(struct perf_event_attr),
+	.pinned = 1,
+	.disabled = 1,
+};
+
+static int panic_on_timeout;
+
+void wd_overflow(struct perf_event *event, int nmi,
+		 struct perf_sample_data *data,
+		 struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	int touched = 0;
+
+	if (__get_cpu_var(nmi_watchdog_touch)) {
+		per_cpu(nmi_watchdog_touch, cpu) = 0;
+		touched = 1;
+	}
+
+	/* check to see if the cpu is doing anything */
+	if (!touched && hw_nmi_is_cpu_stuck(regs)) {
+		/*
+		 * Ayiee, looks like this CPU is stuck ...
+		 * wait a few IRQs (5 seconds) before doing the oops ...
+		 */
+		per_cpu(alert_counter,cpu) += 1;
+		if (per_cpu(alert_counter,cpu) == 5) {
+			/*
+			 * die_nmi will return ONLY if NOTIFY_STOP happens..
+			 */
+			die_nmi("BUG: NMI Watchdog detected LOCKUP",
+				regs, panic_on_timeout);
+		}
+	} else {
+		per_cpu(alert_counter,cpu) = 0;
+	}
+
+	return;
+}
+
+/*
+ * Create/destroy watchdog threads as CPUs come and go:
+ */
+static int __cpuinit
+cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct perf_event *event;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		per_cpu(nmi_watchdog_touch, hotcpu) = 0;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
+		wd_attr.sample_period = cpu_khz * 1000;
+		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
+		if (IS_ERR(event)) {
+			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+			return NOTIFY_BAD;
+		}
+		per_cpu(nmi_watchdog_ev, hotcpu) = event;
+		perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		perf_event_disable(per_cpu(nmi_watchdog_ev, hotcpu));
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		event = per_cpu(nmi_watchdog_ev, hotcpu);
+		per_cpu(nmi_watchdog_ev, hotcpu) = NULL;
+		perf_event_release_kernel(event);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+static int __initdata nonmi_watchdog;
+
+static int __init nonmi_watchdog_setup(char *str)
+{
+	nonmi_watchdog = 1;
+	return 1;
+}
+__setup("nonmi_watchdog", nonmi_watchdog_setup);
+
+static int __init spawn_nmi_watchdog_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+	int err;
+
+	if (nonmi_watchdog)
+		return 0;
+
+	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	if (err == NOTIFY_BAD) {
+		BUG();
+		return 1;
+	}
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+
+	return 0;
+}
+early_initcall(spawn_nmi_watchdog_task);
-- 
cgit v1.2.3


From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 5 Feb 2010 21:47:05 -0500
Subject: nmi_watchdog: Config option to enable new nmi_watchdog

These are the bits that enable the new nmi_watchdog and safely
isolate the old nmi_watchdog.  Only one or the other can run,
not both at the same time.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75d65f2..8a5abe53ebad 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
-- 
cgit v1.2.3


From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 12 Feb 2010 17:19:19 -0500
Subject: nmi_watchdog: Compile and portability fixes

The original patch was x86_64 centric.  Changed the code to make
it less so.

ested by building and running on a powerpc.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/nmi_watchdog.c | 52 ++++++++++++++++++++++++++++++++++++++++-----------
 kernel/sysctl.c       | 15 ++++++++++++++-
 2 files changed, 55 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 36817b214d69..73c1954a97bb 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
 static DEFINE_PER_CPU(int, nmi_watchdog_touch);
 static DEFINE_PER_CPU(long, alert_counter);
 
+static int panic_on_timeout;
+
 void touch_nmi_watchdog(void)
 {
 	__raw_get_cpu_var(nmi_watchdog_touch) = 1;
@@ -46,19 +48,49 @@ void touch_all_nmi_watchdog(void)
 	touch_softlockup_watchdog();
 }
 
+static int __init setup_nmi_watchdog(char *str)
+{
+        if (!strncmp(str, "panic", 5)) {
+                panic_on_timeout = 1;
+                str = strchr(str, ',');
+                if (!str)
+                        return 1;
+                ++str;
+        }
+        return 1;
+}
+__setup("nmi_watchdog=", setup_nmi_watchdog);
+
 #ifdef CONFIG_SYSCTL
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog
  */
+int nmi_watchdog_enabled;
+
 int proc_nmi_enabled(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int cpu;
 
-	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
+	if (!write) {
+		struct perf_event *event;
+		for_each_online_cpu(cpu) {
+			event = per_cpu(nmi_watchdog_ev, cpu);
+			if (event->state > PERF_EVENT_STATE_OFF) {
+				nmi_watchdog_enabled = 1;
+				break;
+			}
+		}
+		proc_dointvec(table, write, buffer, length, ppos);
+		return 0;
+	}
+
+	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) {
 		nmi_watchdog_enabled = 0;
-	else
-		nmi_watchdog_enabled = 1;
+		proc_dointvec(table, write, buffer, length, ppos);
+		printk("NMI watchdog failed configuration, can not be enabled\n");
+		return 0;
+	}
 
 	touch_all_nmi_watchdog();
 	proc_dointvec(table, write, buffer, length, ppos);
@@ -81,8 +113,6 @@ struct perf_event_attr wd_attr = {
 	.disabled = 1,
 };
 
-static int panic_on_timeout;
-
 void wd_overflow(struct perf_event *event, int nmi,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
@@ -103,11 +133,11 @@ void wd_overflow(struct perf_event *event, int nmi,
 		 */
 		per_cpu(alert_counter,cpu) += 1;
 		if (per_cpu(alert_counter,cpu) == 5) {
-			/*
-			 * die_nmi will return ONLY if NOTIFY_STOP happens..
-			 */
-			die_nmi("BUG: NMI Watchdog detected LOCKUP",
-				regs, panic_on_timeout);
+			if (panic_on_timeout) {
+				panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
+			} else {
+				WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
+			}
 		}
 	} else {
 		per_cpu(alert_counter,cpu) = 0;
@@ -133,7 +163,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
-		wd_attr.sample_period = cpu_khz * 1000;
+		wd_attr.sample_period = hw_nmi_get_sample_period();
 		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
 			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b2448468..ac72c9e6bd9b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -60,6 +60,10 @@
 #include <asm/io.h>
 #endif
 
+#ifdef CONFIG_NMI_WATCHDOG
+#include <linux/nmi.h>
+#endif
+
 
 #if defined(CONFIG_SYSCTL)
 
@@ -692,7 +696,16 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_NMI_WATCHDOG)
+	{
+		.procname       = "nmi_watchdog",
+		.data           = &nmi_watchdog_enabled,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_nmi_enabled,
+	},
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
 	{
 		.procname       = "unknown_nmi_panic",
 		.data           = &unknown_nmi_panic,
-- 
cgit v1.2.3


From cf454aecb31741a0438ed1201b3dd153c7c7b19a Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 12 Feb 2010 17:19:20 -0500
Subject: nmi_watchdog: Fallback to software events when no hardware pmu
 detected

Not all arches have a PMU or have perf_event support for their
PMU.  The nmi_watchdog will fail in those cases.  Fallback to
using software events to generate nmi_watchdog traffic with
local apic interrupts.

Tested on a Pentium4 and it worked as expected, excepting for
detecting cpu lockups.

The problem with using software events as a cpu lock up detector
is the nmi_watchdog uses the logic that if local apic interrupts
stop incrementing then the cpu is probably locked up.  But with
software events we use the local apic to trigger the
nmi_watchdog callback to see if local apic interrupts are still
firing, which obviously they are otherwise we wouldn't have been
triggered.

The algorithm to detect cpu lock ups is the same as the old
nmi_watchdog. Perhaps we need to find a better way to detect
lock ups?

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
LKML-Reference: <1266013161-31197-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/nmi_watchdog.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 73c1954a97bb..4f23505d887d 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -166,8 +166,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		wd_attr.sample_period = hw_nmi_get_sample_period();
 		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
-			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
-			return NOTIFY_BAD;
+			wd_attr.type = PERF_TYPE_SOFTWARE;
+			event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
+			if (IS_ERR(event)) {
+				printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+				return NOTIFY_BAD;
+			}
 		}
 		per_cpu(nmi_watchdog_ev, hotcpu) = event;
 		perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
-- 
cgit v1.2.3


From 6081b6cd9702967889de34fe5da1f96bb96d0ab8 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 16 Feb 2010 17:04:52 -0500
Subject: nmi_watchdog: support for oprofile

Re-arrange the code so that when someone disables nmi_watchdog
with:

  echo 0 > /proc/sys/kernel/nmi_watchdog

it releases the hardware reservation on the PMUs.  This allows
the oprofile module to grab those PMUs and do its thing.
Otherwise oprofile fails to load because the hardware is
reserved by the perf_events subsystem.

Tested using:

  oprofile --vm-linux --start

and watched it failed when nmi_watchdog is enabled and succeed
when:

  oprofile --deinit && echo 0 > /proc/sys/kernel/nmi_watchdog

is run.

Note:  this has the side quirk of having the nmi_watchdog latch
onto the software events instead of hardware events if oprofile
has already reserved the hardware first.  User beware! :-)

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: eranian@google.com
LKML-Reference: <1266357892-30504-1-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/nmi_watchdog.c | 144 ++++++++++++++++++++++++++++----------------------
 1 file changed, 82 insertions(+), 62 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 4f23505d887d..633b230c2319 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -61,50 +61,6 @@ static int __init setup_nmi_watchdog(char *str)
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
-#ifdef CONFIG_SYSCTL
-/*
- * proc handler for /proc/sys/kernel/nmi_watchdog
- */
-int nmi_watchdog_enabled;
-
-int proc_nmi_enabled(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *length, loff_t *ppos)
-{
-	int cpu;
-
-	if (!write) {
-		struct perf_event *event;
-		for_each_online_cpu(cpu) {
-			event = per_cpu(nmi_watchdog_ev, cpu);
-			if (event->state > PERF_EVENT_STATE_OFF) {
-				nmi_watchdog_enabled = 1;
-				break;
-			}
-		}
-		proc_dointvec(table, write, buffer, length, ppos);
-		return 0;
-	}
-
-	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) {
-		nmi_watchdog_enabled = 0;
-		proc_dointvec(table, write, buffer, length, ppos);
-		printk("NMI watchdog failed configuration, can not be enabled\n");
-		return 0;
-	}
-
-	touch_all_nmi_watchdog();
-	proc_dointvec(table, write, buffer, length, ppos);
-	if (nmi_watchdog_enabled)
-		for_each_online_cpu(cpu)
-			perf_event_enable(per_cpu(nmi_watchdog_ev, cpu));
-	else
-		for_each_online_cpu(cpu)
-			perf_event_disable(per_cpu(nmi_watchdog_ev, cpu));
-	return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
 struct perf_event_attr wd_attr = {
 	.type = PERF_TYPE_HARDWARE,
 	.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -146,6 +102,85 @@ void wd_overflow(struct perf_event *event, int nmi,
 	return;
 }
 
+static int enable_nmi_watchdog(int cpu)
+{
+	struct perf_event *event;
+
+	event = per_cpu(nmi_watchdog_ev, cpu);
+	if (event && event->state > PERF_EVENT_STATE_OFF)
+		return 0;
+
+	if (event == NULL) {
+		/* Try to register using hardware perf events first */
+		wd_attr.sample_period = hw_nmi_get_sample_period();
+		event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow);
+		if (IS_ERR(event)) {
+			wd_attr.type = PERF_TYPE_SOFTWARE;
+			event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow);
+			if (IS_ERR(event)) {
+				printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", cpu, event);
+				return -1;
+			}
+		}
+		per_cpu(nmi_watchdog_ev, cpu) = event;
+	}
+	perf_event_enable(per_cpu(nmi_watchdog_ev, cpu));
+	return 0;
+}
+
+static void disable_nmi_watchdog(int cpu)
+{
+	struct perf_event *event;
+
+	event = per_cpu(nmi_watchdog_ev, cpu);
+	if (event) {
+		perf_event_disable(per_cpu(nmi_watchdog_ev, cpu));
+		per_cpu(nmi_watchdog_ev, cpu) = NULL;
+		perf_event_release_kernel(event);
+	}
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+int nmi_watchdog_enabled;
+
+int proc_nmi_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int cpu;
+
+	if (!write) {
+		struct perf_event *event;
+		for_each_online_cpu(cpu) {
+			event = per_cpu(nmi_watchdog_ev, cpu);
+			if (event && event->state > PERF_EVENT_STATE_OFF) {
+				nmi_watchdog_enabled = 1;
+				break;
+			}
+		}
+		proc_dointvec(table, write, buffer, length, ppos);
+		return 0;
+	}
+
+	touch_all_nmi_watchdog();
+	proc_dointvec(table, write, buffer, length, ppos);
+	if (nmi_watchdog_enabled) {
+		for_each_online_cpu(cpu)
+			if (enable_nmi_watchdog(cpu)) {
+				printk("NMI watchdog failed configuration, "
+					" can not be enabled\n");
+			}
+	} else {
+		for_each_online_cpu(cpu)
+			disable_nmi_watchdog(cpu);
+	}
+	return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
 /*
  * Create/destroy watchdog threads as CPUs come and go:
  */
@@ -153,7 +188,6 @@ static int __cpuinit
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
-	struct perf_event *event;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
@@ -162,29 +196,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
-		wd_attr.sample_period = hw_nmi_get_sample_period();
-		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
-		if (IS_ERR(event)) {
-			wd_attr.type = PERF_TYPE_SOFTWARE;
-			event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
-			if (IS_ERR(event)) {
-				printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
-				return NOTIFY_BAD;
-			}
-		}
-		per_cpu(nmi_watchdog_ev, hotcpu) = event;
-		perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
+		if (enable_nmi_watchdog(hotcpu))
+			return NOTIFY_BAD;
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		perf_event_disable(per_cpu(nmi_watchdog_ev, hotcpu));
+		disable_nmi_watchdog(hotcpu);
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		event = per_cpu(nmi_watchdog_ev, hotcpu);
-		per_cpu(nmi_watchdog_ev, hotcpu) = NULL;
-		perf_event_release_kernel(event);
 		break;
 #endif /* CONFIG_HOTPLUG_CPU */
 	}
-- 
cgit v1.2.3


From 96ca4028aca0638d0e11dcbfabc4283054072933 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 16 Feb 2010 17:02:25 -0500
Subject: nmi_watchdog: Properly configure for software events

Paul Mackerras brought up a good point that when fallbacking to
software events, I may have been lucky in my configuration.

Modified the code to explicit provide a new configuration for
software events.

Suggested-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1266357745-26671-1-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/nmi_watchdog.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 633b230c2319..3c75cbf3acb8 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -61,7 +61,7 @@ static int __init setup_nmi_watchdog(char *str)
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
-struct perf_event_attr wd_attr = {
+struct perf_event_attr wd_hw_attr = {
 	.type = PERF_TYPE_HARDWARE,
 	.config = PERF_COUNT_HW_CPU_CYCLES,
 	.size = sizeof(struct perf_event_attr),
@@ -69,6 +69,14 @@ struct perf_event_attr wd_attr = {
 	.disabled = 1,
 };
 
+struct perf_event_attr wd_sw_attr = {
+	.type = PERF_TYPE_SOFTWARE,
+	.config = PERF_COUNT_SW_CPU_CLOCK,
+	.size = sizeof(struct perf_event_attr),
+	.pinned = 1,
+	.disabled = 1,
+};
+
 void wd_overflow(struct perf_event *event, int nmi,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
@@ -105,6 +113,7 @@ void wd_overflow(struct perf_event *event, int nmi,
 static int enable_nmi_watchdog(int cpu)
 {
 	struct perf_event *event;
+	struct perf_event_attr *wd_attr;
 
 	event = per_cpu(nmi_watchdog_ev, cpu);
 	if (event && event->state > PERF_EVENT_STATE_OFF)
@@ -112,11 +121,15 @@ static int enable_nmi_watchdog(int cpu)
 
 	if (event == NULL) {
 		/* Try to register using hardware perf events first */
-		wd_attr.sample_period = hw_nmi_get_sample_period();
-		event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow);
+		wd_attr = &wd_hw_attr;
+		wd_attr->sample_period = hw_nmi_get_sample_period();
+		event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
-			wd_attr.type = PERF_TYPE_SOFTWARE;
-			event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow);
+			/* hardware doesn't exist or not supported, fallback to software events */
+			printk("nmi_watchdog: hardware not available, trying software events\n");
+			wd_attr = &wd_sw_attr;
+			wd_attr->sample_period = NSEC_PER_SEC;
+			event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow);
 			if (IS_ERR(event)) {
 				printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", cpu, event);
 				return -1;
-- 
cgit v1.2.3


From 47195d57636604ff6048b0d7aa3e4ed9643f6073 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Mon, 22 Feb 2010 18:09:03 -0500
Subject: nmi_watchdog: Clean up various small details

Mostly copy/paste whitespace damage with a couple of nitpicks by
the checkpatch script. Fix the struct definition as requested by Ingo too.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
--
 arch/x86/kernel/apic/hw_nmi.c |   14 +++++------
 arch/x86/kernel/traps.c       |    6 ++--
 include/linux/nmi.h           |    2 -
 kernel/nmi_watchdog.c         |   51 ++++++++++++++++++++----------------------
 4 files changed, 36 insertions(+), 37 deletions(-)
---
 kernel/nmi_watchdog.c | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 3c75cbf3acb8..0a6f57f537a7 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -50,31 +50,31 @@ void touch_all_nmi_watchdog(void)
 
 static int __init setup_nmi_watchdog(char *str)
 {
-        if (!strncmp(str, "panic", 5)) {
-                panic_on_timeout = 1;
-                str = strchr(str, ',');
-                if (!str)
-                        return 1;
-                ++str;
-        }
-        return 1;
+	if (!strncmp(str, "panic", 5)) {
+		panic_on_timeout = 1;
+		str = strchr(str, ',');
+		if (!str)
+			return 1;
+		++str;
+	}
+	return 1;
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
 struct perf_event_attr wd_hw_attr = {
-	.type = PERF_TYPE_HARDWARE,
-	.config = PERF_COUNT_HW_CPU_CYCLES,
-	.size = sizeof(struct perf_event_attr),
-	.pinned = 1,
-	.disabled = 1,
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
 };
 
 struct perf_event_attr wd_sw_attr = {
-	.type = PERF_TYPE_SOFTWARE,
-	.config = PERF_COUNT_SW_CPU_CLOCK,
-	.size = sizeof(struct perf_event_attr),
-	.pinned = 1,
-	.disabled = 1,
+	.type		= PERF_TYPE_SOFTWARE,
+	.config		= PERF_COUNT_SW_CPU_CLOCK,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
 };
 
 void wd_overflow(struct perf_event *event, int nmi,
@@ -95,16 +95,15 @@ void wd_overflow(struct perf_event *event, int nmi,
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 */
-		per_cpu(alert_counter,cpu) += 1;
-		if (per_cpu(alert_counter,cpu) == 5) {
-			if (panic_on_timeout) {
+		per_cpu(alert_counter, cpu) += 1;
+		if (per_cpu(alert_counter, cpu) == 5) {
+			if (panic_on_timeout)
 				panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
-			} else {
+			else
 				WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
-			}
 		}
 	} else {
-		per_cpu(alert_counter,cpu) = 0;
+		per_cpu(alert_counter, cpu) = 0;
 	}
 
 	return;
@@ -126,7 +125,7 @@ static int enable_nmi_watchdog(int cpu)
 		event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
 			/* hardware doesn't exist or not supported, fallback to software events */
-			printk("nmi_watchdog: hardware not available, trying software events\n");
+			printk(KERN_INFO "nmi_watchdog: hardware not available, trying software events\n");
 			wd_attr = &wd_sw_attr;
 			wd_attr->sample_period = NSEC_PER_SEC;
 			event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow);
@@ -182,7 +181,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write,
 	if (nmi_watchdog_enabled) {
 		for_each_online_cpu(cpu)
 			if (enable_nmi_watchdog(cpu)) {
-				printk("NMI watchdog failed configuration, "
+				printk(KERN_ERR "NMI watchdog failed configuration, "
 					" can not be enabled\n");
 			}
 	} else {
-- 
cgit v1.2.3


From 5671a10e2bc7f99d9157c6044faf8be2ef302361 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Mar 2010 14:20:14 +0100
Subject: nmi_watchdog: Tell the world we're active

Because I was wondering why perf stat wasn't working as expected..

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Don Zickus <dzickus@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/nmi_watchdog.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 0a6f57f537a7..a79d211c30df 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -244,6 +244,8 @@ static int __init spawn_nmi_watchdog_task(void)
 	if (nonmi_watchdog)
 		return 0;
 
+	printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+
 	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	if (err == NOTIFY_BAD) {
 		BUG();
-- 
cgit v1.2.3


From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 7 May 2010 17:11:44 -0400
Subject: lockup_detector: Combine nmi_watchdog and softlockup detector

The new nmi_watchdog (which uses the perf event subsystem) is very
similar in structure to the softlockup detector.  Using Ingo's
suggestion, I combined the two functionalities into one file:
kernel/watchdog.c.

Now both the nmi_watchdog (or hardlockup detector) and softlockup
detector sit on top of the perf event subsystem, which is run every
60 seconds or so to see if there are any lockups.

To detect hardlockups, cpus not responding to interrupts, I
implemented an hrtimer that runs 5 times for every perf event
overflow event.  If that stops counting on a cpu, then the cpu is
most likely in trouble.

To detect softlockups, tasks not yielding to the scheduler, I used the
previous kthread idea that now gets kicked every time the hrtimer fires.
If the kthread isn't being scheduled neither is anyone else and the
warning is printed to the console.

I tested this on x86_64 and both the softlockup and hardlockup paths
work.

V2:
- cleaned up the Kconfig and softlockup combination
- surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI
- seperated out the softlockup case from perf event subsystem
- re-arranged the enabling/disabling nmi watchdog from proc space
- added cpumasks for hardlockup failure cases
- removed fallback to soft events if no PMU exists for hard events

V3:
- comment cleanups
- drop support for older softlockup code
- per_cpu cleanups
- completely remove software clock base hardlockup detector
- use per_cpu masking on hard/soft lockup detection
- #ifdef cleanups
- rename config option NMI_WATCHDOG to LOCKUP_DETECTOR
- documentation additions

V4:
- documentation fixes
- convert per_cpu to __get_cpu_var
- powerpc compile fixes

V5:
- split apart warn flags for hard and soft lockups

TODO:
- figure out how to make an arch-agnostic clock2cycles call
  (if possible) to feed into perf events as a sample period

[fweisbec: merged conflict patch]

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/Makefile   |   3 +-
 kernel/sysctl.c   |  21 +-
 kernel/watchdog.c | 592 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 608 insertions(+), 8 deletions(-)
 create mode 100644 kernel/watchdog.c

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index d5c30060ac14..6adeafc3e259 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,9 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
-obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
+obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a38af430f0d8..0f9adda85f97 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -74,7 +74,7 @@
 #include <scsi/sg.h>
 #endif
 
-#ifdef CONFIG_NMI_WATCHDOG
+#ifdef CONFIG_LOCKUP_DETECTOR
 #include <linux/nmi.h>
 #endif
 
@@ -686,16 +686,25 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-#if defined(CONFIG_NMI_WATCHDOG)
+#if defined(CONFIG_LOCKUP_DETECTOR)
 	{
-		.procname       = "nmi_watchdog",
-		.data           = &nmi_watchdog_enabled,
+		.procname       = "watchdog",
+		.data           = &watchdog_enabled,
 		.maxlen         = sizeof (int),
 		.mode           = 0644,
-		.proc_handler   = proc_nmi_enabled,
+		.proc_handler   = proc_dowatchdog_enabled,
+	},
+	{
+		.procname	= "watchdog_thresh",
+		.data		= &softlockup_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dowatchdog_thresh,
+		.extra1		= &neg_one,
+		.extra2		= &sixty,
 	},
 #endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
 		.procname       = "unknown_nmi_panic",
 		.data           = &unknown_nmi_panic,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
new file mode 100644
index 000000000000..6b7fad8497af
--- /dev/null
+++ b/kernel/watchdog.c
@@ -0,0 +1,592 @@
+/*
+ * Detect hard and soft lockups on a system
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * this code detects hard lockups: incidents in where on a CPU
+ * the kernel does not respond to anything except NMI.
+ *
+ * Note: Most of this code is borrowed heavily from softlockup.c,
+ * so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * to those contributors as well.
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+int watchdog_enabled;
+int __read_mostly softlockup_thresh = 60;
+
+static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
+static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
+static DEFINE_PER_CPU(bool, softlockup_touch_sync);
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+#ifdef CONFIG_PERF_EVENTS_NMI
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+#endif
+
+static int __read_mostly did_panic;
+static int __initdata no_watchdog;
+
+
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+#ifdef CONFIG_PERF_EVENTS_NMI
+static int hardlockup_panic;
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+#endif
+
+unsigned int __read_mostly softlockup_panic =
+			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+	softlockup_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
+
+static int __init nowatchdog_setup(char *str)
+{
+	no_watchdog = 1;
+	return 1;
+}
+__setup("nowatchdog", nowatchdog_setup);
+
+/* deprecated */
+static int __init nosoftlockup_setup(char *str)
+{
+	no_watchdog = 1;
+	return 1;
+}
+__setup("nosoftlockup", nosoftlockup_setup);
+/*  */
+
+
+/*
+ * Returns seconds, approximately.  We don't need nanosecond
+ * resolution, and we don't need to waste time with a big divide when
+ * 2^30ns == 1.074s.
+ */
+static unsigned long get_timestamp(int this_cpu)
+{
+	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+}
+
+static unsigned long get_sample_period(void)
+{
+	/*
+	 * convert softlockup_thresh from seconds to ns
+	 * the divide by 5 is to give hrtimer 5 chances to
+	 * increment before the hardlockup detector generates
+	 * a warning
+	 */
+	return softlockup_thresh / 5 * NSEC_PER_SEC;
+}
+
+/* Commands for resetting the watchdog */
+static void __touch_watchdog(void)
+{
+	int this_cpu = raw_smp_processor_id();
+
+	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+}
+
+void touch_watchdog(void)
+{
+	__get_cpu_var(watchdog_touch_ts) = 0;
+}
+EXPORT_SYMBOL(touch_watchdog);
+
+void touch_all_watchdog(void)
+{
+	int cpu;
+
+	/*
+	 * this is done lockless
+	 * do we care if a 0 races with a timestamp?
+	 * all it means is the softlock check starts one cycle later
+	 */
+	for_each_online_cpu(cpu)
+		per_cpu(watchdog_touch_ts, cpu) = 0;
+}
+
+void touch_nmi_watchdog(void)
+{
+	touch_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+void touch_all_nmi_watchdog(void)
+{
+	touch_all_watchdog();
+}
+
+void touch_softlockup_watchdog(void)
+{
+	touch_watchdog();
+}
+
+void touch_all_softlockup_watchdogs(void)
+{
+	touch_all_watchdog();
+}
+
+void touch_softlockup_watchdog_sync(void)
+{
+	__raw_get_cpu_var(softlockup_touch_sync) = true;
+	__raw_get_cpu_var(watchdog_touch_ts) = 0;
+}
+
+void softlockup_tick(void)
+{
+}
+
+#ifdef CONFIG_PERF_EVENTS_NMI
+/* watchdog detector functions */
+static int is_hardlockup(int cpu)
+{
+	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+		return 1;
+
+	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+	return 0;
+}
+#endif
+
+static int is_softlockup(unsigned long touch_ts, int cpu)
+{
+	unsigned long now = get_timestamp(cpu);
+
+	/* Warn about unreasonable delays: */
+	if (time_after(now, touch_ts + softlockup_thresh))
+		return now - touch_ts;
+
+	return 0;
+}
+
+static int
+watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	did_panic = 1;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+	.notifier_call = watchdog_panic,
+};
+
+#ifdef CONFIG_PERF_EVENTS_NMI
+static struct perf_event_attr wd_hw_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
+};
+
+/* Callback function for perf event subsystem */
+void watchdog_overflow_callback(struct perf_event *event, int nmi,
+		 struct perf_sample_data *data,
+		 struct pt_regs *regs)
+{
+	int this_cpu = smp_processor_id();
+	unsigned long touch_ts = per_cpu(watchdog_touch_ts, this_cpu);
+
+	if (touch_ts == 0) {
+		__touch_watchdog();
+		return;
+	}
+
+	/* check for a hardlockup
+	 * This is done by making sure our timer interrupt
+	 * is incrementing.  The timer interrupt should have
+	 * fired multiple times before we overflow'd.  If it hasn't
+	 * then this is a good indication the cpu is stuck
+	 */
+	if (is_hardlockup(this_cpu)) {
+		/* only print hardlockups once */
+		if (__get_cpu_var(hard_watchdog_warn) == true)
+			return;
+
+		if (hardlockup_panic)
+			panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+		else
+			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+
+		__get_cpu_var(hard_watchdog_warn) = true;
+		return;
+	}
+
+	__get_cpu_var(hard_watchdog_warn) = false;
+	return;
+}
+static void watchdog_interrupt_count(void)
+{
+	__get_cpu_var(hrtimer_interrupts)++;
+}
+#else
+static inline void watchdog_interrupt_count(void) { return; }
+#endif /* CONFIG_PERF_EVENTS_NMI */
+
+/* watchdog kicker functions */
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
+{
+	int this_cpu = smp_processor_id();
+	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+	struct pt_regs *regs = get_irq_regs();
+	int duration;
+
+	/* kick the hardlockup detector */
+	watchdog_interrupt_count();
+
+	/* kick the softlockup detector */
+	wake_up_process(__get_cpu_var(softlockup_watchdog));
+
+	/* .. and repeat */
+	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+
+	if (touch_ts == 0) {
+		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
+			/*
+			 * If the time stamp was touched atomically
+			 * make sure the scheduler tick is up to date.
+			 */
+			per_cpu(softlockup_touch_sync, this_cpu) = false;
+			sched_clock_tick();
+		}
+		__touch_watchdog();
+		return HRTIMER_RESTART;
+	}
+
+	/* check for a softlockup
+	 * This is done by making sure a high priority task is
+	 * being scheduled.  The task touches the watchdog to
+	 * indicate it is getting cpu time.  If it hasn't then
+	 * this is a good indication some task is hogging the cpu
+	 */
+	duration = is_softlockup(touch_ts, this_cpu);
+	if (unlikely(duration)) {
+		/* only warn once */
+		if (__get_cpu_var(soft_watchdog_warn) == true)
+			return HRTIMER_RESTART;
+
+		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+			this_cpu, duration,
+			current->comm, task_pid_nr(current));
+		print_modules();
+		print_irqtrace_events(current);
+		if (regs)
+			show_regs(regs);
+		else
+			dump_stack();
+
+		if (softlockup_panic)
+			panic("softlockup: hung tasks");
+		__get_cpu_var(soft_watchdog_warn) = true;
+	} else
+		__get_cpu_var(soft_watchdog_warn) = false;
+
+	return HRTIMER_RESTART;
+}
+
+
+/*
+ * The watchdog thread - touches the timestamp.
+ */
+static int watchdog(void *__bind_cpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+
+	/* initialize timestamp */
+	__touch_watchdog();
+
+	/* kick off the timer for the hardlockup detector */
+	/* done here because hrtimer_start can only pin to smp_processor_id() */
+	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
+		      HRTIMER_MODE_REL_PINNED);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	/*
+	 * Run briefly once per second to reset the softlockup timestamp.
+	 * If this gets delayed for more than 60 seconds then the
+	 * debug-printout triggers in softlockup_tick().
+	 */
+	while (!kthread_should_stop()) {
+		__touch_watchdog();
+		schedule();
+
+		if (kthread_should_stop())
+			break;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_PERF_EVENTS_NMI
+static int watchdog_nmi_enable(int cpu)
+{
+	struct perf_event_attr *wd_attr;
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	/* is it already setup and enabled? */
+	if (event && event->state > PERF_EVENT_STATE_OFF)
+		goto out;
+
+	/* it is setup but not enabled */
+	if (event != NULL)
+		goto out_enable;
+
+	/* Try to register using hardware perf events */
+	wd_attr = &wd_hw_attr;
+	wd_attr->sample_period = hw_nmi_get_sample_period();
+	event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
+	if (!IS_ERR(event)) {
+		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+		goto out_save;
+	}
+
+	printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+	return -1;
+
+	/* success path */
+out_save:
+	per_cpu(watchdog_ev, cpu) = event;
+out_enable:
+	perf_event_enable(per_cpu(watchdog_ev, cpu));
+out:
+	return 0;
+}
+
+static void watchdog_nmi_disable(int cpu)
+{
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	if (event) {
+		perf_event_disable(event);
+		per_cpu(watchdog_ev, cpu) = NULL;
+
+		/* should be in cleanup, but blocks oprofile */
+		perf_event_release_kernel(event);
+	}
+	return;
+}
+#else
+static int watchdog_nmi_enable(int cpu) { return 0; }
+static void watchdog_nmi_disable(int cpu) { return; }
+#endif /* CONFIG_PERF_EVENTS_NMI */
+
+/* prepare/enable/disable routines */
+static int watchdog_prepare_cpu(int cpu)
+{
+	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
+
+	WARN_ON(per_cpu(softlockup_watchdog, cpu));
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+
+	return 0;
+}
+
+static int watchdog_enable(int cpu)
+{
+	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+
+	/* enable the perf event */
+	if (watchdog_nmi_enable(cpu) != 0)
+		return -1;
+
+	/* create the watchdog thread */
+	if (!p) {
+		p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
+		if (IS_ERR(p)) {
+			printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
+			return -1;
+		}
+		kthread_bind(p, cpu);
+		per_cpu(watchdog_touch_ts, cpu) = 0;
+		per_cpu(softlockup_watchdog, cpu) = p;
+		wake_up_process(p);
+	}
+
+	return 0;
+}
+
+static void watchdog_disable(int cpu)
+{
+	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
+
+	/*
+	 * cancel the timer first to stop incrementing the stats
+	 * and waking up the kthread
+	 */
+	hrtimer_cancel(hrtimer);
+
+	/* disable the perf event */
+	watchdog_nmi_disable(cpu);
+
+	/* stop the watchdog thread */
+	if (p) {
+		per_cpu(softlockup_watchdog, cpu) = NULL;
+		kthread_stop(p);
+	}
+
+	/* if any cpu succeeds, watchdog is considered enabled for the system */
+	watchdog_enabled = 1;
+}
+
+static void watchdog_enable_all_cpus(void)
+{
+	int cpu;
+	int result;
+
+	for_each_online_cpu(cpu)
+		result += watchdog_enable(cpu);
+
+	if (result)
+		printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
+
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		watchdog_disable(cpu);
+
+	/* if all watchdogs are disabled, then they are disabled for the system */
+	watchdog_enabled = 0;
+}
+
+
+/* sysctl functions */
+#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+
+int proc_dowatchdog_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_dointvec(table, write, buffer, length, ppos);
+
+	if (watchdog_enabled)
+		watchdog_enable_all_cpus();
+	else
+		watchdog_disable_all_cpus();
+	return 0;
+}
+
+int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+			     void __user *buffer,
+			     size_t *lenp, loff_t *ppos)
+{
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+/* stub functions */
+int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
+			     void __user *buffer,
+			     size_t *lenp, loff_t *ppos)
+{
+	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
+}
+/* end of stub functions */
+#endif /* CONFIG_SYSCTL */
+
+
+/*
+ * Create/destroy watchdog threads as CPUs come and go:
+ */
+static int __cpuinit
+cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (watchdog_prepare_cpu(hotcpu))
+			return NOTIFY_BAD;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		if (watchdog_enable(hotcpu))
+			return NOTIFY_BAD;
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		watchdog_disable(hotcpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		watchdog_disable(hotcpu);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+static int __init spawn_watchdog_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+	int err;
+
+	if (no_watchdog)
+		return 0;
+
+	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	WARN_ON(err == NOTIFY_BAD);
+
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+
+	return 0;
+}
+early_initcall(spawn_watchdog_task);
-- 
cgit v1.2.3


From 332fbdbca3f7716c5620970755ae054d213bcc4e Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 7 May 2010 17:11:45 -0400
Subject: lockup_detector: Touch_softlockup cleanups and softlockup_tick
 removal

Just some code cleanup to make touch_softlockup clearer and remove the
softlockup_tick function as it is no longer needed.

Also remove the /proc softlockup_thres call as it has been changed to
watchdog_thres.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/sysctl.c   |  9 ---------
 kernel/timer.c    |  1 -
 kernel/watchdog.c | 35 +++--------------------------------
 3 files changed, 3 insertions(+), 42 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f9adda85f97..999bc3fccf47 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -817,15 +817,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname	= "softlockup_thresh",
-		.data		= &softlockup_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dosoftlockup_thresh,
-		.extra1		= &neg_one,
-		.extra2		= &sixty,
-	},
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	{
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..e8de5eb07a02 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1225,7 +1225,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6b7fad8497af..f1541b7e3244 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -119,13 +119,12 @@ static void __touch_watchdog(void)
 	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
 }
 
-void touch_watchdog(void)
+void touch_softlockup_watchdog(void)
 {
 	__get_cpu_var(watchdog_touch_ts) = 0;
 }
-EXPORT_SYMBOL(touch_watchdog);
 
-void touch_all_watchdog(void)
+void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
 
@@ -140,35 +139,16 @@ void touch_all_watchdog(void)
 
 void touch_nmi_watchdog(void)
 {
-	touch_watchdog();
+	touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-void touch_all_nmi_watchdog(void)
-{
-	touch_all_watchdog();
-}
-
-void touch_softlockup_watchdog(void)
-{
-	touch_watchdog();
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
-	touch_all_watchdog();
-}
-
 void touch_softlockup_watchdog_sync(void)
 {
 	__raw_get_cpu_var(softlockup_touch_sync) = true;
 	__raw_get_cpu_var(watchdog_touch_ts) = 0;
 }
 
-void softlockup_tick(void)
-{
-}
-
 #ifdef CONFIG_PERF_EVENTS_NMI
 /* watchdog detector functions */
 static int is_hardlockup(int cpu)
@@ -522,15 +502,6 @@ int proc_dowatchdog_thresh(struct ctl_table *table, int write,
 {
 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
-
-/* stub functions */
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
-}
-/* end of stub functions */
 #endif /* CONFIG_SYSCTL */
 
 
-- 
cgit v1.2.3


From 2508ce1845a3b256798532b2c6b7997c2dc6533b Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 7 May 2010 17:11:46 -0400
Subject: lockup_detector: Remove old softlockup code

Now that is no longer compiled or used, just remove it.

Also move some of the code wrapped with DETECT_SOFTLOCKUP to the
LOCKUP_DETECTOR wrappers because that is the code that uses it now.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-4-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/softlockup.c | 293 ----------------------------------------------------
 kernel/sysctl.c     |  22 ++--
 2 files changed, 10 insertions(+), 305 deletions(-)
 delete mode 100644 kernel/softlockup.c

(limited to 'kernel')

diff --git a/kernel/softlockup.c b/kernel/softlockup.c
deleted file mode 100644
index 4b493f67dcb5..000000000000
--- a/kernel/softlockup.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Detect Soft Lockups
- *
- * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
- *
- * this code detects soft lockups: incidents in where on a CPU
- * the kernel does not reschedule for 10 seconds or more.
- */
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/nmi.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/lockdep.h>
-#include <linux/notifier.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-
-#include <asm/irq_regs.h>
-
-static DEFINE_SPINLOCK(print_lock);
-
-static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
-static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
-static DEFINE_PER_CPU(bool, softlock_touch_sync);
-
-static int __read_mostly did_panic;
-int __read_mostly softlockup_thresh = 60;
-
-/*
- * Should we panic (and reboot, if panic_timeout= is set) when a
- * soft-lockup occurs:
- */
-unsigned int __read_mostly softlockup_panic =
-				CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-
-static int __init softlockup_panic_setup(char *str)
-{
-	softlockup_panic = simple_strtoul(str, NULL, 0);
-
-	return 1;
-}
-__setup("softlockup_panic=", softlockup_panic_setup);
-
-static int
-softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	did_panic = 1;
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
-	.notifier_call = softlock_panic,
-};
-
-/*
- * Returns seconds, approximately.  We don't need nanosecond
- * resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
- */
-static unsigned long get_timestamp(int this_cpu)
-{
-	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
-}
-
-static void __touch_softlockup_watchdog(void)
-{
-	int this_cpu = raw_smp_processor_id();
-
-	__raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
-}
-
-void touch_softlockup_watchdog(void)
-{
-	__raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-EXPORT_SYMBOL(touch_softlockup_watchdog);
-
-void touch_softlockup_watchdog_sync(void)
-{
-	__raw_get_cpu_var(softlock_touch_sync) = true;
-	__raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
-	int cpu;
-
-	/* Cause each CPU to re-update its timestamp rather than complain */
-	for_each_online_cpu(cpu)
-		per_cpu(softlockup_touch_ts, cpu) = 0;
-}
-EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
-
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	touch_all_softlockup_watchdogs();
-	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-
-/*
- * This callback runs from the timer interrupt, and checks
- * whether the watchdog thread has hung or not:
- */
-void softlockup_tick(void)
-{
-	int this_cpu = smp_processor_id();
-	unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
-	unsigned long print_ts;
-	struct pt_regs *regs = get_irq_regs();
-	unsigned long now;
-
-	/* Is detection switched off? */
-	if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
-		/* Be sure we don't false trigger if switched back on */
-		if (touch_ts)
-			per_cpu(softlockup_touch_ts, this_cpu) = 0;
-		return;
-	}
-
-	if (touch_ts == 0) {
-		if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
-			/*
-			 * If the time stamp was touched atomically
-			 * make sure the scheduler tick is up to date.
-			 */
-			per_cpu(softlock_touch_sync, this_cpu) = false;
-			sched_clock_tick();
-		}
-		__touch_softlockup_watchdog();
-		return;
-	}
-
-	print_ts = per_cpu(softlockup_print_ts, this_cpu);
-
-	/* report at most once a second */
-	if (print_ts == touch_ts || did_panic)
-		return;
-
-	/* do not print during early bootup: */
-	if (unlikely(system_state != SYSTEM_RUNNING)) {
-		__touch_softlockup_watchdog();
-		return;
-	}
-
-	now = get_timestamp(this_cpu);
-
-	/*
-	 * Wake up the high-prio watchdog task twice per
-	 * threshold timespan.
-	 */
-	if (time_after(now - softlockup_thresh/2, touch_ts))
-		wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
-
-	/* Warn about unreasonable delays: */
-	if (time_before_eq(now - softlockup_thresh, touch_ts))
-		return;
-
-	per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
-
-	spin_lock(&print_lock);
-	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
-			this_cpu, now - touch_ts,
-			current->comm, task_pid_nr(current));
-	print_modules();
-	print_irqtrace_events(current);
-	if (regs)
-		show_regs(regs);
-	else
-		dump_stack();
-	spin_unlock(&print_lock);
-
-	if (softlockup_panic)
-		panic("softlockup: hung tasks");
-}
-
-/*
- * The watchdog thread - runs every second and touches the timestamp.
- */
-static int watchdog(void *__bind_cpu)
-{
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
-	sched_setscheduler(current, SCHED_FIFO, &param);
-
-	/* initialize timestamp */
-	__touch_softlockup_watchdog();
-
-	set_current_state(TASK_INTERRUPTIBLE);
-	/*
-	 * Run briefly once per second to reset the softlockup timestamp.
-	 * If this gets delayed for more than 60 seconds then the
-	 * debug-printout triggers in softlockup_tick().
-	 */
-	while (!kthread_should_stop()) {
-		__touch_softlockup_watchdog();
-		schedule();
-
-		if (kthread_should_stop())
-			break;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	__set_current_state(TASK_RUNNING);
-
-	return 0;
-}
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-	struct task_struct *p;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
-		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
-		if (IS_ERR(p)) {
-			printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
-			return NOTIFY_BAD;
-		}
-		per_cpu(softlockup_touch_ts, hotcpu) = 0;
-		per_cpu(softlockup_watchdog, hotcpu) = p;
-		kthread_bind(p, hotcpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		if (!per_cpu(softlockup_watchdog, hotcpu))
-			break;
-		/* Unbind so it can run.  Fall thru. */
-		kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
-			     cpumask_any(cpu_online_mask));
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		p = per_cpu(softlockup_watchdog, hotcpu);
-		per_cpu(softlockup_watchdog, hotcpu) = NULL;
-		kthread_stop(p);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
-};
-
-static int __initdata nosoftlockup;
-
-static int __init nosoftlockup_setup(char *str)
-{
-	nosoftlockup = 1;
-	return 1;
-}
-__setup("nosoftlockup", nosoftlockup_setup);
-
-static int __init spawn_softlockup_task(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	if (nosoftlockup)
-		return 0;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	if (err == NOTIFY_BAD) {
-		BUG();
-		return 1;
-	}
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
-	return 0;
-}
-early_initcall(spawn_softlockup_task);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 999bc3fccf47..04bcd8abe09c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -108,7 +108,7 @@ extern int blk_iopoll_enabled;
 #endif
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
 static int neg_one = -1;
 #endif
@@ -703,6 +703,15 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &neg_one,
 		.extra2		= &sixty,
 	},
+	{
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
@@ -807,17 +816,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	{
-		.procname	= "softlockup_panic",
-		.data		= &softlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
-	},
-#endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	{
 		.procname	= "hung_task_panic",
-- 
cgit v1.2.3


From f69bcf60c3f17aa367e16eef7bc6ab001ea6d58a Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 7 May 2010 17:11:47 -0400
Subject: lockup_detector: Remove nmi_watchdog.c file

This file migrated to kernel/watchdog.c and then combined with
kernel/softlockup.c.  As a result kernel/nmi_watchdog.c is no longer
needed.  Just remove it.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-5-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/nmi_watchdog.c | 259 --------------------------------------------------
 1 file changed, 259 deletions(-)
 delete mode 100644 kernel/nmi_watchdog.c

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
deleted file mode 100644
index a79d211c30df..000000000000
--- a/kernel/nmi_watchdog.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Detect Hard Lockups using the NMI
- *
- * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
- *
- * this code detects hard lockups: incidents in where on a CPU
- * the kernel does not respond to anything except NMI.
- *
- * Note: Most of this code is borrowed heavily from softlockup.c,
- * so thanks to Ingo for the initial implementation.
- * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
- * to those contributors as well.
- */
-
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/nmi.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/lockdep.h>
-#include <linux/notifier.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-
-#include <asm/irq_regs.h>
-#include <linux/perf_event.h>
-
-static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
-static DEFINE_PER_CPU(int, nmi_watchdog_touch);
-static DEFINE_PER_CPU(long, alert_counter);
-
-static int panic_on_timeout;
-
-void touch_nmi_watchdog(void)
-{
-	__raw_get_cpu_var(nmi_watchdog_touch) = 1;
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-void touch_all_nmi_watchdog(void)
-{
-	int cpu;
-
-	for_each