#include <linux/config.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
+#include <linux/sysctl.h>
#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
+#include <asm/div64.h>
#include <asm/nmi.h>
+#include "mach_traps.h"
+
unsigned int nmi_watchdog = NMI_NONE;
+extern int unknown_nmi_panic;
static unsigned int nmi_hz = HZ;
-unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
/*
* This is maintained separately from nmi_active because the NMI
* watchdog may also be driven from the I/O APIC timer.
*/
-static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG (1<<0)
#define LAPIC_NMI_RESERVED (1<<1)
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_ESCR_OS (1<<3)
#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI (1<<26)
+#define P4_CCCR_OVF_PMI0 (1<<26)
+#define P4_CCCR_OVF_PMI1 (1<<27)
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
#define P4_CCCR_COMPLEMENT (1<<19)
#define P4_CCCR_COMPARE (1<<18)
#define MSR_P4_IQ_COUNTER0 0x30C
#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0 \
- (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
+ (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
-int __init check_nmi_watchdog (void)
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+ volatile int *endflag = data;
+ local_irq_enable();
+ /* Intentionally don't use cpu_relax here. This is
+ to make sure that the performance counter really ticks,
+ even if there is a simulator or similar that catches the
+ pause instruction. On a real HT machine this is fine because
+ all other CPUs are busy with "useless" delay loops and don't
+ care if they get somewhat less cycles. */
+ while (*endflag == 0)
+ barrier();
+}
+#endif
+
+static int __init check_nmi_watchdog(void)
{
- unsigned int prev_nmi_count[NR_CPUS];
+ volatile int endflag = 0;
+ unsigned int *prev_nmi_count;
int cpu;
- printk(KERN_INFO "testing NMI watchdog ... ");
+ if (nmi_watchdog == NMI_NONE)
+ return 0;
+
+ prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+ if (!prev_nmi_count)
+ return -1;
+
+ printk(KERN_INFO "Testing NMI watchdog ... ");
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
+ for_each_cpu(cpu)
+ prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
local_irq_enable();
mdelay((10*1000)/nmi_hz); // wait 10 ticks
- /* FIXME: Only boot CPU is online at this stage. Check CPUs
- as they come up. */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
- if (!cpu_online(cpu))
+#ifdef CONFIG_SMP
+ /* Check cpu_callin_map here because that is set
+ after the timer is started. */
+ if (!cpu_isset(cpu, cpu_callin_map))
continue;
+#endif
if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+ endflag = 1;
+ printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+ cpu,
+ prev_nmi_count[cpu],
+ nmi_count(cpu));
nmi_active = 0;
lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+ kfree(prev_nmi_count);
return -1;
}
}
+ endflag = 1;
printk("OK.\n");
/* now that we know it works we can reduce NMI frequency to
if (nmi_watchdog == NMI_LOCAL_APIC)
nmi_hz = 1;
+ kfree(prev_nmi_count);
return 0;
}
+/* This needs to happen later in boot so counters are working */
+late_initcall(check_nmi_watchdog);
static int __init setup_nmi_watchdog(char *str)
{
wrmsr(MSR_P6_EVNTSEL0, 0, 0);
break;
case 15:
- if (boot_cpu_data.x86_model > 0x3)
+ if (boot_cpu_data.x86_model > 0x4)
break;
wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
static int nmi_pm_active; /* nmi_active before suspend */
-static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
+static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
nmi_pm_active = nmi_active;
disable_lapic_nmi_watchdog();
wrmsr(base+i, 0, 0);
}
+static void write_watchdog_counter(const char *descr)
+{
+ u64 count = (u64)cpu_khz * 1000;
+
+ do_div(count, nmi_hz);
+ if(descr)
+ Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ wrmsrl(nmi_perfctr_msr, 0 - count);
+}
+
static void setup_k7_watchdog(void)
{
unsigned int evntsel;
| K7_NMI_EVENT;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
- Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+ write_watchdog_counter("K7_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
| P6_NMI_EVENT;
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
- Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+ write_watchdog_counter("P6_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
return 0;
nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
+ nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
+#ifdef CONFIG_SMP
+ if (smp_num_siblings == 2)
+ nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+#endif
if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
clear_msr_range(0x3F1, 2);
/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
docs doesn't fully define it, so leave it alone for now. */
- clear_msr_range(0x3A0, 31);
+ if (boot_cpu_data.x86_model >= 0x3) {
+ /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
+ clear_msr_range(0x3A0, 26);
+ clear_msr_range(0x3BC, 3);
+ } else {
+ clear_msr_range(0x3A0, 31);
+ }
clear_msr_range(0x3C0, 6);
clear_msr_range(0x3C8, 6);
clear_msr_range(0x3E0, 2);
wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+ write_watchdog_counter("P4_IQ_COUNTER0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
return 1;
}
setup_p6_watchdog();
break;
case 15:
- if (boot_cpu_data.x86_model > 0x3)
+ if (boot_cpu_data.x86_model > 0x4)
return;
if (!setup_p4_watchdog())
nmi_active = 1;
}
-static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
-
/*
* the best way to detect whether a CPU has a 'hard lockup' problem
* is to check it's local APIC timer IRQ counts. If they are not
*/
for (i = 0; i < NR_CPUS; i++)
alert_counter[i] = 0;
+
+ /*
+ * Tickle the softlockup detector too:
+ */
+ touch_softlockup_watchdog();
}
+extern void die_nmi(struct pt_regs *, const char *msg);
+
void nmi_watchdog_tick (struct pt_regs * regs)
{
*/
int sum, cpu = smp_processor_id();
- sum = irq_stat[cpu].apic_timer_irqs;
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
if (last_irq_sums[cpu] == sum) {
/*
* wait a few IRQs (5 seconds) before doing the oops ...
*/
alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz) {
- spin_lock(&nmi_print_lock);
+ if (alert_counter[cpu] == 5*nmi_hz)
/*
- * We are in trouble anyway, lets at least try
- * to get a message out.
+ * die_nmi will return ONLY if NOTIFY_STOP happens..
*/
- bust_spinlocks(1);
- printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip);
- show_registers(regs);
- printk("console shuts up ...\n");
- console_silent();
- spin_unlock(&nmi_print_lock);
- bust_spinlocks(0);
- do_exit(SIGSEGV);
- }
+ die_nmi(regs, "NMI Watchdog detected LOCKUP");
} else {
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
* - LVTPC is masked on interrupt and must be
* unmasked by the LVTPC handler.
*/
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
* other P6 variant */
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+ write_watchdog_counter(NULL);
}
}
+#ifdef CONFIG_SYSCTL
+
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
+{
+ unsigned char reason = get_nmi_reason();
+ char buf[64];
+
+ if (!(reason & 0xc0)) {
+ sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+ die_nmi(regs, buf);
+ }
+ return 0;
+}
+
+/*
+ * proc handler for /proc/sys/kernel/unknown_nmi_panic
+ */
+int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ int old_state;
+
+ old_state = unknown_nmi_panic;
+ proc_dointvec(table, write, file, buffer, length, ppos);
+ if (!!old_state == !!unknown_nmi_panic)
+ return 0;
+
+ if (unknown_nmi_panic) {
+ if (reserve_lapic_nmi() < 0) {
+ unknown_nmi_panic = 0;
+ return -EBUSY;
+ } else {
+ set_nmi_callback(unknown_nmi_panic_callback);
+ }
+ } else {
+ release_lapic_nmi();
+ unset_nmi_callback();
+ }
+ return 0;
+}
+
+#endif
+
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(reserve_lapic_nmi);