* Mikael Pettersson : PM converted to driver model. Disable/enable API.
*/
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
+#include <linux/percpu.h>
#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
#include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
-int __init check_nmi_watchdog (void)
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+ volatile int *endflag = data;
+ local_irq_enable_in_hardirq();
+ /* Intentionally don't use cpu_relax here. This is
+ to make sure that the performance counter really ticks,
+ even if there is a simulator or similar that catches the
+ pause instruction. On a real HT machine this is fine because
+ all other CPUs are busy with "useless" delay loops and don't
+ care if they get somewhat less cycles. */
+ while (*endflag == 0)
+ barrier();
+}
+#endif
+
+static int __init check_nmi_watchdog(void)
{
- unsigned int prev_nmi_count[NR_CPUS];
+ volatile int endflag = 0;
+ unsigned int *prev_nmi_count;
int cpu;
- printk(KERN_INFO "testing NMI watchdog ... ");
+ if (nmi_watchdog == NMI_NONE)
+ return 0;
+
+ prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+ if (!prev_nmi_count)
+ return -1;
+
+ printk(KERN_INFO "Testing NMI watchdog ... ");
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
+ for_each_possible_cpu(cpu)
+ prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
local_irq_enable();
mdelay((10*1000)/nmi_hz); // wait 10 ticks
- /* FIXME: Only boot CPU is online at this stage. Check CPUs
- as they come up. */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
/* Check cpu_callin_map here because that is set
after the timer is started. */
continue;
#endif
if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+ endflag = 1;
+ printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+ cpu,
+ prev_nmi_count[cpu],
+ nmi_count(cpu));
nmi_active = 0;
lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+ kfree(prev_nmi_count);
return -1;
}
}
+ endflag = 1;
printk("OK.\n");
/* now that we know it works we can reduce NMI frequency to
if (nmi_watchdog == NMI_LOCAL_APIC)
nmi_hz = 1;
+ kfree(prev_nmi_count);
return 0;
}
+/* This needs to happen later in boot so counters are working */
+late_initcall(check_nmi_watchdog);
static int __init setup_nmi_watchdog(char *str)
{
__setup("nmi_watchdog=", setup_nmi_watchdog);
+static void disable_intel_arch_watchdog(void);
+
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
wrmsr(MSR_K7_EVNTSEL0, 0, 0);
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ disable_intel_arch_watchdog();
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
wrmsr(MSR_P6_EVNTSEL0, 0, 0);
break;
case 15:
- if (boot_cpu_data.x86_model > 0x3)
+ if (boot_cpu_data.x86_model > 0x4)
break;
wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
static int nmi_pm_active; /* nmi_active before suspend */
-static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
+static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
nmi_pm_active = nmi_active;
disable_lapic_nmi_watchdog();
wrmsr(base+i, 0, 0);
}
+static void write_watchdog_counter(const char *descr)
+{
+ u64 count = (u64)cpu_khz * 1000;
+
+ do_div(count, nmi_hz);
+ if(descr)
+ Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ wrmsrl(nmi_perfctr_msr, 0 - count);
+}
+
static void setup_k7_watchdog(void)
{
unsigned int evntsel;
| K7_NMI_EVENT;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
- Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+ write_watchdog_counter("K7_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
| P6_NMI_EVENT;
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
- Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+ write_watchdog_counter("P6_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+ write_watchdog_counter("P4_IQ_COUNTER0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
return 1;
}
+static void disable_intel_arch_watchdog(void)
+{
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int evntsel;
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return 0;
+
+ nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+ clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+ clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ return 1;
+}
+
void setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
setup_p6_watchdog();
break;
case 15:
- if (boot_cpu_data.x86_model > 0x3)
+ if (boot_cpu_data.x86_model > 0x4)
return;
if (!setup_p4_watchdog())
* Just reset the alert counters, (other CPUs might be
* spinning on locks we hold):
*/
- for (i = 0; i < NR_CPUS; i++)
+ for_each_possible_cpu(i)
alert_counter[i] = 0;
+
+ /*
+ * Tickle the softlockup detector too:
+ */
+ touch_softlockup_watchdog();
}
+EXPORT_SYMBOL(touch_nmi_watchdog);
extern void die_nmi(struct pt_regs *, const char *msg);
* always switch the stack NMI-atomically, it's safe to use
* smp_processor_id().
*/
- int sum, cpu = smp_processor_id();
+ unsigned int sum;
+ int cpu = smp_processor_id();
- sum = irq_stat[cpu].apic_timer_irqs;
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
if (last_irq_sums[cpu] == sum) {
/*
*/
alert_counter[cpu]++;
if (alert_counter[cpu] == 5*nmi_hz)
- die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ /*
+ * die_nmi will return ONLY if NOTIFY_STOP happens..
+ */
+ die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
} else {
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+ else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
+ nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
/* Only P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
* other P6 variant */
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+ write_watchdog_counter(NULL);
}
}