linux 2.6.16.38 w/ vs2.0.3-rc1
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 2725ff9..be87c5e 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -15,7 +15,6 @@
 
 #include <linux/config.h>
 #include <linux/mm.h>
-#include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
 #include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
 #include <linux/sysdev.h>
-#include <linux/dump.h>
 #include <linux/sysctl.h>
 
 #include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
+#include <asm/div64.h>
 #include <asm/nmi.h>
 
 #include "mach_traps.h"
@@ -103,17 +100,45 @@ int nmi_active;
        (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|     \
         P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test, make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+       volatile int *endflag = data;
+       local_irq_enable();
+       /* Intentionally don't use cpu_relax here. This is
+          to make sure that the performance counter really ticks,
+          even if there is a simulator or similar that catches the
+          pause instruction. On a real HT machine this is fine because
+          all other CPUs are busy with "useless" delay loops and don't
+          care if they get somewhat fewer cycles. */
+       while (*endflag == 0)
+               barrier();
+}
+#endif
+
 static int __init check_nmi_watchdog(void)
 {
-       unsigned int prev_nmi_count[NR_CPUS];
+       volatile int endflag = 0;
+       unsigned int *prev_nmi_count;
        int cpu;
 
        if (nmi_watchdog == NMI_NONE)
                return 0;
 
+       prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+       if (!prev_nmi_count)
+               return -1;
+
        printk(KERN_INFO "Testing NMI watchdog ... ");
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++)
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+
+       for_each_cpu(cpu)
                prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); // wait 10 ticks
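
The hunk above replaces the fixed NR_CPUS-sized stack array with a kmalloc() buffer and, for NMI_LOCAL_APIC, keeps every other CPU spinning in nmi_cpu_busy() while the boot CPU samples the per-CPU NMI counts. A minimal user-space sketch of that endflag handshake, using pthreads in place of smp_call_function() (a hypothetical test harness, not kernel code):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static volatile int endflag = 0;

/* Analogue of nmi_cpu_busy(): spin without pause/cpu_relax() so the
 * worker stays genuinely busy until the tester releases it. */
static void *cpu_busy(void *unused)
{
	(void)unused;
	while (endflag == 0)
		;	/* volatile forces a re-read each iteration */
	return NULL;
}

int main(void)
{
	pthread_t workers[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&workers[i], NULL, cpu_busy, NULL);

	sleep(1);	/* stands in for the mdelay() measurement window */

	endflag = 1;	/* release every worker, as done after the test */
	for (i = 0; i < 4; i++)
		pthread_join(workers[i], NULL);
	printf("all workers released\n");
	return 0;
}

As in the kernel loop, the volatile flag plus an empty spin body means the flag is re-read on every iteration and no pause instruction is issued that could keep a performance counter from ticking.
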
@@ -126,12 +151,18 @@ static int __init check_nmi_watchdog(void)
                        continue;
 #endif
                if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-                       printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+                       endflag = 1;
+                       printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+                               cpu,
+                               prev_nmi_count[cpu],
+                               nmi_count(cpu));
                        nmi_active = 0;
                        lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+                       kfree(prev_nmi_count);
                        return -1;
                }
        }
+       endflag = 1;
        printk("OK.\n");
 
        /* now that we know it works we can reduce NMI frequency to
@@ -139,6 +170,7 @@ static int __init check_nmi_watchdog(void)
        if (nmi_watchdog == NMI_LOCAL_APIC)
                nmi_hz = 1;
 
+       kfree(prev_nmi_count);
        return 0;
 }
 /* This needs to happen later in boot so counters are working */
@@ -197,7 +229,7 @@ static void disable_lapic_nmi_watchdog(void)
                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
                        break;
                case 15:
-                       if (boot_cpu_data.x86_model > 0x3)
+                       if (boot_cpu_data.x86_model > 0x4)
                                break;
 
                        wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
@@ -325,6 +357,16 @@ static void clear_msr_range(unsigned int base, unsigned int n)
                wrmsr(base+i, 0, 0);
 }
 
+static void write_watchdog_counter(const char *descr)
+{
+       u64 count = (u64)cpu_khz * 1000;
+
+       do_div(count, nmi_hz);
+       if (descr)
+               Dprintk("setting %s to -0x%08Lx\n", descr, count);
+       wrmsrl(nmi_perfctr_msr, 0 - count);
+}
+
 static void setup_k7_watchdog(void)
 {
        unsigned int evntsel;
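
write_watchdog_counter() consolidates the three open-coded wrmsr(..., -(cpu_khz/nmi_hz*1000), ...) sites below and widens the arithmetic to 64 bits with do_div(). The old 32-bit product overflows once cpu_khz * 1000 exceeds 2^32, i.e. on CPUs faster than roughly 4.3 GHz with nmi_hz == 1. A small user-space sketch of the difference (cpu_khz = 4500000 models a hypothetical 4.5 GHz part; plain 64-bit division stands in for the kernel-only do_div()):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int cpu_khz = 4500000;	/* hypothetical 4.5 GHz CPU */
	unsigned int nmi_hz = 1;	/* rate after the watchdog test */

	/* Old 32-bit expression: the *1000 step wraps above ~4.29 GHz. */
	unsigned int old = cpu_khz / nmi_hz * 1000;

	/* New math: widen first, then divide (do_div() in the kernel). */
	uint64_t count = (uint64_t)cpu_khz * 1000;
	count /= nmi_hz;

	printf("32-bit: %u cycles (wrapped)\n", old);
	printf("64-bit: %llu cycles\n", (unsigned long long)count);
	return 0;
}

Note that do_div(n, base) divides the u64 n in place and returns the remainder, which is why the helper modifies count rather than assigning a quotient.
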
@@ -340,8 +382,7 @@ static void setup_k7_watchdog(void)
                | K7_NMI_EVENT;
 
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-       Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+       write_watchdog_counter("K7_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
@@ -362,8 +403,7 @@ static void setup_p6_watchdog(void)
                | P6_NMI_EVENT;
 
        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-       Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+       write_watchdog_counter("P6_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
@@ -403,8 +443,7 @@ static int setup_p4_watchdog(void)
 
        wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-       Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+       write_watchdog_counter("P4_IQ_COUNTER0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
        return 1;
@@ -427,7 +466,7 @@ void setup_apic_nmi_watchdog (void)
                        setup_p6_watchdog();
                        break;
                case 15:
-                       if (boot_cpu_data.x86_model > 0x3)
+                       if (boot_cpu_data.x86_model > 0x4)
                                return;
 
                        if (!setup_p4_watchdog())
@@ -473,6 +512,11 @@ void touch_nmi_watchdog (void)
         */
        for (i = 0; i < NR_CPUS; i++)
                alert_counter[i] = 0;
+
+       /*
+        * Tickle the softlockup detector too:
+        */
+       touch_softlockup_watchdog();
 }
 
 extern void die_nmi(struct pt_regs *, const char *msg);
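
With the hunk above, a single touch_nmi_watchdog() call now resets both the NMI alert counters and the softlockup timestamp, so callers no longer need to pet the two watchdogs separately. A hedged sketch of a typical call site (slow_hardware_poll() and its timings are illustrative, not from this patch):

#include <linux/nmi.h>
#include <linux/delay.h>

/* Hypothetical driver loop that legitimately holds the CPU for a
 * long time: pet the watchdogs so neither NMI nor softlockup fires. */
static void slow_hardware_poll(void)
{
	int i;

	for (i = 0; i < 10000; i++) {
		mdelay(1);		/* stand-in for slow MMIO polling */
		touch_nmi_watchdog();	/* clears alert_counter[] and
					   touches the softlockup detector */
	}
}
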
@@ -495,7 +539,10 @@ void nmi_watchdog_tick (struct pt_regs * regs)
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
                alert_counter[cpu]++;
-               if (alert_counter[cpu] == 30*nmi_hz)
+               if (alert_counter[cpu] == 5*nmi_hz)
+                       /*
+                        * die_nmi will return ONLY if NOTIFY_STOP happens..
+                        */
                        die_nmi(regs, "NMI Watchdog detected LOCKUP");
        } else {
                last_irq_sums[cpu] = sum;
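
The threshold change above also fixes a comment/code mismatch: alert_counter[cpu] advances once per watchdog tick and nmi_hz is the tick rate in Hz, so 5*nmi_hz ticks is the 5 seconds the comment always promised, while the old 30*nmi_hz was really 30 seconds. Spelled out (plain C; the nmi_hz value is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int nmi_hz = 1000;	/* tick rate during the boot test */

	/* ticks / (ticks per second) = seconds until die_nmi() */
	printf("old: %u ticks = %u s\n", 30 * nmi_hz, 30 * nmi_hz / nmi_hz);
	printf("new: %u ticks = %u s\n", 5 * nmi_hz, 5 * nmi_hz / nmi_hz);
	return 0;
}
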
@@ -519,7 +566,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
                         * other P6 variant */
                        apic_write(APIC_LVTPC, APIC_DM_NMI);
                }
-               wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+               write_watchdog_counter(NULL);
        }
 }
 
@@ -572,4 +619,3 @@ EXPORT_SYMBOL(reserve_lapic_nmi);
 EXPORT_SYMBOL(release_lapic_nmi);
 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
 EXPORT_SYMBOL(enable_timer_nmi_watchdog);
-EXPORT_SYMBOL_GPL(touch_nmi_watchdog);