Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 2c0ee9c..d43b498 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -15,7 +15,6 @@
 
 #include <linux/config.h>
 #include <linux/mm.h>
-#include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
 #include <linux/smp_lock.h>
@@ -28,8 +27,7 @@
 #include <linux/sysctl.h>
 
 #include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
+#include <asm/div64.h>
 #include <asm/nmi.h>
 
 #include "mach_traps.h"
@@ -102,22 +100,50 @@ int nmi_active;
        (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|     \
         P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+       volatile int *endflag = data;
+       local_irq_enable();
+       /* Intentionally don't use cpu_relax here. This is
+          to make sure that the performance counter really ticks,
+          even if there is a simulator or similar that catches the
+          pause instruction. On a real HT machine this is fine because
+          all other CPUs are busy with "useless" delay loops and don't
+          care if they get somewhat less cycles. */
+       while (*endflag == 0)
+               barrier();
+}
+#endif
+
 static int __init check_nmi_watchdog(void)
 {
-       unsigned int prev_nmi_count[NR_CPUS];
+       volatile int endflag = 0;
+       unsigned int *prev_nmi_count;
        int cpu;
 
        if (nmi_watchdog == NMI_NONE)
                return 0;
 
+       prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+       if (!prev_nmi_count)
+               return -1;
+
        printk(KERN_INFO "Testing NMI watchdog ... ");
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++)
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+
+       for_each_possible_cpu(cpu)
                prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+       for_each_possible_cpu(cpu) {
 #ifdef CONFIG_SMP
                /* Check cpu_callin_map here because that is set
                   after the timer is started. */
@@ -125,12 +151,18 @@ static int __init check_nmi_watchdog(void)
                        continue;
 #endif
                if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-                       printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+                       endflag = 1;
+                       printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+                               cpu,
+                               prev_nmi_count[cpu],
+                               nmi_count(cpu));
                        nmi_active = 0;
                        lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+                       kfree(prev_nmi_count);
                        return -1;
                }
        }
+       endflag = 1;
        printk("OK.\n");
 
        /* now that we know it works we can reduce NMI frequency to
@@ -138,6 +170,7 @@ static int __init check_nmi_watchdog(void)
        if (nmi_watchdog == NMI_LOCAL_APIC)
                nmi_hz = 1;
 
+       kfree(prev_nmi_count);
        return 0;
 }
 /* This needs to happen later in boot so counters are working */
@@ -196,7 +229,7 @@ static void disable_lapic_nmi_watchdog(void)
                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
                        break;
                case 15:
-                       if (boot_cpu_data.x86_model > 0x3)
+                       if (boot_cpu_data.x86_model > 0x4)
                                break;
 
                        wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
@@ -324,6 +357,16 @@ static void clear_msr_range(unsigned int base, unsigned int n)
                wrmsr(base+i, 0, 0);
 }
 
+static void write_watchdog_counter(const char *descr)
+{
+       u64 count = (u64)cpu_khz * 1000;
+
+       do_div(count, nmi_hz);
+       if(descr)
+               Dprintk("setting %s to -0x%08Lx\n", descr, count);
+       wrmsrl(nmi_perfctr_msr, 0 - count);
+}
+
 static void setup_k7_watchdog(void)
 {
        unsigned int evntsel;
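
write_watchdog_counter() centralizes the reload-value computation that was previously open-coded at each setup site as -(cpu_khz/nmi_hz*1000). The counter is programmed with the negative of "cycles per watchdog period" so that it overflows, and raises the NMI, once that many cycles have elapsed. Doing the multiplication in 32-bit arithmetic wraps once cpu_khz * 1000 exceeds 2^32 (roughly a 4.3 GHz CPU with nmi_hz at 1); the helper widens to u64 first, divides with do_div(), and writes the full value with wrmsrl(). A self-contained sketch of the difference, with a made-up clock speed, follows; it is illustrative only and not part of the patch.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int cpu_khz = 4400000;		/* hypothetical 4.4 GHz CPU */
	unsigned int nmi_hz = 1;		/* one watchdog NMI per second */

	/* old expression: 32-bit arithmetic, wraps above ~4.29 GHz */
	unsigned int old_count = cpu_khz / nmi_hz * 1000;

	/* new approach: widen first, then divide, as do_div() does */
	uint64_t new_count = (uint64_t)cpu_khz * 1000;
	new_count /= nmi_hz;

	printf("32-bit cycles per NMI: %u (wrapped)\n", old_count);
	printf("64-bit cycles per NMI: %llu\n", (unsigned long long)new_count);
	return 0;
}
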
@@ -339,8 +382,7 @@ static void setup_k7_watchdog(void)
                | K7_NMI_EVENT;
 
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-       Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+       write_watchdog_counter("K7_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
@@ -361,8 +403,7 @@ static void setup_p6_watchdog(void)
                | P6_NMI_EVENT;
 
        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-       Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+       write_watchdog_counter("P6_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
@@ -402,8 +443,7 @@ static int setup_p4_watchdog(void)
 
        wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-       Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+       write_watchdog_counter("P4_IQ_COUNTER0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
        return 1;
@@ -426,7 +466,7 @@ void setup_apic_nmi_watchdog (void)
                        setup_p6_watchdog();
                        break;
                case 15:
-                       if (boot_cpu_data.x86_model > 0x3)
+                       if (boot_cpu_data.x86_model > 0x4)
                                return;
 
                        if (!setup_p4_watchdog())
@@ -470,8 +510,13 @@ void touch_nmi_watchdog (void)
         * Just reset the alert counters, (other CPUs might be
         * spinning on locks we hold):
         */
-       for (i = 0; i < NR_CPUS; i++)
+       for_each_possible_cpu(i)
                alert_counter[i] = 0;
+
+       /*
+        * Tickle the softlockup detector too:
+        */
+       touch_softlockup_watchdog();
 }
 
 extern void die_nmi(struct pt_regs *, const char *msg);
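
touch_nmi_watchdog() is what code that legitimately keeps a CPU busy for a long stretch (long console dumps, slow hardware polling with interrupts off, and so on) calls to say "this stall is intentional"; besides clearing the per-CPU alert counters (now only for possible CPUs), it now also resets the softlockup detector so the same pause is not reported twice. A hypothetical caller is sketched below; wait_for_ready(), the status register and the ready bit are made-up names, and only the touch_nmi_watchdog() call in the busy loop reflects the real interface.

#include <linux/nmi.h>		/* touch_nmi_watchdog() */
#include <asm/io.h>		/* readb() */
#include <asm/processor.h>	/* cpu_relax() */

/* Spin until bit 0 of a device status register goes high. */
static void wait_for_ready(void __iomem *status_reg)
{
	while (!(readb(status_reg) & 0x01)) {
		cpu_relax();
		touch_nmi_watchdog();	/* also tickles softlockup now */
	}
}
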
@@ -484,7 +529,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
         * always switch the stack NMI-atomically, it's safe to use
         * smp_processor_id().
         */
-       int sum, cpu = smp_processor_id();
+       unsigned int sum;
+       int cpu = smp_processor_id();
 
        sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
@@ -495,7 +541,10 @@ void nmi_watchdog_tick (struct pt_regs * regs)
                 */
                alert_counter[cpu]++;
                if (alert_counter[cpu] == 5*nmi_hz)
-                       die_nmi(regs, "NMI Watchdog detected LOCKUP");
+                       /*
+                        * die_nmi will return ONLY if NOTIFY_STOP happens..
+                        */
+                       die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
        } else {
                last_irq_sums[cpu] = sum;
                alert_counter[cpu] = 0;
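
The new comment spells out the only way execution gets past die_nmi(): a hook on the die notifier chain (a kgdb-style debugger, typically) claims the event and returns NOTIFY_STOP, in which case die_nmi() returns and the tick handler carries on. The sketch below shows what such a hook looks like against the 2.6.17-era API; the function and variable names are made up, and it is illustrative rather than part of the patch.

#include <linux/notifier.h>
#include <asm/kdebug.h>		/* register_die_notifier(), DIE_NMIWATCHDOG */

static int my_watchdog_hook(struct notifier_block *nb, unsigned long val,
			    void *data)
{
	if (val == DIE_NMIWATCHDOG) {
		/* e.g. drop into a debugger here; returning NOTIFY_STOP
		 * is what makes die_nmi() come back to the caller */
		return NOTIFY_STOP;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_watchdog_nb = {
	.notifier_call = my_watchdog_hook,
};

/* from module init:  register_die_notifier(&my_watchdog_nb); */
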
@@ -518,7 +567,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
                         * other P6 variant */
                        apic_write(APIC_LVTPC, APIC_DM_NMI);
                }
-               wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+               write_watchdog_counter(NULL);
        }
 }