X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fnmi.c;h=d43b498ec745e2ea335d1282df274311eac62234;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=16d91e3e138f884654d4a04c62e674919367dac9;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 16d91e3e1..d43b498ec 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -25,13 +24,16 @@ #include #include #include +#include #include -#include -#include +#include #include +#include "mach_traps.h" + unsigned int nmi_watchdog = NMI_NONE; +extern int unknown_nmi_panic; static unsigned int nmi_hz = HZ; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; @@ -46,7 +48,7 @@ extern void show_registers(struct pt_regs *regs); * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -98,30 +100,69 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) -int __init check_nmi_watchdog (void) +#ifdef CONFIG_SMP +/* The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) +{ + volatile int *endflag = data; + local_irq_enable(); + /* Intentionally don't use cpu_relax here. This is + to make sure that the performance counter really ticks, + even if there is a simulator or similar that catches the + pause instruction. On a real HT machine this is fine because + all other CPUs are busy with "useless" delay loops and don't + care if they get somewhat less cycles. */ + while (*endflag == 0) + barrier(); +} +#endif + +static int __init check_nmi_watchdog(void) { - unsigned int prev_nmi_count[NR_CPUS]; + volatile int endflag = 0; + unsigned int *prev_nmi_count; int cpu; - printk(KERN_INFO "testing NMI watchdog ... "); + if (nmi_watchdog == NMI_NONE) + return 0; + + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + return -1; + + printk(KERN_INFO "Testing NMI watchdog ... "); + + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - for (cpu = 0; cpu < NR_CPUS; cpu++) - prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count; + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - /* FIXME: Only boot CPU is online at this stage. Check CPUs - as they come up. */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) + for_each_possible_cpu(cpu) { +#ifdef CONFIG_SMP + /* Check cpu_callin_map here because that is set + after the timer is started. */ + if (!cpu_isset(cpu, cpu_callin_map)) continue; +#endif if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck!\n", cpu); + endflag = 1; + printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, + prev_nmi_count[cpu], + nmi_count(cpu)); nmi_active = 0; lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; + kfree(prev_nmi_count); return -1; } } + endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -129,8 +170,11 @@ int __init check_nmi_watchdog (void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = 1; + kfree(prev_nmi_count); return 0; } +/* This needs to happen later in boot so counters are working */ +late_initcall(check_nmi_watchdog); static int __init setup_nmi_watchdog(char *str) { @@ -185,7 +229,7 @@ static void disable_lapic_nmi_watchdog(void) wrmsr(MSR_P6_EVNTSEL0, 0, 0); break; case 15: - if (boot_cpu_data.x86_model > 0x3) + if (boot_cpu_data.x86_model > 0x4) break; wrmsr(MSR_P4_IQ_CCCR0, 0, 0); @@ -257,7 +301,7 @@ void enable_timer_nmi_watchdog(void) static int nmi_pm_active; /* nmi_active before suspend */ -static int lapic_nmi_suspend(struct sys_device *dev, u32 state) +static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) { nmi_pm_active = nmi_active; disable_lapic_nmi_watchdog(); @@ -313,6 +357,16 @@ static void clear_msr_range(unsigned int base, unsigned int n) wrmsr(base+i, 0, 0); } +static void write_watchdog_counter(const char *descr) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(nmi_perfctr_msr, 0 - count); +} + static void setup_k7_watchdog(void) { unsigned int evntsel; @@ -328,8 +382,7 @@ static void setup_k7_watchdog(void) | K7_NMI_EVENT; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); @@ -350,8 +403,7 @@ static void setup_p6_watchdog(void) | P6_NMI_EVENT; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); + write_watchdog_counter("P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); @@ -376,7 +428,13 @@ static int setup_p4_watchdog(void) clear_msr_range(0x3F1, 2); /* MSR 0x3F0 seems to have a default value of 0xFC00, but current docs doesn't fully define it, so leave it alone for now. */ - clear_msr_range(0x3A0, 31); + if (boot_cpu_data.x86_model >= 0x3) { + /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ + clear_msr_range(0x3A0, 26); + clear_msr_range(0x3BC, 3); + } else { + clear_msr_range(0x3A0, 31); + } clear_msr_range(0x3C0, 6); clear_msr_range(0x3C8, 6); clear_msr_range(0x3E0, 2); @@ -385,8 +443,7 @@ static int setup_p4_watchdog(void) wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; @@ -409,7 +466,7 @@ void setup_apic_nmi_watchdog (void) setup_p6_watchdog(); break; case 15: - if (boot_cpu_data.x86_model > 0x3) + if (boot_cpu_data.x86_model > 0x4) return; if (!setup_p4_watchdog()) @@ -426,8 +483,6 @@ void setup_apic_nmi_watchdog (void) nmi_active = 1; } -static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED; - /* * the best way to detect whether a CPU has a 'hard lockup' problem * is to check it's local APIC timer IRQ counts. If they are not @@ -455,10 +510,17 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for (i = 0; i < NR_CPUS; i++) + for_each_possible_cpu(i) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } +extern void die_nmi(struct pt_regs *, const char *msg); + void nmi_watchdog_tick (struct pt_regs * regs) { @@ -467,9 +529,10 @@ void nmi_watchdog_tick (struct pt_regs * regs) * always switch the stack NMI-atomically, it's safe to use * smp_processor_id(). */ - int sum, cpu = smp_processor_id(); + unsigned int sum; + int cpu = smp_processor_id(); - sum = irq_stat[cpu].apic_timer_irqs; + sum = per_cpu(irq_stat, cpu).apic_timer_irqs; if (last_irq_sums[cpu] == sum) { /* @@ -477,21 +540,11 @@ void nmi_watchdog_tick (struct pt_regs * regs) * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) { - spin_lock(&nmi_print_lock); + if (alert_counter[cpu] == 5*nmi_hz) /* - * We are in trouble anyway, lets at least try - * to get a message out. + * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - bust_spinlocks(1); - printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); - show_registers(regs); - printk("console shuts up ...\n"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); - do_exit(SIGSEGV); - } + die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; @@ -514,10 +567,53 @@ void nmi_watchdog_tick (struct pt_regs * regs) * other P6 variant */ apic_write(APIC_LVTPC, APIC_DM_NMI); } - wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter(NULL); + } +} + +#ifdef CONFIG_SYSCTL + +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) +{ + unsigned char reason = get_nmi_reason(); + char buf[64]; + + if (!(reason & 0xc0)) { + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(regs, buf); + } + return 0; +} + +/* + * proc handler for /proc/sys/kernel/unknown_nmi_panic + */ +int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + old_state = unknown_nmi_panic; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!unknown_nmi_panic) + return 0; + + if (unknown_nmi_panic) { + if (reserve_lapic_nmi() < 0) { + unknown_nmi_panic = 0; + return -EBUSY; + } else { + set_nmi_callback(unknown_nmi_panic_callback); + } + } else { + release_lapic_nmi(); + unset_nmi_callback(); } + return 0; } +#endif + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(reserve_lapic_nmi);