X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fnmi.c;h=6bd4681df09f100dd4bb7eec4b4f24a9cf7f430e;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=70dd434e56a9368ecc47d742683676614038d333;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 70dd434e5..6bd4681df 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -25,15 +25,20 @@ #include #include #include +#include #include #include #include #include +#include "mach_traps.h" + unsigned int nmi_watchdog = NMI_NONE; +extern int unknown_nmi_panic; static unsigned int nmi_hz = HZ; -unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); /* @@ -45,7 +50,7 @@ extern void show_registers(struct pt_regs *regs); * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -80,7 +85,8 @@ int nmi_active; #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) #define P4_ESCR_OS (1<<3) #define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI (1<<26) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) #define P4_CCCR_THRESHOLD(N) ((N)<<20) #define P4_CCCR_COMPLEMENT (1<<19) #define P4_CCCR_COMPARE (1<<18) @@ -93,7 +99,7 @@ int nmi_active; #define MSR_P4_IQ_COUNTER0 0x30C #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) #define P4_NMI_IQ_CCCR0 \ - (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ + (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) int __init check_nmi_watchdog (void) @@ -111,8 +117,12 @@ int __init check_nmi_watchdog (void) /* FIXME: Only boot CPU is online at this stage. Check CPUs as they come up. */ for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) +#ifdef CONFIG_SMP + /* Check cpu_callin_map here because that is set + after the timer is started. */ + if (!cpu_isset(cpu, cpu_callin_map)) continue; +#endif if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { printk("CPU#%d: NMI appears to be stuck!\n", cpu); nmi_active = 0; @@ -364,12 +374,23 @@ static int setup_p4_watchdog(void) return 0; nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; + nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; +#endif if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) clear_msr_range(0x3F1, 2); /* MSR 0x3F0 seems to have a default value of 0xFC00, but current docs doesn't fully define it, so leave it alone for now. */ - clear_msr_range(0x3A0, 31); + if (boot_cpu_data.x86_model >= 0x3) { + /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ + clear_msr_range(0x3A0, 26); + clear_msr_range(0x3BC, 3); + } else { + clear_msr_range(0x3A0, 31); + } clear_msr_range(0x3C0, 6); clear_msr_range(0x3C8, 6); clear_msr_range(0x3E0, 2); @@ -381,7 +402,7 @@ static int setup_p4_watchdog(void) Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; } @@ -419,8 +440,6 @@ void setup_apic_nmi_watchdog (void) nmi_active = 1; } -static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED; - /* * the best way to detect whether a CPU has a 'hard lockup' problem * is to check it's local APIC timer IRQ counts. If they are not @@ -452,6 +471,8 @@ void touch_nmi_watchdog (void) alert_counter[i] = 0; } +extern void die_nmi(struct pt_regs *, const char *msg); + void nmi_watchdog_tick (struct pt_regs * regs) { @@ -470,21 +491,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) { - spin_lock(&nmi_print_lock); - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - bust_spinlocks(1); - printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); - show_registers(regs); - printk("console shuts up ...\n"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); - do_exit(SIGSEGV); - } + if (alert_counter[cpu] == 5*nmi_hz) + die_nmi(regs, "NMI Watchdog detected LOCKUP"); } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; @@ -498,7 +506,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) * - LVTPC is masked on interrupt and must be * unmasked by the LVTPC handler. */ - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); } else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { @@ -511,6 +519,49 @@ void nmi_watchdog_tick (struct pt_regs * regs) } } +#ifdef CONFIG_SYSCTL + +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) +{ + unsigned char reason = get_nmi_reason(); + char buf[64]; + + if (!(reason & 0xc0)) { + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(regs, buf); + } + return 0; +} + +/* + * proc handler for /proc/sys/kernel/unknown_nmi_panic + */ +int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + old_state = unknown_nmi_panic; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!unknown_nmi_panic) + return 0; + + if (unknown_nmi_panic) { + if (reserve_lapic_nmi() < 0) { + unknown_nmi_panic = 0; + return -EBUSY; + } else { + set_nmi_callback(unknown_nmi_panic_callback); + } + } else { + release_lapic_nmi(); + unset_nmi_callback(); + } + return 0; +} + +#endif + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(reserve_lapic_nmi);