2 * linux/arch/i386/nmi.c
4 * NMI watchdog support on APIC systems
6 * Started by Ingo Molnar <mingo@redhat.com>
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
16 #include <linux/delay.h>
17 #include <linux/interrupt.h>
18 #include <linux/module.h>
19 #include <linux/nmi.h>
20 #include <linux/sysdev.h>
21 #include <linux/sysctl.h>
22 #include <linux/percpu.h>
23 #include <linux/dmi.h>
24 #include <linux/kprobes.h>
25 #include <linux/cpumask.h>
29 #include <asm/kdebug.h>
30 #include <asm/intel_arch_perfmon.h>
32 #include "mach_traps.h"
34 int unknown_nmi_panic;
35 int nmi_watchdog_enabled;
37 /* perfctr_nmi_owner tracks the ownership of the perfctr registers:
38 * evtsel_nmi_owner tracks the ownership of the event selection
39 * - different performance counters/ event selection may be reserved for
40 * different subsystems this reservation system just tries to coordinate
44 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
47 #define NMI_MAX_COUNTER_BITS 66
48 #define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
50 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
51 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
53 static cpumask_t backtrace_mask = CPU_MASK_NONE;
55 * >0: the lapic NMI watchdog is active, but can be disabled
56 * <0: the lapic NMI watchdog has not been set up, and cannot
58 * 0: the lapic NMI watchdog is disabled, but can be enabled
60 atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
62 unsigned int nmi_watchdog = NMI_NONE;
63 static unsigned int nmi_hz = HZ;
65 struct nmi_watchdog_ctlblk {
68 unsigned int cccr_msr;
69 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
70 unsigned int evntsel_msr; /* the MSR to select the events to handle */
72 static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
74 /* local prototypes */
75 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
77 extern void show_registers(struct pt_regs *regs);
78 extern int unknown_nmi_panic;
80 /* converts an msr to an appropriate reservation bit */
81 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
83 /* returns the bit offset of the performance counter register */
84 switch (boot_cpu_data.x86_vendor) {
86 return (msr - MSR_K7_PERFCTR0);
87 case X86_VENDOR_INTEL:
88 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
89 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
91 switch (boot_cpu_data.x86) {
93 return (msr - MSR_P6_PERFCTR0);
95 return (msr - MSR_P4_BPU_PERFCTR0);
101 /* converts an msr to an appropriate reservation bit */
102 static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
104 /* returns the bit offset of the event selection register */
105 switch (boot_cpu_data.x86_vendor) {
107 return (msr - MSR_K7_EVNTSEL0);
108 case X86_VENDOR_INTEL:
109 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
110 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
112 switch (boot_cpu_data.x86) {
114 return (msr - MSR_P6_EVNTSEL0);
116 return (msr - MSR_P4_BSU_ESCR0);
122 /* checks for a bit availability (hack for oprofile) */
123 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
127 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
130 /* checks the an msr for availability */
131 int avail_to_resrv_perfctr_nmi(unsigned int msr)
133 unsigned int counter;
135 counter = nmi_perfctr_msr_to_bit(msr);
136 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
138 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
141 int reserve_perfctr_nmi(unsigned int msr)
143 unsigned int counter;
145 counter = nmi_perfctr_msr_to_bit(msr);
146 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
148 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
153 void release_perfctr_nmi(unsigned int msr)
155 unsigned int counter;
157 counter = nmi_perfctr_msr_to_bit(msr);
158 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
160 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
163 int reserve_evntsel_nmi(unsigned int msr)
165 unsigned int counter;
167 counter = nmi_evntsel_msr_to_bit(msr);
168 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
170 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
175 void release_evntsel_nmi(unsigned int msr)
177 unsigned int counter;
179 counter = nmi_evntsel_msr_to_bit(msr);
180 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
182 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
185 static __cpuinit inline int nmi_known_cpu(void)
187 switch (boot_cpu_data.x86_vendor) {
189 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
190 case X86_VENDOR_INTEL:
191 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
194 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
199 static int endflag __initdata = 0;
202 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
203 * the CPU is idle. To make sure the NMI watchdog really ticks on all
204 * CPUs during the test make them busy.
206 static __init void nmi_cpu_busy(void *data)
208 local_irq_enable_in_hardirq();
209 /* Intentionally don't use cpu_relax here. This is
210 to make sure that the performance counter really ticks,
211 even if there is a simulator or similar that catches the
212 pause instruction. On a real HT machine this is fine because
213 all other CPUs are busy with "useless" delay loops and don't
214 care if they get somewhat less cycles. */
220 static int __init check_nmi_watchdog(void)
222 unsigned int *prev_nmi_count;
225 /* Enable NMI watchdog for newer systems.
226 Probably safe on most older systems too, but let's be careful.
227 IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
228 which hangs the system. Disable watchdog for all thinkpads */
229 if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
230 !dmi_name_in_vendors("ThinkPad"))
231 nmi_watchdog = NMI_LOCAL_APIC;
233 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
236 if (!atomic_read(&nmi_active))
239 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
243 printk(KERN_INFO "Testing NMI watchdog ... ");
245 if (nmi_watchdog == NMI_LOCAL_APIC)
246 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
248 for_each_possible_cpu(cpu)
249 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
251 mdelay((10*1000)/nmi_hz); // wait 10 ticks
253 for_each_possible_cpu(cpu) {
255 /* Check cpu_callin_map here because that is set
256 after the timer is started. */
257 if (!cpu_isset(cpu, cpu_callin_map))
260 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
262 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
263 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
267 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
268 atomic_dec(&nmi_active);
271 if (!atomic_read(&nmi_active)) {
272 kfree(prev_nmi_count);
273 atomic_set(&nmi_active, -1);
279 /* now that we know it works we can reduce NMI frequency to
280 something more reasonable; makes a difference in some configs */
281 if (nmi_watchdog == NMI_LOCAL_APIC) {
282 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
286 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
287 * are writable, with higher bits sign extending from bit 31.
288 * So, we can only program the counter with 31 bit values and
289 * 32nd bit should be 1, for 33.. to be 1.
290 * Find the appropriate nmi_hz
292 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
293 ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
294 u64 count = (u64)cpu_khz * 1000;
295 do_div(count, 0x7fffffffUL);
300 kfree(prev_nmi_count);
303 /* This needs to happen later in boot so counters are working */
304 late_initcall(check_nmi_watchdog);
306 static int __init setup_nmi_watchdog(char *str)
310 get_option(&str, &nmi);
312 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
319 __setup("nmi_watchdog=", setup_nmi_watchdog);
321 static void disable_lapic_nmi_watchdog(void)
323 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
325 if (atomic_read(&nmi_active) <= 0)
328 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
330 BUG_ON(atomic_read(&nmi_active) != 0);
333 static void enable_lapic_nmi_watchdog(void)
335 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
337 /* are we already enabled */
338 if (atomic_read(&nmi_active) != 0)
341 /* are we lapic aware */
342 if (nmi_known_cpu() <= 0)
345 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
346 touch_nmi_watchdog();
349 void disable_timer_nmi_watchdog(void)
351 BUG_ON(nmi_watchdog != NMI_IO_APIC);
353 if (atomic_read(&nmi_active) <= 0)
357 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
359 BUG_ON(atomic_read(&nmi_active) != 0);
362 void enable_timer_nmi_watchdog(void)
364 BUG_ON(nmi_watchdog != NMI_IO_APIC);
366 if (atomic_read(&nmi_active) == 0) {
367 touch_nmi_watchdog();
368 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

/* stop the watchdog across suspend; only CPU0 reaches here */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

/* re-arm the watchdog after resume if it was active before suspend */
static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if ( atomic_read(&nmi_active) < 0 )
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
432 * Activate the NMI watchdog via the local APIC.
433 * Original code written by Keith Owens.
436 static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
438 u64 count = (u64)cpu_khz * 1000;
440 do_div(count, nmi_hz);
442 Dprintk("setting %s to -0x%08Lx\n", descr, count);
443 wrmsrl(perfctr_msr, 0 - count);
446 /* Note that these events don't tick when the CPU idles. This means
447 the frequency varies with CPU load. */
449 #define K7_EVNTSEL_ENABLE (1 << 22)
450 #define K7_EVNTSEL_INT (1 << 20)
451 #define K7_EVNTSEL_OS (1 << 17)
452 #define K7_EVNTSEL_USR (1 << 16)
453 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
454 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
456 static int setup_k7_watchdog(void)
458 unsigned int perfctr_msr, evntsel_msr;
459 unsigned int evntsel;
460 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
462 perfctr_msr = MSR_K7_PERFCTR0;
463 evntsel_msr = MSR_K7_EVNTSEL0;
464 if (!reserve_perfctr_nmi(perfctr_msr))
467 if (!reserve_evntsel_nmi(evntsel_msr))
470 wrmsrl(perfctr_msr, 0UL);
472 evntsel = K7_EVNTSEL_INT
477 /* setup the timer */
478 wrmsr(evntsel_msr, evntsel, 0);
479 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
480 apic_write(APIC_LVTPC, APIC_DM_NMI);
481 evntsel |= K7_EVNTSEL_ENABLE;
482 wrmsr(evntsel_msr, evntsel, 0);
484 wd->perfctr_msr = perfctr_msr;
485 wd->evntsel_msr = evntsel_msr;
486 wd->cccr_msr = 0; //unused
487 wd->check_bit = 1ULL<<63;
490 release_perfctr_nmi(perfctr_msr);
495 static void stop_k7_watchdog(void)
497 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
499 wrmsr(wd->evntsel_msr, 0, 0);
501 release_evntsel_nmi(wd->evntsel_msr);
502 release_perfctr_nmi(wd->perfctr_msr);
505 #define P6_EVNTSEL0_ENABLE (1 << 22)
506 #define P6_EVNTSEL_INT (1 << 20)
507 #define P6_EVNTSEL_OS (1 << 17)
508 #define P6_EVNTSEL_USR (1 << 16)
509 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
510 #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
512 static int setup_p6_watchdog(void)
514 unsigned int perfctr_msr, evntsel_msr;
515 unsigned int evntsel;
516 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
518 perfctr_msr = MSR_P6_PERFCTR0;
519 evntsel_msr = MSR_P6_EVNTSEL0;
520 if (!reserve_perfctr_nmi(perfctr_msr))
523 if (!reserve_evntsel_nmi(evntsel_msr))
526 wrmsrl(perfctr_msr, 0UL);
528 evntsel = P6_EVNTSEL_INT
533 /* setup the timer */
534 wrmsr(evntsel_msr, evntsel, 0);
535 write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
536 apic_write(APIC_LVTPC, APIC_DM_NMI);
537 evntsel |= P6_EVNTSEL0_ENABLE;
538 wrmsr(evntsel_msr, evntsel, 0);
540 wd->perfctr_msr = perfctr_msr;
541 wd->evntsel_msr = evntsel_msr;
542 wd->cccr_msr = 0; //unused
543 wd->check_bit = 1ULL<<39;
546 release_perfctr_nmi(perfctr_msr);
551 static void stop_p6_watchdog(void)
553 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
555 wrmsr(wd->evntsel_msr, 0, 0);
557 release_evntsel_nmi(wd->evntsel_msr);
558 release_perfctr_nmi(wd->perfctr_msr);
561 /* Note that these events don't tick when the CPU idles. This means
562 the frequency varies with CPU load. */
564 #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
565 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
566 #define P4_ESCR_OS (1<<3)
567 #define P4_ESCR_USR (1<<2)
568 #define P4_CCCR_OVF_PMI0 (1<<26)
569 #define P4_CCCR_OVF_PMI1 (1<<27)
570 #define P4_CCCR_THRESHOLD(N) ((N)<<20)
571 #define P4_CCCR_COMPLEMENT (1<<19)
572 #define P4_CCCR_COMPARE (1<<18)
573 #define P4_CCCR_REQUIRED (3<<16)
574 #define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
575 #define P4_CCCR_ENABLE (1<<12)
576 #define P4_CCCR_OVF (1<<31)
577 /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
578 CRU_ESCR0 (with any non-null event selector) through a complemented
579 max threshold. [IA32-Vol3, Section 14.9.9] */
581 static int setup_p4_watchdog(void)
583 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
584 unsigned int evntsel, cccr_val;
585 unsigned int misc_enable, dummy;
587 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
589 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
590 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
594 /* detect which hyperthread we are on */
595 if (smp_num_siblings == 2) {
596 unsigned int ebx, apicid;
599 apicid = (ebx >> 24) & 0xff;
605 /* performance counters are shared resources
606 * assign each hyperthread its own set
607 * (re-use the ESCR0 register, seems safe
608 * and keeps the cccr_val the same)
612 perfctr_msr = MSR_P4_IQ_PERFCTR0;
613 evntsel_msr = MSR_P4_CRU_ESCR0;
614 cccr_msr = MSR_P4_IQ_CCCR0;
615 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
618 perfctr_msr = MSR_P4_IQ_PERFCTR1;
619 evntsel_msr = MSR_P4_CRU_ESCR0;
620 cccr_msr = MSR_P4_IQ_CCCR1;
621 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
624 if (!reserve_perfctr_nmi(perfctr_msr))
627 if (!reserve_evntsel_nmi(evntsel_msr))
630 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
634 cccr_val |= P4_CCCR_THRESHOLD(15)
639 wrmsr(evntsel_msr, evntsel, 0);
640 wrmsr(cccr_msr, cccr_val, 0);
641 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
642 apic_write(APIC_LVTPC, APIC_DM_NMI);
643 cccr_val |= P4_CCCR_ENABLE;
644 wrmsr(cccr_msr, cccr_val, 0);
645 wd->perfctr_msr = perfctr_msr;
646 wd->evntsel_msr = evntsel_msr;
647 wd->cccr_msr = cccr_msr;
648 wd->check_bit = 1ULL<<39;
651 release_perfctr_nmi(perfctr_msr);
656 static void stop_p4_watchdog(void)
658 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
660 wrmsr(wd->cccr_msr, 0, 0);
661 wrmsr(wd->evntsel_msr, 0, 0);
663 release_evntsel_nmi(wd->evntsel_msr);
664 release_perfctr_nmi(wd->perfctr_msr);
667 #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
668 #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
670 static int setup_intel_arch_watchdog(void)
673 union cpuid10_eax eax;
675 unsigned int perfctr_msr, evntsel_msr;
676 unsigned int evntsel;
677 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
680 * Check whether the Architectural PerfMon supports
681 * Unhalted Core Cycles Event or not.
682 * NOTE: Corresponding bit = 0 in ebx indicates event present.
684 cpuid(10, &(eax.full), &ebx, &unused, &unused);
685 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
686 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
689 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
690 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
692 if (!reserve_perfctr_nmi(perfctr_msr))
695 if (!reserve_evntsel_nmi(evntsel_msr))
698 wrmsrl(perfctr_msr, 0UL);
700 evntsel = ARCH_PERFMON_EVENTSEL_INT
701 | ARCH_PERFMON_EVENTSEL_OS
702 | ARCH_PERFMON_EVENTSEL_USR
703 | ARCH_PERFMON_NMI_EVENT_SEL
704 | ARCH_PERFMON_NMI_EVENT_UMASK;
706 /* setup the timer */
707 wrmsr(evntsel_msr, evntsel, 0);
708 write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
709 apic_write(APIC_LVTPC, APIC_DM_NMI);
710 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
711 wrmsr(evntsel_msr, evntsel, 0);
713 wd->perfctr_msr = perfctr_msr;
714 wd->evntsel_msr = evntsel_msr;
715 wd->cccr_msr = 0; //unused
716 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
719 release_perfctr_nmi(perfctr_msr);
724 static void stop_intel_arch_watchdog(void)
727 union cpuid10_eax eax;
729 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
732 * Check whether the Architectural PerfMon supports
733 * Unhalted Core Cycles Event or not.
734 * NOTE: Corresponding bit = 0 in ebx indicates event present.
736 cpuid(10, &(eax.full), &ebx, &unused, &unused);
737 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
738 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
741 wrmsr(wd->evntsel_msr, 0, 0);
742 release_evntsel_nmi(wd->evntsel_msr);
743 release_perfctr_nmi(wd->perfctr_msr);
746 void setup_apic_nmi_watchdog (void *unused)
748 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
750 /* only support LOCAL and IO APICs for now */
751 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
752 (nmi_watchdog != NMI_IO_APIC))
755 if (wd->enabled == 1)
758 /* cheap hack to support suspend/resume */
759 /* if cpu0 is not active neither should the other cpus */
760 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
763 if (nmi_watchdog == NMI_LOCAL_APIC) {
764 switch (boot_cpu_data.x86_vendor) {
766 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
768 if (!setup_k7_watchdog())
771 case X86_VENDOR_INTEL:
772 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
773 if (!setup_intel_arch_watchdog())
777 switch (boot_cpu_data.x86) {
779 if (boot_cpu_data.x86_model > 0xd)
782 if (!setup_p6_watchdog())
786 if (boot_cpu_data.x86_model > 0x4)
789 if (!setup_p4_watchdog())
801 atomic_inc(&nmi_active);
804 void stop_apic_nmi_watchdog(void *unused)
806 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
808 /* only support LOCAL and IO APICs for now */
809 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
810 (nmi_watchdog != NMI_IO_APIC))
813 if (wd->enabled == 0)
816 if (nmi_watchdog == NMI_LOCAL_APIC) {
817 switch (boot_cpu_data.x86_vendor) {
821 case X86_VENDOR_INTEL:
822 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
823 stop_intel_arch_watchdog();
826 switch (boot_cpu_data.x86) {
828 if (boot_cpu_data.x86_model > 0xd)
833 if (boot_cpu_data.x86_model > 0x4)
844 atomic_dec(&nmi_active);
848 * the best way to detect whether a CPU has a 'hard lockup' problem
849 * is to check it's local APIC timer IRQ counts. If they are not
850 * changing then that CPU has some problem.
852 * as these watchdog NMI IRQs are generated on every CPU, we only
853 * have to check the current processor.
855 * since NMIs don't listen to _any_ locks, we have to be extremely
856 * careful not to rely on unsafe variables. The printk might lock
857 * up though, so we have to break up any console locks first ...
858 * [when there will be more tty-related locks, break them up
863 last_irq_sums [NR_CPUS],
864 alert_counter [NR_CPUS];
866 void touch_nmi_watchdog (void)
868 if (nmi_watchdog > 0) {
872 * Just reset the alert counters, (other CPUs might be
873 * spinning on locks we hold):
875 for_each_present_cpu (cpu)
876 alert_counter[cpu] = 0;
880 * Tickle the softlockup detector too:
882 touch_softlockup_watchdog();
884 EXPORT_SYMBOL(touch_nmi_watchdog);
886 extern void die_nmi(struct pt_regs *, const char *msg);
888 __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
892 * Since current_thread_info()-> is always on the stack, and we
893 * always switch the stack NMI-atomically, it's safe to use
894 * smp_processor_id().
898 int cpu = smp_processor_id();
899 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
903 /* check for other users first */
904 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
910 if (cpu_isset(cpu, backtrace_mask)) {
911 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
914 printk("NMI backtrace for cpu %d\n", cpu);
917 cpu_clear(cpu, backtrace_mask);
920 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
922 /* if the apic timer isn't firing, this cpu isn't doing much */
923 if (!touched && last_irq_sums[cpu] == sum) {
925 * Ayiee, looks like this CPU is stuck ...
926 * wait a few IRQs (5 seconds) before doing the oops ...
928 alert_counter[cpu]++;
929 if (alert_counter[cpu] == 5*nmi_hz)
931 * die_nmi will return ONLY if NOTIFY_STOP happens..
933 die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
935 last_irq_sums[cpu] = sum;
936 alert_counter[cpu] = 0;
938 /* see if the nmi watchdog went off */
940 if (nmi_watchdog == NMI_LOCAL_APIC) {
941 rdmsrl(wd->perfctr_msr, dummy);
942 if (dummy & wd->check_bit){
943 /* this wasn't a watchdog timer interrupt */
947 /* only Intel P4 uses the cccr msr */
948 if (wd->cccr_msr != 0) {
951 * - An overflown perfctr will assert its interrupt
952 * until the OVF flag in its CCCR is cleared.
953 * - LVTPC is masked on interrupt and must be
954 * unmasked by the LVTPC handler.
956 rdmsrl(wd->cccr_msr, dummy);
957 dummy &= ~P4_CCCR_OVF;
958 wrmsrl(wd->cccr_msr, dummy);
959 apic_write(APIC_LVTPC, APIC_DM_NMI);
961 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
962 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
963 /* P6 based Pentium M need to re-unmask
964 * the apic vector but it doesn't hurt
966 * ArchPerfom/Core Duo also needs this */
967 apic_write(APIC_LVTPC, APIC_DM_NMI);
969 /* start the cycle over again */
970 write_watchdog_counter(wd->perfctr_msr, NULL);
972 } else if (nmi_watchdog == NMI_IO_APIC) {
973 /* don't know how to accurately check for this.
974 * just assume it was a watchdog timer interrupt
975 * This matches the old behaviour.
/* last-chance handler for otherwise-unknown NMIs; returns non-zero if
 * the NMI was consumed */
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}
/* panic path for unknown NMIs when the unknown_nmi_panic sysctl is set */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);	/* does not return unless NOTIFY_STOP */
	return 0;
}
1006 * proc handler for /proc/sys/kernel/nmi
1008 int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
1009 void __user *buffer, size_t *length, loff_t *ppos)
1013 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
1014 old_state = nmi_watchdog_enabled;
1015 proc_dointvec(table, write, file, buffer, length, ppos);
1016 if (!!old_state == !!nmi_watchdog_enabled)
1019 if (atomic_read(&nmi_active) < 0) {
1020 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
1024 if (nmi_watchdog == NMI_DEFAULT) {
1025 if (nmi_known_cpu() > 0)
1026 nmi_watchdog = NMI_LOCAL_APIC;
1028 nmi_watchdog = NMI_IO_APIC;
1031 if (nmi_watchdog == NMI_LOCAL_APIC) {
1032 if (nmi_watchdog_enabled)
1033 enable_lapic_nmi_watchdog();
1035 disable_lapic_nmi_watchdog();
1037 printk( KERN_WARNING
1038 "NMI watchdog doesn't know what hardware to touch\n");
1046 void __trigger_all_cpu_backtrace(void)
1050 backtrace_mask = cpu_online_map;
1051 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
1052 for (i = 0; i < 10 * 1000; i++) {
1053 if (cpus_empty(backtrace_mask))
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);