X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fmce.c;h=ac085038af2924aab65b5eb82c77fdc26d5430d6;hb=refs%2Fheads%2Fvserver;hp=b8b9529fa89e0db08b90cc6d6bea4da6fa7e0afb;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index b8b9529fa..ac085038a 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -29,6 +29,8 @@ #define MISC_MCELOG_MINOR 227 #define NR_BANKS 6 +atomic_t mce_entry; + static int mce_dont_init; /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, @@ -139,8 +141,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) static int mce_available(struct cpuinfo_x86 *c) { - return test_bit(X86_FEATURE_MCE, &c->x86_capability) && - test_bit(X86_FEATURE_MCA, &c->x86_capability); + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) @@ -173,13 +174,15 @@ void do_machine_check(struct pt_regs * regs, long error_code) int i; int panicm_found = 0; + atomic_inc(&mce_entry); + if (regs) notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); if (!banks) - return; + goto out2; memset(&m, 0, sizeof(struct mce)); - m.cpu = safe_smp_processor_id(); + m.cpu = smp_processor_id(); rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); if (!(m.mcgstatus & MCG_STATUS_RIPV)) kill_it = 1; @@ -267,15 +270,44 @@ void do_machine_check(struct pt_regs * regs, long error_code) out: /* Last thing done in the machine check exception to clear state. */ wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); } +#ifdef CONFIG_X86_MCE_INTEL +/*** + * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog + * @cpu: The CPU on which the event occurred. 
+ * @status: Event status information + * + * This function should be called by the thermal interrupt after the + * event has been processed and the decision was made to log the event + * further. + * + * The status parameter will be saved to the 'status' field of 'struct mce' + * and historically has been the register value of the + * MSR_IA32_THERMAL_STATUS (Intel) msr. + */ +void mce_log_therm_throt_event(unsigned int cpu, __u64 status) +{ + struct mce m; + + memset(&m, 0, sizeof(m)); + m.cpu = cpu; + m.bank = MCE_THERMAL_BANK; + m.status = status; + rdtscll(m.tsc); + mce_log(&m); +} +#endif /* CONFIG_X86_MCE_INTEL */ + /* * Periodic polling timer for "silent" machine check errors. */ static int check_interval = 5 * 60; /* 5 minutes */ -static void mcheck_timer(void *data); -static DECLARE_WORK(mcheck_work, mcheck_timer, NULL); +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); static void mcheck_check_cpu(void *info) { @@ -283,7 +315,7 @@ static void mcheck_check_cpu(void *info) do_machine_check(NULL, 0); } -static void mcheck_timer(void *data) +static void mcheck_timer(struct work_struct *work) { on_each_cpu(mcheck_check_cpu, NULL, 1, 1); schedule_delayed_work(&mcheck_work, check_interval * HZ); @@ -502,7 +534,7 @@ static struct miscdevice mce_log_device = { static int __init mcheck_disable(char *str) { mce_dont_init = 1; - return 0; + return 1; } /* mce=off disables machine check. Note you can reenable it later @@ -522,7 +554,7 @@ static int __init mcheck_enable(char *str) get_option(&str, &tolerant); else printk("mce= argument %s ignored. Please use /sys", str); - return 0; + return 1; } __setup("nomce", mcheck_disable); @@ -557,7 +589,7 @@ static struct sysdev_class mce_sysclass = { set_kset_name("machinecheck"), }; -static DEFINE_PER_CPU(struct sys_device, device_mce); +DEFINE_PER_CPU(struct sys_device, device_mce); /* Why are there no generic functions for this? 
*/ #define ACCESSOR(name, var, start) \ @@ -609,8 +641,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) return err; } -#ifdef CONFIG_HOTPLUG_CPU -static __cpuinit void mce_remove_device(unsigned int cpu) +static void mce_remove_device(unsigned int cpu) { int i; @@ -620,11 +651,11 @@ static __cpuinit void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); } -#endif /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int +static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -633,11 +664,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_ONLINE: mce_create_device(cpu); break; -#ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: mce_remove_device(cpu); break; -#endif } return NOTIFY_OK; } @@ -659,7 +688,7 @@ static __init int mce_init_device(void) mce_create_device(i); } - register_cpu_notifier(&mce_cpu_notifier); + register_hotcpu_notifier(&mce_cpu_notifier); misc_register(&mce_log_device); return err; }