X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fmca.c;h=aa76ea054571f0ac8e85e0001023539a448ecad8;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=3f2e929a207a1423b08188525e49c98b2f7ecf20;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 3f2e929a2..aa76ea054 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -82,11 +82,6 @@
 # define IA64_MCA_DEBUG(fmt...)
 #endif
 
-typedef struct ia64_fptr {
-	unsigned long fp;
-	unsigned long gp;
-} ia64_fptr_t;
-
 /* Used by mca_asm.S */
 ia64_mca_sal_to_os_state_t	ia64_sal_to_os_handoff_state;
 ia64_mca_os_to_sal_state_t	ia64_os_to_sal_handoff_state;
@@ -108,6 +103,7 @@ struct ia64_mca_tlb_info ia64_mca_tlb_list[NR_CPUS];
 #define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
 #define MIN_CPE_POLL_INTERVAL (2*60*HZ)  /* 2 minutes */
 #define CMC_POLL_INTERVAL     (1*60*HZ)  /* 1 minute */
+#define CPE_HISTORY_LENGTH    5
 #define CMC_HISTORY_LENGTH    5
 
 static struct timer_list cpe_poll_timer;
@@ -129,6 +125,8 @@ static int cpe_poll_enabled = 1;
 
 extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
 
+static int mca_init;
+
 /*
  * IA64_MCA log support
  */
@@ -244,7 +242,9 @@ ia64_mca_log_sal_error_record(int sal_info_type)
 	u8 *buffer;
 	u64 size;
 	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT;
+#ifdef IA64_MCA_DEBUG_INFO
 	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
+#endif
 
 	size = ia64_log_get(sal_info_type, &buffer, irq_safe);
 	if (!size)
@@ -253,7 +253,7 @@ ia64_mca_log_sal_error_record(int sal_info_type)
 	salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe);
 
 	if (irq_safe)
-		printk(KERN_INFO "CPU %d: SAL log contains %s error record\n",
+		IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
 			smp_processor_id(),
 			sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN");
 
@@ -267,20 +267,67 @@
  */
 #ifndef PLATFORM_MCA_HANDLERS
 
+#ifdef CONFIG_ACPI
+
+static int cpe_vector = -1;
+
 static irqreturn_t
 ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 {
-	IA64_MCA_DEBUG("%s: received interrupt. CPU:%d vector = %#x\n",
-		       __FUNCTION__, smp_processor_id(), cpe_irq);
+	static unsigned long	cpe_history[CPE_HISTORY_LENGTH];
+	static int		index;
+	static spinlock_t	cpe_history_lock = SPIN_LOCK_UNLOCKED;
+
+	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+		       __FUNCTION__, cpe_irq, smp_processor_id());
 
 	/* SAL spec states this should run w/ interrupts enabled */
 	local_irq_enable();
 
-	/* Get the CMC error record and log it */
+	/* Get the CPE error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
+	spin_lock(&cpe_history_lock);
+	if (!cpe_poll_enabled && cpe_vector >= 0) {
+
+		int i, count = 1; /* we know 1 happened now */
+		unsigned long now = jiffies;
+
+		for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
+			if (now - cpe_history[i] <= HZ)
+				count++;
+		}
+
+		IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
+		if (count >= CPE_HISTORY_LENGTH) {
+
+			cpe_poll_enabled = 1;
+			spin_unlock(&cpe_history_lock);
+			disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));
+
+			/*
+			 * Corrected errors will still be corrected, but
+			 * make sure there's a log somewhere that indicates
+			 * something is generating more than we can handle.
+			 */
+			printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");
+
+			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
+
+			/* lock already released, get out now */
+			return IRQ_HANDLED;
+		} else {
+			cpe_history[index++] = now;
+			if (index == CPE_HISTORY_LENGTH)
+				index = 0;
+		}
+	}
+	spin_unlock(&cpe_history_lock);
 	return IRQ_HANDLED;
 }
 
+#endif /* CONFIG_ACPI */
+
 static void
 show_min_state (pal_min_state_area_t *minstate)
 {
@@ -492,7 +539,7 @@ ia64_mca_register_cpev (int cpev)
 	}
 
 	IA64_MCA_DEBUG("%s: corrected platform error "
-		       "vector %#x setup and enabled\n", __FUNCTION__, cpev);
+		       "vector %#x registered\n", __FUNCTION__, cpev);
 }
 
 #endif /* CONFIG_ACPI */
 
@@ -501,8 +548,9 @@
 /*
  * ia64_mca_cmc_vector_setup
  *
- *	Setup the corrected machine check vector register in the processor and
- *	unmask interrupt.  This function is invoked on a per-processor basis.
+ *	Setup the corrected machine check vector register in the processor.
+ *	(The interrupt is masked on boot; ia64_mca_late_init unmasks it.)
+ *	This function is invoked on a per-processor basis.
  *
  * Inputs
  *	None
@@ -516,12 +564,12 @@ ia64_mca_cmc_vector_setup (void)
 	cmcv_reg_t	cmcv;
 
 	cmcv.cmcv_regval	= 0;
-	cmcv.cmcv_mask		= 0;	/* Unmask/enable interrupt */
+	cmcv.cmcv_mask		= 1;	/* Mask/disable interrupt at first */
 	cmcv.cmcv_vector	= IA64_CMC_VECTOR;
 	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
 
 	IA64_MCA_DEBUG("%s: CPU %d corrected "
-		       "machine check vector %#x setup and enabled.\n",
+		       "machine check vector %#x registered.\n",
 		       __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR);
 
 	IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
@@ -548,7 +596,7 @@ ia64_mca_cmc_vector_disable (void *dummy)
 	cmcv = (cmcv_reg_t)ia64_getreg(_IA64_REG_CR_CMCV);
 
 	cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
-	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval)
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
 
 	IA64_MCA_DEBUG("%s: CPU %d corrected "
 		       "machine check vector %#x disabled.\n",
@@ -575,7 +623,7 @@ ia64_mca_cmc_vector_enable (void *dummy)
 	cmcv = (cmcv_reg_t)ia64_getreg(_IA64_REG_CR_CMCV);
 
 	cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
-	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval)
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
 
 	IA64_MCA_DEBUG("%s: CPU %d corrected "
 		       "machine check vector %#x enabled.\n",
@@ -778,6 +826,31 @@ ia64_return_to_sal_check(int recover)
 }
 
+/* Function pointer for extra MCA recovery */
+int (*ia64_mca_ucmc_extension)
+	(void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
+	= NULL;
+
+int
+ia64_reg_MCA_extension(void *fn)
+{
+	if (ia64_mca_ucmc_extension)
+		return 1;
+
+	ia64_mca_ucmc_extension = fn;
+	return 0;
+}
+
+void
+ia64_unreg_MCA_extension(void)
+{
+	if (ia64_mca_ucmc_extension)
+		ia64_mca_ucmc_extension = NULL;
+}
+
+EXPORT_SYMBOL(ia64_reg_MCA_extension);
+EXPORT_SYMBOL(ia64_unreg_MCA_extension);
+
 /*
  * ia64_mca_ucmc_handler
  *
@@ -799,11 +872,20 @@ ia64_mca_ucmc_handler(void)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
 		&ia64_sal_to_os_handoff_state.proc_state_param;
-	int recover = psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc);
+	int recover;
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
 
+	/* a TLB error is the only error directly recoverable from this SAL record */
+	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
+	/* otherwise, try the registered recovery extension */
+	   || (ia64_mca_ucmc_extension
+		&& ia64_mca_ucmc_extension(
			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
+			&ia64_sal_to_os_handoff_state,
+			&ia64_os_to_sal_handoff_state));
+
 	/*
 	 * Wakeup all the processors which are spinning in the rendezvous
 	 * loop.
@@ -901,7 +983,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
  *	handled
  */
 static irqreturn_t
-ia64_mca_cmc_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs)
 {
 	static int start_count = -1;
 	unsigned int cpuid;
@@ -912,7 +994,7 @@
 	if (start_count == -1)
 		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
 
-	ia64_mca_cmc_int_handler(cpe_irq, arg, ptregs);
+	ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs);
 
 	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
 
@@ -967,11 +1049,13 @@ ia64_mca_cmc_poll (unsigned long dummy)
  * Outputs
  *	handled
  */
+#ifdef CONFIG_ACPI
+
 static irqreturn_t
 ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
 {
 	static int start_count = -1;
-	static int poll_time = MAX_CPE_POLL_INTERVAL;
+	static int poll_time = MIN_CPE_POLL_INTERVAL;
 	unsigned int cpuid;
 
 	cpuid = smp_processor_id();
@@ -989,20 +1073,30 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
 	} else {
 		/*
 		 * If a log was recorded, increase our polling frequency,
-		 * otherwise, backoff.
+		 * otherwise, backoff or return to interrupt mode.
 		 */
 		if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
 			poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
-		} else {
+		} else if (cpe_vector < 0) {
 			poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
+		} else {
+			poll_time = MIN_CPE_POLL_INTERVAL;
+
+			printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
+			enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
+			cpe_poll_enabled = 0;
 		}
+
+		if (cpe_poll_enabled)
+			mod_timer(&cpe_poll_timer, jiffies + poll_time);
 		start_count = -1;
-		mod_timer(&cpe_poll_timer, jiffies + poll_time);
 	}
 
 	return IRQ_HANDLED;
 }
 
+#endif /* CONFIG_ACPI */
+
 /*
  * ia64_mca_cpe_poll
  *
@@ -1231,7 +1325,7 @@ ia64_mca_init(void)
 	 */
 	register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
 	register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
-	ia64_mca_cmc_vector_setup();       /* Setup vector on BSP & enable */
+	ia64_mca_cmc_vector_setup();       /* Setup vector on BSP */
 
 	/* Setup the MCA rendezvous interrupt vector */
 	register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
@@ -1240,22 +1334,9 @@ ia64_mca_init(void)
 	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
 
 #ifdef CONFIG_ACPI
-	/* Setup the CPE interrupt vector */
-	{
-		irq_desc_t *desc;
-		unsigned int irq;
-		int cpev = acpi_request_vector(ACPI_INTERRUPT_CPEI);
-
-		if (cpev >= 0) {
-			for (irq = 0; irq < NR_IRQS; ++irq)
-				if (irq_to_vector(irq) == cpev) {
-					desc = irq_descp(irq);
-					desc->status |= IRQ_PER_CPU;
-					setup_irq(irq, &mca_cpe_irqaction);
-				}
-			ia64_mca_register_cpev(cpev);
-		}
-	}
+	/* Setup the CPEI/P vector and handler */
+	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
 #endif
 
 	/* Initialize the areas set aside by the OS to buffer the
@@ -1267,6 +1348,7 @@
 	ia64_log_init(SAL_INFO_TYPE_CMC);
 	ia64_log_init(SAL_INFO_TYPE_CPE);
 
+	mca_init = 1;
 	printk(KERN_INFO "MCA related initialization done\n");
 }
 
@@ -1283,20 +1365,46 @@
 static int __init
 ia64_mca_late_init(void)
 {
+	if (!mca_init)
+		return 0;
+
+	/* Setup the CMCI/P vector and handler */
 	init_timer(&cmc_poll_timer);
 	cmc_poll_timer.function = ia64_mca_cmc_poll;
 
-	/* Reset to the correct state */
+	/* Unmask/enable the vector */
 	cmc_polling_enabled = 0;
+	schedule_work(&cmc_enable_work);
+
+	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__);
 
+#ifdef CONFIG_ACPI
+	/* Setup the CPEI/P vector and handler */
 	init_timer(&cpe_poll_timer);
 	cpe_poll_timer.function = ia64_mca_cpe_poll;
 
-#ifdef CONFIG_ACPI
-	/* If platform doesn't support CPEI, get the timer going. */
-	if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0 && cpe_poll_enabled) {
-		register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
-		ia64_mca_cpe_poll(0UL);
+	{
+		irq_desc_t *desc;
+		unsigned int irq;
+
+		if (cpe_vector >= 0) {
+			/* If platform supports CPEI, enable the irq. */
+			cpe_poll_enabled = 0;
+			for (irq = 0; irq < NR_IRQS; ++irq)
+				if (irq_to_vector(irq) == cpe_vector) {
+					desc = irq_descp(irq);
+					desc->status |= IRQ_PER_CPU;
+					setup_irq(irq, &mca_cpe_irqaction);
+				}
+			ia64_mca_register_cpev(cpe_vector);
+			IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
+		} else {
+			/* If platform doesn't support CPEI, get the timer going. */
+			if (cpe_poll_enabled) {
+				ia64_mca_cpe_poll(0UL);
+				IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__);
+			}
+		}
 	}
 #endif
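
The heart of this patch is the storm throttle added to ia64_mca_cpe_int_handler(): each corrected platform error interrupt is timestamped into a small ring buffer, and once CPE_HISTORY_LENGTH interrupts land within one second the handler disables the CPE interrupt and falls back to cpe_poll_timer; ia64_mca_cpe_int_caller() later returns to interrupt mode after a poll interval passes with no new records. The standalone C program below is a minimal sketch of that throttling decision only, not kernel code: now_ms(), storm_detected(), WINDOW_MS, and polling_mode are hypothetical stand-ins for jiffies, the handler body, the "<= HZ" test, and cpe_poll_enabled, and the cpe_history_lock spinlock is omitted because the sketch is single-threaded.

/*
 * storm_throttle.c - sketch of the CPE interrupt-storm throttle above.
 * Build with any C compiler, e.g.: cc storm_throttle.c
 */
#include <stdio.h>
#include <time.h>

#define HISTORY_LENGTH	5	/* mirrors CPE_HISTORY_LENGTH */
#define WINDOW_MS	1000	/* one second, mirrors `now - cpe_history[i] <= HZ` */

static long history[HISTORY_LENGTH];	/* timestamps of recent interrupts */
static int slot;			/* mirrors the handler's static `index` */
static int polling_mode;		/* mirrors cpe_poll_enabled */

static long now_ms(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

/*
 * Called once per interrupt.  Returns 1 when HISTORY_LENGTH interrupts
 * have landed inside one window, i.e. when the real handler would call
 * disable_irq_nosync() and arm cpe_poll_timer instead.
 */
static int storm_detected(void)
{
	long now = now_ms();
	int i, count = 1;	/* the interrupt being handled counts too */

	if (polling_mode)
		return 0;

	/* count how many recorded interrupts fall inside the window */
	for (i = 0; i < HISTORY_LENGTH; i++)
		if (now - history[i] <= WINDOW_MS)
			count++;

	if (count >= HISTORY_LENGTH) {
		polling_mode = 1;	/* kernel: disable_irq_nosync() + mod_timer() */
		return 1;
	}

	/* below threshold: remember this interrupt, wrapping the ring */
	history[slot] = now;
	if (++slot == HISTORY_LENGTH)
		slot = 0;
	return 0;
}

int main(void)
{
	int i;

	/* A back-to-back burst trips the threshold on the fifth interrupt. */
	for (i = 0; i < 6; i++) {
		if (storm_detected()) {
			printf("interrupt %d: threshold reached, switch to polling\n", i);
			break;
		}
		printf("interrupt %d: handled in interrupt mode\n", i);
	}
	return 0;
}

Counting timestamps inside a fixed window instead of keeping a running rate counter means stale entries age out automatically, and the hysteresis of re-enabling the interrupt only after a quiet MIN_CPE_POLL_INTERVAL keeps the handler from flapping between the two modes.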