X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fmca.c;h=0ee30321dfd6ab1f50f65f55e22919809c84a43e;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=e90caec1bd27b2dc97f097c395a5d0a5ec977940;hpb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;p=linux-2.6.git diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index e90caec1b..0ee30321d 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -54,6 +54,9 @@ * * 2005-10-07 Keith Owens * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto + * Add printing support for MCA/INIT. */ #include #include @@ -79,6 +82,7 @@ #include #include #include +#include #include #include @@ -136,11 +140,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); static int mca_init __initdata; +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk(temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk(temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + spin_lock_init(&mlogbuf_rlock); + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} +EXPORT_SYMBOL(ia64_mlogbuf_finish); + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} static void inline ia64_mca_spin(const char *func) { - printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); while (1) cpu_relax(); } @@ -221,7 +389,7 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) { sal_log_record_header_t *log_buffer; u64 total_len = 0; - int s; + unsigned long s; IA64_LOG_LOCK(sal_info_type); @@ -332,7 +500,7 @@ int cpe_vector = -1; int ia64_cpe_irq = -1; static irqreturn_t -ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_cpe_int_handler (int cpe_irq, void *arg) { static unsigned long cpe_history[CPE_HISTORY_LENGTH]; static int index; @@ -344,9 +512,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CPE error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); - spin_lock(&cpe_history_lock); if (!cpe_poll_enabled && cpe_vector >= 0) { @@ -375,7 +540,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cpe_history[index++] = now; if (index == CPE_HISTORY_LENGTH) @@ -383,6 +548,10 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cpe_history_lock); +out: + /* Get the CPE error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + return IRQ_HANDLED; } @@ -510,7 +679,7 @@ ia64_mca_cmc_vector_enable (void *dummy) * disable the cmc interrupt vector. */ static void -ia64_mca_cmc_vector_disable_keventd(void *unused) +ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) { on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); } @@ -522,7 +691,7 @@ ia64_mca_cmc_vector_disable_keventd(void *unused) * enable the cmc interrupt vector. */ static void -ia64_mca_cmc_vector_enable_keventd(void *unused) +ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) { on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); } @@ -576,7 +745,7 @@ ia64_mca_wakeup_all(void) * Outputs : None */ static irqreturn_t -ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) +ia64_mca_rendez_int_handler(int rendez_irq, void *arg) { unsigned long flags; int cpu = smp_processor_id(); @@ -585,8 +754,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) /* Mask all interrupts */ local_irq_save(flags); - if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) + if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(), + (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; @@ -595,16 +764,16 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) */ ia64_sal_mc_rendez(); - if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) + if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(), + (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); /* Wait for the monarch cpu to exit. */ while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) + if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(), + (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); /* Enable all interrupts */ @@ -623,12 +792,11 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) * * Inputs : wakeup_irq (Wakeup-interrupt bit) * arg (Interrupt handler specific argument) - * ptregs (Exception frame at the time of the interrupt) * Outputs : None * */ static irqreturn_t -ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg) { return IRQ_HANDLED; } @@ -988,18 +1156,22 @@ ia64_wait_for_slaves(int monarch, const char *type) } if (!missing) goto all_in; - printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); + /* + * Maybe slave(s) dead. Print buffered messages immediately. + */ + ia64_mlogbuf_finish(0); + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) - printk(" %d", c); + mprintk(" %d", c); } - printk("\n"); + mprintk("\n"); return; all_in: - printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); return; } @@ -1027,10 +1199,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", - sos->proc_state_param, cpu, sos->monarch); + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; @@ -1066,6 +1236,13 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, rh->severity = sal_log_severity_corrected; ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; + } else { + /* Dump buffered message to console */ + ia64_mlogbuf_finish(1); +#ifdef CONFIG_KEXEC + atomic_set(&kdump_in_progress, 1); + monarch_cpu = -1; +#endif } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) @@ -1075,8 +1252,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, monarch_cpu = -1; } -static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); -static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); +static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd); +static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd); /* * ia64_mca_cmc_int_handler @@ -1088,13 +1265,12 @@ static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); * Inputs * interrupt number * client data arg ptr - * saved registers ptr * * Outputs * None */ static irqreturn_t -ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_cmc_int_handler(int cmc_irq, void *arg) { static unsigned long cmc_history[CMC_HISTORY_LENGTH]; static int index; @@ -1106,9 +1282,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CMC error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); - spin_lock(&cmc_history_lock); if (!cmc_polling_enabled) { int i, count = 1; /* we know 1 happened now */ @@ -1141,7 +1314,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cmc_history[index++] = now; if (index == CMC_HISTORY_LENGTH) @@ -1149,6 +1322,10 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cmc_history_lock); +out: + /* Get the CMC error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + return IRQ_HANDLED; } @@ -1162,12 +1339,11 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) * Inputs * interrupt number * client data arg ptr - * saved registers ptr * Outputs * handled */ static irqreturn_t -ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_cmc_int_caller(int cmc_irq, void *arg) { static int start_count = -1; unsigned int cpuid; @@ -1178,7 +1354,7 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs) if (start_count == -1) start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC); - ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs); + ia64_mca_cmc_int_handler(cmc_irq, arg); for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); @@ -1229,14 +1405,13 @@ ia64_mca_cmc_poll (unsigned long dummy) * Inputs * interrupt number * client data arg ptr - * saved registers ptr * Outputs * handled */ #ifdef CONFIG_ACPI static irqreturn_t -ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_cpe_int_caller(int cpe_irq, void *arg) { static int start_count = -1; static int poll_time = MIN_CPE_POLL_INTERVAL; @@ -1248,7 +1423,7 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) if (start_count == -1) start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE); - ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs); + ia64_mca_cpe_int_handler(cpe_irq, arg); for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); @@ -1305,6 +1480,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi struct task_struct *g, *t; if (val != DIE_INIT_MONARCH_PROCESS) return NOTIFY_DONE; + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + printk(KERN_ERR "Processes interrupted by INIT -"); for_each_online_cpu(c) { struct ia64_sal_os_state *s; @@ -1326,6 +1510,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi } while_each_thread (g, t); read_unlock(&tasklist_lock); } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); return NOTIFY_DONE; } @@ -1357,12 +1543,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); @@ -1375,7 +1558,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { - printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", __FUNCTION__, cpu); atomic_dec(&slaves); sos->monarch = 1; @@ -1387,7 +1570,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { - printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", __FUNCTION__, cpu); atomic_dec(&monarchs); sos->monarch = 0; @@ -1408,7 +1591,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("Slave on cpu %d returning to normal service.\n", cpu); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; atomic_dec(&slaves); @@ -1426,7 +1609,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * same serial line, the user will need some time to switch out of the BMC before * the dump begins. */ - printk("Delaying for 5 seconds...\n"); + mprintk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu, "INIT"); /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through @@ -1439,7 +1622,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); monarch_cpu = -1;