X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Ftraps.c;h=a69bcf9e9adb347b9369370ab7ac63ff1db8135c;hb=refs%2Fheads%2Fvserver;hp=72e672ef3260f0f1e5f1e237ce1fe0f37e379feb;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 72e672ef3..a69bcf9e9 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -6,15 +6,12 @@ * * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 - * - * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $ */ /* * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'entry.S'. */ -#include #include #include #include @@ -26,11 +23,17 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include +#include #include -#include #include #include #include @@ -38,17 +41,13 @@ #include #include #include - +#include #include #include #include #include #include - -#include - - -extern struct gate_struct idt_table[256]; +#include asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -71,20 +70,24 @@ asmlinkage void reserved(void); asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); -asmlinkage void call_debug(void); -struct notifier_block *die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(die_chain); +EXPORT_SYMBOL(die_chain); + +extern char last_sysfs_file[]; int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + vmalloc_sync_all(); + return atomic_notifier_chain_register(&die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&die_chain, nb); } +EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ static inline void conditional_sti(struct pt_regs *regs) { @@ -92,132 +95,285 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -static int kstack_depth_to_print = 10; +static inline void preempt_conditional_sti(struct pt_regs *regs) +{ + preempt_disable(); + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_cli(struct pt_regs *regs) +{ + if (regs->eflags & X86_EFLAGS_IF) + local_irq_disable(); + /* Make sure to not schedule here because we could be running + on an exception stack. 
*/ + preempt_enable_no_resched(); +} + +int kstack_depth_to_print = 12; #ifdef CONFIG_KALLSYMS -#include -int printk_address(unsigned long address) -{ +void printk_address(unsigned long address) +{ unsigned long offset = 0, symsize; const char *symname; char *modname; - char *delim = ":"; + char *delim = ":"; char namebuf[128]; - symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); - if (!symname) - return printk("[<%016lx>]", address); - if (!modname) + symname = kallsyms_lookup(address, &symsize, &offset, + &modname, namebuf); + if (!symname) { + printk(" [<%016lx>]\n", address); + return; + } + if (!modname) modname = delim = ""; - return printk("<%016lx>{%s%s%s%s%+ld}", - address,delim,modname,delim,symname,offset); -} + printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", + address, delim, modname, delim, symname, offset, symsize); +} #else -int printk_address(unsigned long address) -{ - return printk("[<%016lx>]", address); -} +void printk_address(unsigned long address) +{ + printk(" [<%016lx>]\n", address); +} #endif -unsigned long *in_exception_stack(int cpu, unsigned long stack) -{ - int k; +static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, + unsigned *usedp, char **idp) +{ + static char ids[][8] = { + [DEBUG_STACK - 1] = "#DB", + [NMI_STACK - 1] = "NMI", + [DOUBLEFAULT_STACK - 1] = "#DF", + [STACKFAULT_STACK - 1] = "#SS", + [MCE_STACK - 1] = "#MC", +#if DEBUG_STKSZ > EXCEPTION_STKSZ + [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" +#endif + }; + unsigned k; + + /* + * Iterate over all exception stacks, and figure out whether + * 'stack' is in one of them: + */ for (k = 0; k < N_EXCEPTION_STACKS; k++) { - struct tss_struct *tss = &per_cpu(init_tss, cpu); - unsigned long end = tss->ist[k] + EXCEPTION_STKSZ; + unsigned long end = per_cpu(orig_ist, cpu).ist[k]; + /* + * Is 'stack' above this exception frame's end? + * If yes then skip to the next frame. + */ + if (stack >= end) + continue; + /* + * Is 'stack' above this exception frame's start address? + * If yes then we found the right frame. + */ + if (stack >= end - EXCEPTION_STKSZ) { + /* + * Make sure we only iterate through an exception + * stack once. If it comes up for the second time + * then there's something wrong going on - just + * break out and return NULL: + */ + if (*usedp & (1U << k)) + break; + *usedp |= 1U << k; + *idp = ids[k]; + return (unsigned long *)end; + } + /* + * If this is a debug stack, and if it has a larger size than + * the usual exception stacks, then 'stack' might still + * be within the lower portion of the debug stack: + */ +#if DEBUG_STKSZ > EXCEPTION_STKSZ + if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { + unsigned j = N_EXCEPTION_STACKS - 1; - if (stack >= tss->ist[k] && stack <= end) + /* + * Black magic. A large debug stack is composed of + * multiple exception stack entries, which we + * iterate through now. Dont look: + */ + do { + ++j; + end -= EXCEPTION_STKSZ; + ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); + } while (stack < end - EXCEPTION_STKSZ); + if (*usedp & (1U << j)) + break; + *usedp |= 1U << j; + *idp = ids[j]; return (unsigned long *)end; + } +#endif } return NULL; -} +} + +#define MSG(txt) ops->warning(data, txt) /* * x86-64 can have upto three kernel stacks: * process stack * interrupt stack - * severe exception (double fault, nmi, stack fault) hardware stack - * Check and process them in order. 
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void show_trace(unsigned long *stack) +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { - unsigned long addr; - unsigned long *irqstack, *irqstack_end, *estack_end; - const int cpu = safe_smp_processor_id(); - int i; + void *t = (void *)tinfo; + return p > t && p < t + THREAD_SIZE - 3; +} - printk("\nCall Trace:"); - i = 0; - - estack_end = in_exception_stack(cpu, (unsigned long)stack); - if (estack_end) { - while (stack < estack_end) { - addr = *stack++; - if (__kernel_text_address(addr)) { - i += printk_address(addr); - i += printk(" "); - if (i > 50) { - printk("\n"); - i = 0; - } - } - } - i += printk(" "); - i += 7; - stack = (unsigned long *) estack_end[-2]; - } - - irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr); - irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64); - - if (stack >= irqstack && stack < irqstack_end) { - printk(" "); - while (stack < irqstack_end) { - addr = *stack++; +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, + struct stacktrace_ops *ops, void *data) +{ + const unsigned cpu = get_cpu(); + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned used = 0; + struct thread_info *tinfo; + + if (!tsk) + tsk = current; + + if (!stack) { + unsigned long dummy; + stack = &dummy; + if (tsk && tsk != current) + stack = (unsigned long *)tsk->thread.rsp; + } + + /* + * Print function call entries within a stack. 'cond' is the + * "end of stackframe" condition, that the 'stack++' + * iteration will eventually trigger. + */ +#define HANDLE_STACK(cond) \ + do while (cond) { \ + unsigned long addr = *stack++; \ + /* Use unlocked access here because except for NMIs \ + we should be already protected against module unloads */ \ + if (__kernel_text_address(addr)) { \ + /* \ + * If the address is either in the text segment of the \ + * kernel, or in the region which contains vmalloc'ed \ + * memory, it *may* be the address of a calling \ + * routine; if so, print it so that someone tracing \ + * down the cause of the crash will be able to figure \ + * out the call path that was taken. \ + */ \ + ops->address(data, addr); \ + } \ + } while (0) + + /* + * Print function call entries in all stacks, starting at the + * current stack address. If the stacks consist of nested + * exceptions + */ + for (;;) { + char *id; + unsigned long *estack_end; + estack_end = in_exception_stack(cpu, (unsigned long)stack, + &used, &id); + + if (estack_end) { + if (ops->stack(data, id) < 0) + break; + HANDLE_STACK (stack < estack_end); + ops->stack(data, ""); /* - * If the address is either in the text segment of the - * kernel, or in the region which contains vmalloc'ed - * memory, it *may* be the address of a calling - * routine; if so, print it so that someone tracing - * down the cause of the crash will be able to figure - * out the call path that was taken. 
+ * We link to the next stack via the + * second-to-last pointer (index -2 to end) in the + * exception stack: */ - if (__kernel_text_address(addr)) { - i += printk_address(addr); - i += printk(" "); - if (i > 50) { - printk("\n "); - i = 0; - } + stack = (unsigned long *) estack_end[-2]; + continue; + } + if (irqstack_end) { + unsigned long *irqstack; + irqstack = irqstack_end - + (IRQSTACKSIZE - 64) / sizeof(*irqstack); + + if (stack >= irqstack && stack < irqstack_end) { + if (ops->stack(data, "IRQ") < 0) + break; + HANDLE_STACK (stack < irqstack_end); + /* + * We link to the next stack (which would be + * the process stack normally) the last + * pointer (index -1 to end) in the IRQ stack: + */ + stack = (unsigned long *) (irqstack_end[-1]); + irqstack_end = NULL; + ops->stack(data, "EOI"); + continue; } - } - stack = (unsigned long *) (irqstack_end[-1]); - printk(" "); - i += 7; - } - - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (__kernel_text_address(addr)) { - i += printk_address(addr); - i += printk(" "); - if (i > 50) { - printk("\n "); - i = 0; - } } + break; } + + /* + * This handles the process stack: + */ + tinfo = task_thread_info(tsk); + HANDLE_STACK (valid_stack_ptr(tinfo, stack)); +#undef HANDLE_STACK + put_cpu(); +} +EXPORT_SYMBOL(dump_trace); + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + print_symbol(msg, symbol); printk("\n"); } -void show_stack(struct task_struct *tsk, unsigned long * rsp) +static void print_trace_warning(void *data, char *msg) +{ + printk("%s\n", msg); +} + +static int print_trace_stack(void *data, char *name) +{ + printk(" <%s> ", name); + return 0; +} + +static void print_trace_address(void *data, unsigned long addr) +{ + printk_address(addr); +} + +static struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +void +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) +{ + printk("\nCall Trace:\n"); + dump_trace(tsk, regs, stack, &print_trace_ops, NULL); + printk("\n"); +} + +static void +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) { unsigned long *stack; int i; - const int cpu = safe_smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr); - unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE); + const int cpu = smp_processor_id(); + unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); // debugging aid: "show_stack(NULL, NULL);" prints the // back trace for this cpu. 
@@ -241,10 +397,16 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) break; } if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + printk("\n"); + printk(" %016lx", *stack++); + touch_nmi_watchdog(); } - show_trace((unsigned long *)rsp); + show_trace(tsk, regs, rsp); +} + +void show_stack(struct task_struct *tsk, unsigned long * rsp) +{ + _show_stack(tsk, NULL, rsp); } /* @@ -253,7 +415,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) void dump_stack(void) { unsigned long dummy; - show_trace(&dummy); + show_trace(NULL, NULL, &dummy); } EXPORT_SYMBOL(dump_stack); @@ -261,17 +423,18 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = (regs->cs & 3) == 0; + int in_kernel = !user_mode(regs); unsigned long rsp; - const int cpu = safe_smp_processor_id(); - struct task_struct *cur = cpu_pda[cpu].pcurrent; + const int cpu = smp_processor_id(); + struct task_struct *cur = cpu_pda(cpu)->pcurrent; rsp = regs->rsp; printk("CPU %d ", cpu); __show_regs(regs); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, cur->thread_info, cur); + printk("Process %s (pid: %d[#%u], threadinfo %p, task %p)\n", + cur->comm, cur->pid, cur->xid, + task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the @@ -280,16 +443,15 @@ void show_registers(struct pt_regs *regs) if (in_kernel) { printk("Stack: "); - show_stack(NULL, (unsigned long*)rsp); + _show_stack(NULL, regs, (unsigned long*)rsp); printk("\nCode: "); - if(regs->rip < PAGE_OFFSET) + if (regs->rip < PAGE_OFFSET) goto bad; - for(i=0;i<20;i++) - { + for (i=0; i<20; i++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->rip)[i])) { + if (__get_user(c, &((unsigned char*)regs->rip)[i])) { bad: printk(" Bad RIP value."); break; @@ -300,60 +462,67 @@ bad: printk("\n"); } -void handle_BUG(struct pt_regs *regs) -{ - struct bug_frame f; - char tmp; +int is_valid_bugaddr(unsigned long rip) +{ + unsigned short ud2; - if (regs->cs & 3) - return; - if (__copy_from_user(&f, (struct bug_frame *) regs->rip, - sizeof(struct bug_frame))) - return; - if ((unsigned long)f.filename < __PAGE_OFFSET || - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) - return; - if (__get_user(tmp, f.filename)) - f.filename = "unmapped filename"; - printk("----------- [cut here ] --------- [please bite here ] ---------\n"); - printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line); -} + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) + return 0; + return ud2 == 0x0b0f; +} + +#ifdef CONFIG_BUG void out_of_line_bug(void) { BUG(); } +EXPORT_SYMBOL(out_of_line_bug); +#endif static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; +static unsigned int die_nest_count; -void oops_begin(void) +unsigned __kprobes long oops_begin(void) { - int cpu = safe_smp_processor_id(); - /* racy, but better than risking deadlock. */ - local_irq_disable(); + int cpu = smp_processor_id(); + unsigned long flags; + + oops_enter(); + + /* racy, but better than risking deadlock. */ + local_irq_save(flags); if (!spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. 
should stop eventually */; else - spin_lock(&die_lock); + spin_lock(&die_lock); } - die_owner = cpu; + die_nest_count++; + die_owner = cpu; console_verbose(); - bust_spinlocks(1); + bust_spinlocks(1); + return flags; } -void oops_end(void) +void __kprobes oops_end(unsigned long flags) { die_owner = -1; - bust_spinlocks(0); - spin_unlock(&die_lock); - local_irq_enable(); /* make sure back scroll still works */ + bust_spinlocks(0); + die_nest_count--; + if (die_nest_count) + /* We still own the lock */ + local_irq_restore(flags); + else + /* Nest count reaches zero, release the lock. */ + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) - panic("Oops"); -} + panic("Fatal exception"); + oops_exit(); +} -void __die(const char * str, struct pt_regs * regs, long err) +void __kprobes __die(const char * str, struct pt_regs * regs, long err) { static int die_counter; printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); @@ -367,73 +536,67 @@ void __die(const char * str, struct pt_regs * regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); +#ifdef CONFIG_SYSFS + printk(KERN_ALERT "last sysfs file: %s\n", last_sysfs_file); +#endif + notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); show_registers(regs); /* Executive summary in case the oops scrolled away */ printk(KERN_ALERT "RIP "); printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); + if (kexec_should_crash(current)) + crash_kexec(regs); } void die(const char * str, struct pt_regs * regs, long err) { - oops_begin(); - handle_BUG(regs); + unsigned long flags = oops_begin(); + + if (!user_mode(regs)) + report_bug(regs->rip); + __die(str, regs, err); - oops_end(); + oops_end(flags); do_exit(SIGSEGV); } -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) -{ - if (!(regs->eflags & VM_MASK) && (regs->cs == __KERNEL_CS)) - die(str, regs, err); -} -void die_nmi(char *str, struct pt_regs *regs) +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) { - oops_begin(); + unsigned long flags = oops_begin(); + /* * We are in trouble anyway, lets at least try * to get a message out. 
*/ - printk(str, safe_smp_processor_id()); + printk(str, smp_processor_id()); show_registers(regs); - if (panic_on_timeout || panic_on_oops) - panic("nmi watchdog"); - printk("console shuts up ...\n"); - oops_end(); + if (kexec_should_crash(current)) + crash_kexec(regs); + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + oops_end(flags); + nmi_exit(); + local_irq_enable(); do_exit(SIGSEGV); } -static void do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) { - conditional_sti(regs); - -#ifdef CONFIG_CHECKING - { - unsigned long gs; - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda, - regs->rip); - } - } -#endif + struct task_struct *tsk = current; - if ((regs->cs & 3) != 0) { - struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + if (user_mode(regs)) { if (exception_trace && unhandled_signal(tsk, signr)) printk(KERN_INFO - "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", - tsk->comm, tsk->pid, str, - regs->rip,regs->rsp,error_code); + "%s[%d:#%u] trap %s rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, str, + regs->rip, regs->rsp, error_code); - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; if (info) force_sig_info(signr, info, tsk); else @@ -446,9 +609,9 @@ static void do_trap(int trapnr, int signr, char *str, { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->rip); - if (fixup) { + if (fixup) regs->rip = fixup->fixup; - } else + else die(str, regs, error_code); return; } @@ -460,6 +623,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ } @@ -474,13 +638,14 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ } DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) DO_ERROR( 4, SIGSEGV, "overflow", overflow) DO_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip) DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) @@ -488,53 +653,51 @@ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR(18, SIGSEGV, "reserved", reserved) -#define DO_ERROR_STACK(trapnr, signr, str, name) \ -asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \ -{ \ - struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return regs; \ - if (regs->cs & 3) { \ - memcpy(pr, regs, sizeof(struct pt_regs)); \ - regs = pr; \ - } \ - do_trap(trapnr, 
signr, str, regs, error_code, NULL); \ - return regs; \ +/* Runs on IST stack */ +asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; + preempt_conditional_sti(regs); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); } -DO_ERROR_STACK(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault) +asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) +{ + static const char str[] = "double fault"; + struct task_struct *tsk = current; + + /* Return not checked because double check cannot be ignored */ + notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 8; + + /* This is always a kernel trap and never fixable (and thus must + never return). */ + for (;;) + die(str, regs, error_code); +} + +asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { - conditional_sti(regs); + struct task_struct *tsk = current; -#ifdef CONFIG_CHECKING - { - unsigned long gs; - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - oops_in_progress++; - printk("general protection handler: wrong gs %lx expected %p\n", gs, pda); - oops_in_progress--; - } - } -#endif + conditional_sti(regs); - if ((regs->cs & 3)!=0) { - struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + if (user_mode(regs)) { if (exception_trace && unhandled_signal(tsk, SIGSEGV)) printk(KERN_INFO - "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", - tsk->comm, tsk->pid, - regs->rip,regs->rsp,error_code); + "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, + regs->rip, regs->rsp, error_code); - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; force_sig(SIGSEGV, tsk); return; } @@ -554,17 +717,25 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) } } -static void mem_parity_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); - printk("You probably have a hardware problem with your RAM chips\n"); + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", + reason); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); /* Clear and disable the memory parity error line. */ reason = (reason & 0xf) | 4; outb(reason, 0x61); } -static void io_check_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +io_check_error(unsigned char reason, struct pt_regs * regs) { printk("NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); @@ -577,38 +748,48 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -{ printk("Uhhuh. 
NMI received for unknown reason %02x.\n", reason); - printk("Dazed and confused, but trying to continue\n"); - printk("Do you have a strange power saving mode enabled?\n"); +static __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +{ + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", + reason); + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } -asmlinkage void default_do_nmi(struct pt_regs *regs) +/* Runs on IST stack. This code must keep interrupts off all the time. + Nested NMIs are prevented by the CPU. */ +asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + int cpu; + + cpu = smp_processor_id(); /* Only the BSP gets external NMIs from the system. */ - if (!smp_processor_id()) + if (!cpu) reason = get_nmi_reason(); if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; -#ifdef CONFIG_X86_LOCAL_APIC /* * Ok, so this is none of the documented NMI sources, * so it must be the NMI watchdog. */ - if (nmi_watchdog > 0) { - nmi_watchdog_tick(regs,reason); + if (nmi_watchdog_tick(regs,reason)) return; - } -#endif - unknown_nmi_error(reason, regs); + if (!do_nmi_callback(regs,cpu)) + unknown_nmi_error(reason, regs); + return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; /* AK: following checks seem to be broken on modern chipsets. FIXME */ @@ -617,60 +798,55 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) mem_parity_error(reason, regs); if (reason & 0x40) io_check_error(reason, regs); - - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered. - */ - outb(0x8f, 0x70); - inb(0x71); /* dummy */ - outb(0x0f, 0x70); - inb(0x71); /* dummy */ } -asmlinkage void do_int3(struct pt_regs * regs, long error_code) +/* runs on IST stack. */ +asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } + preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - return; + preempt_conditional_cli(regs); +} + +/* Help handler running on IST stack to switch back to user stack + for scheduling or signal handling. The actual stack switch is done in + entry.S */ +asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +{ + struct pt_regs *regs = eregs; + /* Did already sync */ + if (eregs == (struct pt_regs *)eregs->rsp) + ; + /* Exception from user space */ + else if (user_mode(eregs)) + regs = task_pt_regs(current); + /* Exception from kernel and interrupts are enabled. Move to + kernel process stack. */ + else if (eregs->eflags & X86_EFLAGS_IF) + regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs)); + if (eregs != regs) + *regs = *eregs; + return regs; } /* runs on IST stack. 
*/ -asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code) +asmlinkage void __kprobes do_debug(struct pt_regs * regs, + unsigned long error_code) { - struct pt_regs *pr; unsigned long condition; struct task_struct *tsk = current; siginfo_t info; - pr = (struct pt_regs *)(current->thread.rsp0)-1; - if (regs->cs & 3) { - memcpy(pr, regs, sizeof(struct pt_regs)); - regs = pr; - } - -#ifdef CONFIG_CHECKING - { - /* RED-PEN interaction with debugger - could destroy gs */ - unsigned long gs; - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - printk("debug handler: wrong gs %lx expected %p\n", gs, pda); - } - } -#endif - - asm("movq %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) { - return regs; - } - conditional_sti(regs); + SIGTRAP) == NOTIFY_STOP) + return; + + preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -682,9 +858,7 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code) tsk->thread.debugreg6 = condition; /* Mask out spurious TF errors due to lazy TF clearing */ - if ((condition & DR_STEP) && - (notify_die(DIE_DEBUGSTEP, "debugstep", regs, condition, - 1, SIGTRAP) != NOTIFY_STOP)) { + if (condition & DR_STEP) { /* * The TF error should be masked out only if the current * process is not traced and if the TRAP flag has been set @@ -694,10 +868,8 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code) * allowing programs to debug themselves without the ptrace() * interface. */ - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; } /* Ok, finally something we can handle */ @@ -706,28 +878,21 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; - if ((regs->cs & 3) == 0) - goto clear_dr7; + info.si_addr = user_mode(regs) ? 
(void __user *)regs->rip : NULL; + force_sig_info(SIGTRAP, &info, tsk); - info.si_addr = (void __user *)regs->rip; - force_sig_info(SIGTRAP, &info, tsk); clear_dr7: - asm volatile("movq %0,%%db7"::"r"(0UL)); - notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP); - return regs; + set_debugreg(0UL, 7); + preempt_conditional_cli(regs); + return; clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - -clear_TF: - /* RED-PEN could cause spurious errors */ - if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP) - != NOTIFY_STOP) regs->eflags &= ~TF_MASK; - return regs; + preempt_conditional_cli(regs); } -static int kernel_math_error(struct pt_regs *regs, char *str) +static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->rip); @@ -735,15 +900,10 @@ static int kernel_math_error(struct pt_regs *regs, char *str) regs->rip = fixup->fixup; return 1; } - notify_die(DIE_GPF, str, regs, 0, 16, SIGFPE); -#if 0 - /* This should be a die, but warn only for now */ + notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); + /* Illegal floating point operation in the kernel */ + current->thread.trap_no = trapnr; die(str, regs, 0); -#else - printk(KERN_DEBUG "%s: %s at ", current->comm, str); - printk_address(regs->rip); - printk("\n"); -#endif return 0; } @@ -760,8 +920,8 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) unsigned short cwd, swd; conditional_sti(regs); - if ((regs->cs & 3) == 0 && - kernel_math_error(regs, "kernel x87 math error")) + if (!user_mode(regs) && + kernel_math_error(regs, "kernel x87 math error", 16)) return; /* @@ -787,13 +947,16 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) */ cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + switch (swd & ~cwd & 0x3f) { case 0x000: default: break; case 0x001: /* Invalid Op */ - case 0x041: /* Stack Fault */ - case 0x241: /* Stack Fault | Direction */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ info.si_code = FPE_FLTINV; break; case 0x002: /* Denormalize */ @@ -826,8 +989,8 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) unsigned short mxcsr; conditional_sti(regs); - if ((regs->cs & 3) == 0 && - kernel_math_error(regs, "simd math error")) + if (!user_mode(regs) && + kernel_math_error(regs, "kernel simd math error", 19)) return; /* @@ -880,6 +1043,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } +asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) +{ +} + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task @@ -895,12 +1062,8 @@ asmlinkage void math_state_restore(void) if (!used_math()) init_fpu(me); restore_fpu_checking(&me->thread.i387.fxsave); - me->thread_info->status |= TS_USEDFPU; -} - -void do_call_debug(struct pt_regs *regs) -{ - notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); + task_thread_info(me)->status |= TS_USEDFPU; + me->fpu_counter++; } void __init trap_init(void) @@ -908,9 +1071,9 @@ void __init trap_init(void) set_intr_gate(0,÷_error); set_intr_gate_ist(1,&debug,DEBUG_STACK); set_intr_gate_ist(2,&nmi,NMI_STACK); - set_system_gate(3,&int3); - set_system_gate(4,&overflow); /* int4-5 can be called from all */ - set_system_gate(5,&bounds); + set_system_gate_ist(3,&int3,DEBUG_STACK); 
/* int3 can be called from all */ + set_system_gate(4,&overflow); /* int4 can be called from all */ + set_intr_gate(5,&bounds); set_intr_gate(6,&invalid_op); set_intr_gate(7,&device_not_available); set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); @@ -932,8 +1095,6 @@ void __init trap_init(void) set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif - set_intr_gate(KDB_VECTOR, call_debug); - /* * Should be a barrier for any external CPU state. */ @@ -941,18 +1102,21 @@ void __init trap_init(void) } -/* Actual parsing is done early in setup.c. */ -static int __init oops_dummy(char *s) +static int __init oops_setup(char *s) { - panic_on_oops = 1; - return -1; + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; } -__setup("oops=", oops_dummy); +early_param("oops", oops_setup); static int __init kstack_setup(char *s) { + if (!s) + return -EINVAL; kstack_depth_to_print = simple_strtoul(s,NULL,0); return 0; } -__setup("kstack=", kstack_setup); - +early_param("kstack", kstack_setup);
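
For context, the hunks above replace the spinlock-protected die notifier list with an ATOMIC_NOTIFIER_HEAD and export register_die_notifier()/unregister_die_notifier(), so that modular debuggers (the comments mention kdb) can hook fatal-trap events. Below is a minimal sketch of such a consumer; it is not part of this patch, the module name "sample_die" is invented for illustration, and the header location (asm/kdebug.h for struct die_args and the DIE_* codes) is an assumption that matches kernels of this vintage.

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <asm/kdebug.h>		/* struct die_args, DIE_* event codes (2.6.1x x86-64) */

	/* Invoked from notify_die(); 'val' identifies the event, 'data' is a die_args. */
	static int sample_die_event(struct notifier_block *nb, unsigned long val,
				    void *data)
	{
		struct die_args *args = data;

		if (val == DIE_OOPS)
			printk(KERN_ERR "sample_die: oops \"%s\" err %ld rip %016lx\n",
			       args->str, args->err, args->regs->rip);
		return NOTIFY_DONE;	/* do not swallow the event */
	}

	static struct notifier_block sample_die_nb = {
		.notifier_call = sample_die_event,
	};

	static int __init sample_die_init(void)
	{
		return register_die_notifier(&sample_die_nb);
	}

	static void __exit sample_die_exit(void)
	{
		unregister_die_notifier(&sample_die_nb);
	}

	module_init(sample_die_init);
	module_exit(sample_die_exit);
	MODULE_LICENSE("GPL");

Since the chain is now atomic, callbacks can be invoked from NMI or exception context, so a handler must not sleep; it returns NOTIFY_DONE to pass the event along, or NOTIFY_STOP to claim it (as kprobes does for DIE_INT3/DIE_DEBUG).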