X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Ftraps.c;h=0b9d741f80d2ce492a166e342994c18b4ad3605c;hb=34a75f0025b9cf803b6a88db032e6ad6950c9313;hp=f031de0588089595d4cdee4c11e0df35c407b455;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index f031de058..0b9d741f8 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -25,7 +25,9 @@ #include #include #include -#include +#include +#include +#include #ifdef CONFIG_EISA #include @@ -47,17 +49,15 @@ #include #include -#include #include +#include -#include #include +#include #include "mach_traps.h" asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; @@ -93,58 +93,88 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; +ATOMIC_NOTIFIER_HEAD(i386die_chain); -static int valid_stack_ptr(struct task_struct *task, void *p) +extern char last_sysfs_file[]; + +int register_die_notifier(struct notifier_block *nb) { - if (p <= (void *)task->thread_info) - return 0; - if (kstack_end(p)) - return 0; - return 1; + vmalloc_sync_all(); + return atomic_notifier_chain_register(&i386die_chain, nb); } +EXPORT_SYMBOL(register_die_notifier); -#ifdef CONFIG_FRAME_POINTER -void print_context_stack(struct task_struct *task, unsigned long *stack, - unsigned long ebp) +int unregister_die_notifier(struct notifier_block *nb) { - unsigned long addr; + return atomic_notifier_chain_unregister(&i386die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); - while (valid_stack_ptr(task, (void *)ebp)) { - addr = *(unsigned long *)(ebp + 4); - printk(" [<%08lx>] ", addr); - print_symbol("%s", addr); - printk("\n"); - ebp = *(unsigned long *)ebp; - } +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; } + +/* + * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. 
+ */ +static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, + int printed) +{ + if (!printed) + printk(log_lvl); + +#if CONFIG_STACK_BACKTRACE_COLS == 1 + printk(" [<%08lx>] ", addr); #else -void print_context_stack(struct task_struct *task, unsigned long *stack, - unsigned long ebp) + printk(" <%08lx> ", addr); +#endif + print_symbol("%s", addr); + + printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; + if (printed) + printk(" "); + else + printk("\n"); + + return printed; +} + +static inline unsigned long print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long ebp, + char *log_lvl) { unsigned long addr; + int printed = 0; /* nr of entries already printed on current line */ - while (!kstack_end(stack)) { +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + printed = print_addr_and_symbol(addr, log_lvl, printed); + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; - if (kernel_text_address(addr)) { - printk(" [<%08lx>] ", addr); - print_symbol("%s\n", addr); - } + if (__kernel_text_address(addr)) + printed = print_addr_and_symbol(addr, log_lvl, printed); } -} #endif + if (printed) + printk("\n"); -void show_trace(struct task_struct *task, unsigned long * stack) + return ebp; +} + +static void show_trace_log_lvl(struct task_struct *task, + unsigned long *stack, char *log_lvl) { unsigned long ebp; if (!task) task = current; - if (!valid_stack_ptr(task, stack)) { - printk("Stack pointer is garbage, not printing trace\n"); - return; - } - if (task == current) { /* Grab ebp right from our regs */ asm ("movl %%ebp, %0" : "=r" (ebp) : ); @@ -157,16 +187,21 @@ void show_trace(struct task_struct *task, unsigned long * stack) struct thread_info *context; context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - print_context_stack(task, stack, ebp); + ebp = print_context_stack(context, stack, ebp, log_lvl); stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk(" =======================\n"); + printk("%s =======================\n", log_lvl); } - printk("\n"); } -void show_stack(struct task_struct *task, unsigned long *esp) +void show_trace(struct task_struct *task, unsigned long * stack) +{ + show_trace_log_lvl(task, stack, ""); +} + +static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, + char *log_lvl) { unsigned long *stack; int i; @@ -183,11 +218,17 @@ void show_stack(struct task_struct *task, unsigned long *esp) if (kstack_end(stack)) break; if (i && ((i % 8) == 0)) - printk("\n "); + printk("\n%s ", log_lvl); printk("%08lx ", *stack++); } - printk("\nCall Trace:\n"); - show_trace(task, esp); + printk("\n%sCall Trace:\n", log_lvl); + show_trace_log_lvl(task, esp, log_lvl); +} + +void show_stack(struct task_struct *task, unsigned long *esp) +{ + printk(" "); + show_stack_log_lvl(task, esp, ""); } /* @@ -210,48 +251,53 @@ void show_registers(struct pt_regs *regs) unsigned short ss; esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; - if (regs->xcs & 3) { + savesegment(ss, ss); + if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx" - " (%s) \n", + printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" + "EFLAGS: %08lx (%s %.*s) \n", smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, UTS_RELEASE); - print_symbol("EIP is at 
%s\n", regs->eip); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + print_tainted(), regs->eflags, system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); + print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); + printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", + printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", - current->comm, current->pid, current_thread_info(), current); + printk(KERN_EMERG "Process %s (pid: %d[#%u], threadinfo=%p task=%p)", + current->comm, current->pid, current->xid, + current_thread_info(), current); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ if (in_kernel) { + u8 __user *eip; - printk("\nStack: "); - show_stack(NULL, (unsigned long*)esp); + printk("\n" KERN_EMERG "Stack: "); + show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); - printk("Code: "); - if(regs->eip < PAGE_OFFSET) - goto bad; + printk(KERN_EMERG "Code: "); - for(i=0;i<20;i++) - { + eip = (u8 __user *)regs->eip - 43; + for (i = 0; i < 64; i++, eip++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: + + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { printk(" Bad EIP value."); break; } - printk("%02x ", c); + if (eip == (u8 __user *)regs->eip) + printk("<%02x> ", c); + else + printk("%02x ", c); } } printk("\n"); @@ -265,106 +311,156 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (regs->xcs & 3) - goto no_bug; /* Not in kernel */ - eip = regs->eip; if (eip < PAGE_OFFSET) goto no_bug; - if (__get_user(ud2, (unsigned short *)eip)) + if (__get_user(ud2, (unsigned short __user *)eip)) goto no_bug; if (ud2 != 0x0b0f) goto no_bug; - if (__get_user(line, (unsigned short *)(eip + 2))) + if (__get_user(line, (unsigned short __user *)(eip + 2))) goto bug; - if (__get_user(file, (char **)(eip + 4)) || + if (__get_user(file, (char * __user *)(eip + 4)) || (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) file = ""; - printk("------------[ cut here ]------------\n"); - printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); + printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); no_bug: return; /* Here we know it was a BUG but file-n-line is unavailable */ bug: - printk("Kernel BUG\n"); + printk(KERN_EMERG "Kernel BUG\n"); } -spinlock_t die_lock = SPIN_LOCK_UNLOCKED; - +/* This is gone through when something in the kernel + * has done something bad and is about to be terminated. 
+*/ void die(const char * str, struct pt_regs * regs, long err) { + static struct { + spinlock_t lock; + u32 lock_owner; + int lock_owner_depth; + } die = { + .lock = SPIN_LOCK_UNLOCKED, + .lock_owner = -1, + .lock_owner_depth = 0 + }; static int die_counter; - int nl = 0; + unsigned long flags; - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - handle_BUG(regs); - printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + oops_enter(); + + vxh_throw_oops(); + + if (die.lock_owner != raw_smp_processor_id()) { + console_verbose(); + spin_lock_irqsave(&die.lock, flags); + die.lock_owner = smp_processor_id(); + die.lock_owner_depth = 0; + bust_spinlocks(1); + } + else + local_save_flags(flags); + + if (++die.lock_owner_depth < 3) { + int nl = 0; + unsigned long esp; + unsigned short ss; + + handle_BUG(regs); + printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; + printk(KERN_EMERG "PREEMPT "); + nl = 1; #endif #ifdef CONFIG_SMP - printk("SMP "); - nl = 1; + if (!nl) + printk(KERN_EMERG); + printk("SMP "); + nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); - nl = 1; + if (!nl) + printk(KERN_EMERG); + printk("DEBUG_PAGEALLOC"); + nl = 1; #endif - if (nl) - printk("\n"); - show_registers(regs); + if (nl) + printk("\n"); +#ifdef CONFIG_SYSFS + printk(KERN_ALERT "last sysfs file: %s\n", last_sysfs_file); +#endif + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { + show_registers(regs); + vxh_dump_history(); + /* Executive summary in case the oops scrolled away */ + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); + if (user_mode(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + } + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip); + print_symbol("%s", regs->eip); + printk(" SS:ESP %04x:%08lx\n", ss, esp); + } + else + regs = NULL; + } else + printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); + bust_spinlocks(0); - spin_unlock_irq(&die_lock); + die.lock_owner = -1; + spin_unlock_irqrestore(&die.lock, flags); + + if (!regs) + return; + + if (kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) { printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(5 * HZ); + ssleep(5); panic("Fatal exception"); } + oops_exit(); do_exit(SIGSEGV); } static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) + if (!user_mode_vm(regs)) die(str, regs, err); } -static inline unsigned long get_cr2(void) +static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, + struct pt_regs * regs, long error_code, + siginfo_t *info) { - unsigned long address; - - /* get the address */ - __asm__("movl %%cr2,%0":"=r" (address)); - return address; -} + struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; -static inline void do_trap(int trapnr, int signr, char *str, int vm86, - struct pt_regs * regs, long error_code, siginfo_t *info) -{ if (regs->eflags & VM_MASK) { if (vm86) goto vm86_trap; goto trap_signal; } - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto kernel_trap; trap_signal: { - struct task_struct *tsk = current; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; if (info) force_sig_info(signr, info, tsk); 
else @@ -386,80 +482,222 @@ static inline void do_trap(int trapnr, int signr, char *str, int vm86, } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ - info.si_addr = (void *)siaddr; \ + info.si_addr = (void __user *)siaddr; \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ } #define DO_VM86_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ - info.si_addr = (void *)siaddr; \ + info.si_addr = (void __user *)siaddr; \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ } DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) +#ifndef CONFIG_KPROBES DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) +#endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2()) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) + -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +/* + * lazy-check for CS validity on exec-shield binaries: + * + * the original non-exec stack patch was written by + * Solar Designer . Thanks! 
+ */ +static int +check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code) { - if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); - + struct desc_struct *desc1, *desc2; + struct vm_area_struct *vma; + unsigned long limit; + + if (current->mm == NULL) + return 0; + + limit = -1UL; + if (current->mm->context.exec_limit != -1UL) { + limit = PAGE_SIZE; + spin_lock(¤t->mm->page_table_lock); + for (vma = current->mm->mmap; vma; vma = vma->vm_next) + if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) + limit = vma->vm_end; + spin_unlock(¤t->mm->page_table_lock); + if (limit >= TASK_SIZE) + limit = -1UL; + current->mm->context.exec_limit = limit; + } + set_user_cs(¤t->mm->context.user_cs, limit); + + desc1 = ¤t->mm->context.user_cs; + desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS; + + if (desc1->a != desc2->a || desc1->b != desc2->b) { + /* + * The CS was not in sync - reload it and retry the + * instruction. If the instruction still faults then + * we won't hit this branch next time around. + */ + if (print_fatal_signals >= 2) { + printk("#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id()); + printk(" exec_limit: %08lx, user_cs: %08lx/%08lx, CPU_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, desc1->a, desc1->b, desc2->a, desc2->b); + } + load_user_cs_desc(cpu, current->mm); + return 1; + } + + return 0; +} + +/* + * The fixup code for errors in iret jumps to here (iret_exc). It loses + * the original trap number and error code. The bogus trap 32 and error + * code 0 are what the vanilla kernel delivers via: + * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) + * + * In case of a general protection fault in the iret instruction, we + * need to check for a lazy CS update for exec-shield. + */ +fastcall void do_iret_error(struct pt_regs *regs, long error_code) +{ + int ok = check_lazy_exec_limit(get_cpu(), regs, error_code); + put_cpu(); + if (!ok && notify_die(DIE_TRAP, "iret exception", regs, + error_code, 32, SIGSEGV) != NOTIFY_STOP) { + siginfo_t info; + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = ILL_BADSTK; + info.si_addr = 0; + do_trap(32, SIGSEGV, "iret exception", 0, regs, error_code, + &info); + } +} + +fastcall void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) +{ + int cpu = get_cpu(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct thread_struct *thread = ¤t->thread; + int ok; + + /* + * Perform the lazy TSS's I/O bitmap copy. If the TSS has an + * invalid offset set (the LAZY one) and the faulting thread has + * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS + * and we set the offset field correctly. Then we let the CPU to + * restart the faulting instruction. + */ + if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && + thread->io_bitmap_ptr) { + memcpy(tss->io_bitmap, thread->io_bitmap_ptr, + thread->io_bitmap_max); + /* + * If the previously set map was extending to higher ports + * than the current one, pad extra space with 0xff (no access). 
+ */ + if (thread->io_bitmap_max < tss->io_bitmap_max) + memset((char *) tss->io_bitmap + + thread->io_bitmap_max, 0xff, + tss->io_bitmap_max - thread->io_bitmap_max); + tss->io_bitmap_max = thread->io_bitmap_max; + tss->io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; + put_cpu(); + return; + } + + current->thread.error_code = error_code; + current->thread.trap_no = 13; + if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto gp_in_kernel; + ok = check_lazy_exec_limit(cpu, regs, error_code); + + put_cpu(); + + if (ok) + return; + + if (print_fatal_signals) { + printk("#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id()); + printk(" exec_limit: %08lx, user_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, current->mm->context.user_cs.a, current->mm->context.user_cs.b); + } + current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); return; gp_in_vm86: + put_cpu(); local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; gp_in_kernel: - if (!fixup_exception(regs)) + put_cpu(); + if (!fixup_exception(regs)) { + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); - printk("You probably have a hardware problem with your RAM chips\n"); + printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " + "to continue\n"); + printk(KERN_EMERG "You probably have a hardware problem with your RAM " + "chips\n"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); @@ -469,7 +707,7 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) { unsigned long i; - printk("NMI: IOCK error (debug interrupt?)\n"); + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); /* Re-enable the IOCK line, wait for a few seconds */ @@ -497,11 +735,52 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) printk("Do you have a strange power saving mode enabled?\n"); } +static DEFINE_SPINLOCK(nmi_print_lock); + +void die_nmi (struct pt_regs *regs, const char *msg) +{ + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == + NOTIFY_STOP) + return; + + spin_lock(&nmi_print_lock); + /* + * We are in trouble anyway, lets at least try + * to get a message out. + */ + bust_spinlocks(1); + printk(KERN_EMERG "%s", msg); + printk(" on CPU%d, eip %08lx, registers:\n", + smp_processor_id(), regs->eip); + show_registers(regs); + printk(KERN_EMERG "console shuts up ...\n"); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); + + /* If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can. + */ + if (!user_mode_vm(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + + do_exit(SIGSEGV); +} + static void default_do_nmi(struct pt_regs * regs) { - unsigned char reason = get_nmi_reason(); + unsigned char reason = 0; + + /* Only the BSP gets external NMIs from the system. 
*/ + if (!smp_processor_id()) + reason = get_nmi_reason(); if (!(reason & 0xc0)) { + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + return; #ifdef CONFIG_X86_LOCAL_APIC /* * Ok, so this is none of the documented NMI sources, @@ -515,6 +794,8 @@ static void default_do_nmi(struct pt_regs * regs) unknown_nmi_error(reason, regs); return; } + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + return; if (reason & 0x80) mem_parity_error(reason, regs); if (reason & 0x40) @@ -533,16 +814,17 @@ static int dummy_nmi_callback(struct pt_regs * regs, int cpu) static nmi_callback_t nmi_callback = dummy_nmi_callback; -asmlinkage void do_nmi(struct pt_regs * regs, long error_code) +fastcall void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); cpu = smp_processor_id(); + ++nmi_count(cpu); - if (!nmi_callback(regs, cpu)) + if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); @@ -550,13 +832,29 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + vmalloc_sync_all(); + rcu_assign_pointer(nmi_callback, callback); } +EXPORT_SYMBOL_GPL(set_nmi_callback); void unset_nmi_callback(void) { nmi_callback = dummy_nmi_callback; } +EXPORT_SYMBOL_GPL(unset_nmi_callback); + +#ifdef CONFIG_KPROBES +fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; + /* This is an interrupt gate, because kprobes wants interrupts + disabled. Normal trap handlers don't. */ + restore_interrupts(regs); + do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); +} +#endif /* * Our handling of the processor debug registers is non-trivial. @@ -580,14 +878,16 @@ void unset_nmi_callback(void) * find every occurrence of the TF bit that could be saved away even * by user code) */ -asmlinkage void do_debug(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) { unsigned int condition; struct task_struct *tsk = current; - siginfo_t info; - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); + if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, + SIGTRAP) == NOTIFY_STOP) + return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); @@ -604,44 +904,28 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code) /* Save debug status register where ptrace can see it */ tsk->thread.debugreg[6] = condition; - /* Mask out spurious TF errors due to lazy TF clearing */ + /* + * Single-stepping through TF: make sure we ignore any events in + * kernel space (but re-enable TF when returning to user mode). + */ if (condition & DR_STEP) { /* - * The TF error should be masked out only if the current - * process is not traced and if the TRAP flag has been set - * previously by a tracing process (condition detected by - * the PT_DTRACE flag); remember that the i386 TRAP flag - * can be modified by the process itself in user mode, - * allowing programs to debug themselves without the ptrace() - * interface. + * We already checked v86 mode above, so we can + * check for kernel mode by just checking the CPL + * of CS. 
*/ - if ((regs->xcs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; } /* Ok, finally something we can handle */ - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : - (void *)regs->eip; - force_sig_info(SIGTRAP, &info, tsk); + send_sigtrap(tsk, regs, error_code); /* Disable additional traps. They'll be re-enabled when * the signal is delivered. */ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); + set_debugreg(0, 7); return; debug_vm86: @@ -650,7 +934,6 @@ debug_vm86: clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); -clear_TF: regs->eflags &= ~TF_MASK; return; } @@ -660,7 +943,7 @@ clear_TF: * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -void math_error(void *eip) +void math_error(void __user *eip) { struct task_struct * task; siginfo_t info; @@ -689,15 +972,18 @@ void math_error(void *eip) */ cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { - case 0x000: - default: + switch (swd & ~cwd & 0x3f) { + case 0x000: /* No unmasked exception */ + return; + default: /* Multiple exceptions */ break; case 0x001: /* Invalid Op */ - case 0x041: /* Stack Fault */ - case 0x241: /* Stack Fault | Direction */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ info.si_code = FPE_FLTINV; - /* Should we clear the SF or let user space do it ???? */ break; case 0x002: /* Denormalize */ case 0x010: /* Underflow */ @@ -716,13 +1002,13 @@ void math_error(void *eip) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) +fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code) { ignore_fpu_irq = 1; - math_error((void *)regs->eip); + math_error((void __user *)regs->eip); } -void simd_math_error(void *eip) +static void simd_math_error(void __user *eip) { struct task_struct * task; siginfo_t info; @@ -770,13 +1056,13 @@ void simd_math_error(void *eip) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, +fastcall void do_simd_coprocessor_error(struct pt_regs * regs, long error_code) { if (cpu_has_xmm) { /* Handle SIMD FPU exceptions on PIII+ processors. 
*/ ignore_fpu_irq = 1; - simd_math_error((void *)regs->eip); + simd_math_error((void __user *)regs->eip); } else { /* * Handle strange cache flush from user space exception @@ -787,14 +1073,14 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, error_code); return; } - die_if_kernel("cache flush denied", regs, error_code); current->thread.trap_no = 19; current->thread.error_code = error_code; + die_if_kernel("cache flush denied", regs, error_code); force_sig(SIGSEGV, current); } } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, +fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, long error_code) { #if 0 @@ -803,6 +1089,51 @@ asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, #endif } +fastcall void setup_x86_bogus_stack(unsigned char * stk) +{ + unsigned long *switch16_ptr, *switch32_ptr; + struct pt_regs *regs; + unsigned long stack_top, stack_bot; + unsigned short iret_frame16_off; + int cpu = smp_processor_id(); + /* reserve the space on 32bit stack for the magic switch16 pointer */ + memmove(stk, stk + 8, sizeof(struct pt_regs)); + switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); + regs = (struct pt_regs *)stk; + /* now the switch32 on 16bit stack */ + stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); + stack_top = stack_bot + CPU_16BIT_STACK_SIZE; + switch32_ptr = (unsigned long *)(stack_top - 8); + iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; + /* copy iret frame on 16bit stack */ + memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); + /* fill in the switch pointers */ + switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; + switch16_ptr[1] = __ESPFIX_SS; + switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + + 8 - CPU_16BIT_STACK_SIZE; + switch32_ptr[1] = __KERNEL_DS; +} + +fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) +{ + unsigned long *switch32_ptr; + unsigned char *stack16, *stack32; + unsigned long stack_top, stack_bot; + int len; + int cpu = smp_processor_id(); + stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); + stack_top = stack_bot + CPU_16BIT_STACK_SIZE; + switch32_ptr = (unsigned long *)(stack_top - 8); + /* copy the data from 16bit stack to 32bit stack */ + len = CPU_16BIT_STACK_SIZE - 8 - sp; + stack16 = (unsigned char *)(stack_bot + sp); + stack32 = (unsigned char *) + (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); + memcpy(stack32, stack16, len); + return stack32; +} + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task @@ -819,7 +1150,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) struct task_struct *tsk = thread->task; clts(); /* Allow maths ops (or we recurse) */ - if (!tsk->used_math) + if (!tsk_used_math(tsk)) init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ @@ -829,8 +1160,8 @@ asmlinkage void math_state_restore(struct pt_regs regs) asmlinkage void math_emulate(long arg) { - printk("math-emulation not enabled and no coprocessor found.\n"); - printk("killing %s.\n",current->comm); + printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); + printk(KERN_EMERG "killing %s.\n",current->comm); force_sig(SIGFPE,current); schedule(); } @@ -847,7 +1178,7 @@ void __init trap_init_f00f_bug(void) * it uses the read-only mapped virtual address. 
*/ idt_descr.address = fix_to_virt(FIX_F00F_IDT); - __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + load_idt(&idt_descr); } #endif @@ -876,6 +1207,14 @@ void set_intr_gate(unsigned int n, void *addr) _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); } +/* + * This routine sets up an interrupt gate at directory privilege level 3. + */ +static inline void set_system_intr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); +} + static void __init set_trap_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); @@ -886,11 +1225,6 @@ static void __init set_system_gate(unsigned int n, void *addr) _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); } -static void __init set_call_gate(void *a, void *addr) -{ - _set_gate(a,12,3,addr,__KERNEL_CS); -} - static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); @@ -900,9 +1234,11 @@ static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) void __init trap_init(void) { #ifdef CONFIG_EISA - if (isa_readl(0x0FFFD9) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { + void __iomem *p = ioremap(0x0FFFD9, 4); + if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { EISA_bus = 1; } + iounmap(p); #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -912,9 +1248,9 @@ void __init trap_init(void) set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); - set_system_gate(3,&int3); /* int3-5 can be called from all */ + set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ set_system_gate(4,&overflow); - set_system_gate(5,&bounds); + set_trap_gate(5,&bounds); set_trap_gate(6,&invalid_op); set_trap_gate(7,&device_not_available); set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); @@ -932,14 +1268,29 @@ void __init trap_init(void) #endif set_trap_gate(19,&simd_coprocessor_error); - set_system_gate(SYSCALL_VECTOR,&system_call); + if (cpu_has_fxsr) { + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + * Generates a compile-time "error: zero width for bit-field" if + * the alignment is wrong. + */ + struct fxsrAlignAssert { + int _:!(offsetof(struct task_struct, + thread.i387.fxsave) & 15); + }; + + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } + if (cpu_has_xmm) { + printk(KERN_INFO "Enabling unmasked SIMD FPU exception " + "support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + } - /* - * default LDT is a single-entry callgate to lcall7 for iBCS - * and a callgate to lcall27 for Solaris/x86 binaries - */ - set_call_gate(&default_ldt[0],lcall7); - set_call_gate(&default_ldt[4],lcall27); + set_system_gate(SYSCALL_VECTOR,&system_call); /* * Should be a barrier for any external CPU state. @@ -948,3 +1299,10 @@ void __init trap_init(void) trap_init_hook(); } + +static int __init kstack_setup(char *s) +{ + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("kstack=", kstack_setup);
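
Notes on the main mechanisms this patch introduces, with small illustrative sketches.

The opening hunks turn the i386 die chain into an atomic notifier chain and export register_die_notifier()/unregister_die_notifier(); die(), the trap macros and the NMI path then publish events (DIE_OOPS, DIE_TRAP, DIE_NMI, ...) through notify_die(). A consumer hangs a notifier_block on the chain and receives a struct die_args per event. A minimal sketch of such a consumer, assuming the asm/kdebug.h definitions of this kernel vintage; the module name and log message are invented:

/* oops_logger.c -- hypothetical module that logs oopses via the die chain */
#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/kdebug.h>		/* struct die_args, DIE_*; later kernels use linux/kdebug.h */

static int oops_logger(struct notifier_block *nb, unsigned long val,
		       void *data)
{
	struct die_args *args = data;

	if (val == DIE_OOPS)
		printk(KERN_INFO "oops_logger: %s, err %04lx, trap %d\n",
		       args->str, args->err & 0xffff, args->trapnr);
	return NOTIFY_DONE;	/* let the rest of the chain run */
}

static struct notifier_block oops_nb = {
	.notifier_call = oops_logger,
};

static int __init oops_logger_init(void)
{
	return register_die_notifier(&oops_nb);
}

static void __exit oops_logger_exit(void)
{
	unregister_die_notifier(&oops_nb);
}

module_init(oops_logger_init);
module_exit(oops_logger_exit);
MODULE_LICENSE("GPL");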
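
The backtrace rework threads a `printed` column counter through print_addr_and_symbol(), so CONFIG_STACK_BACKTRACE_COLS address/symbol entries share one line before a newline is emitted, and print_context_stack() flushes a partially filled last line afterwards. The wrap logic in isolation, as a user-space sketch; COLS and the addresses are stand-ins for the config option and real stack contents:

#include <stdio.h>

#define COLS 2	/* stand-in for CONFIG_STACK_BACKTRACE_COLS */

/* Returns the number of entries already printed on the current line. */
static int print_addr(unsigned long addr, int printed)
{
	printf("[<%08lx>]", addr);
	printed = (printed + 1) % COLS;
	putchar(printed ? ' ' : '\n');
	return printed;
}

int main(void)
{
	static const unsigned long fake_stack[] = {
		0xc0105a44, 0xc011be30, 0xc0108fb7, 0xc01041e2, 0xc0103ed5
	};
	int i, printed = 0;

	for (i = 0; i < 5; i++)
		printed = print_addr(fake_stack[i], printed);
	if (printed)	/* flush a partially filled last line */
		putchar('\n');
	return 0;
}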
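
show_registers() replaces the old 20-byte code dump with a 64-byte window that starts 43 bytes before the faulting EIP and brackets the faulting byte as <xx>. The same windowing, minus the PAGE_OFFSET/__get_user() access checks, as a user-space sketch over a stand-in buffer:

#include <stdio.h>

static void dump_code_window(const unsigned char *fault,
			     const unsigned char *lo, const unsigned char *hi)
{
	const unsigned char *p = fault - 43;	/* same offset the patch uses */
	int i;

	printf("Code:");
	for (i = 0; i < 64; i++, p++) {
		if (p < lo || p >= hi) {	/* stands in for the access check */
			printf(" Bad EIP value.");
			break;
		}
		printf(p == fault ? " <%02x>" : " %02x", *p);
	}
	putchar('\n');
}

int main(void)
{
	unsigned char text[128];
	int i;

	for (i = 0; i < 128; i++)
		text[i] = (unsigned char)i;	/* fake instruction bytes */
	dump_code_window(text + 64, text, text + 128);
	return 0;
}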
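
The FXSR hunk in trap_init() is a compile-time assertion: the width of the bit-field named `_` is the boolean !(offsetof(struct task_struct, thread.i387.fxsave) & 15), so a misaligned FXSAVE area collapses the width to zero, and a zero-width named bit-field stops the build with "error: zero width for bit-field". The same trick outside the kernel, with an invented structure:

#include <stddef.h>

struct fxsave_area { char data[512]; };

struct demo_task {
	char pad[16];			/* make this pad[17] and the build fails */
	struct fxsave_area fxsave;
};

/* Width is 1 when fxsave sits on a 16-byte boundary, 0 otherwise --
 * and zero width is illegal for a named bit-field. */
struct fxsave_align_assert {
	int _:!(offsetof(struct demo_task, fxsave) & 15);
};

int main(void) { return 0; }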