vserver 1.9.3
[linux-2.6.git] / arch / i386 / kernel / traps.c
index a3daea0..e888902 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
 #include <linux/version.h>
+#include <linux/kprobes.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -48,6 +49,7 @@
 
 #include <asm/smp.h>
 #include <asm/arch_hooks.h>
+#include <asm/kdebug.h>
 
 #include <linux/irq.h>
 #include <linux/module.h>
@@ -92,6 +94,18 @@ asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
 static int kstack_depth_to_print = 24;
+struct notifier_block *i386die_chain;
+static spinlock_t die_notifier_lock = SPIN_LOCK_UNLOCKED;
+
+int register_die_notifier(struct notifier_block *nb)
+{
+       int err = 0;
+       unsigned long flags;
+       spin_lock_irqsave(&die_notifier_lock, flags);
+       err = notifier_chain_register(&i386die_chain, nb);
+       spin_unlock_irqrestore(&die_notifier_lock, flags);
+       return err;
+}
 
 static int valid_stack_ptr(struct task_struct *task, void *p)
 {
@@ -216,7 +230,7 @@ void show_registers(struct pt_regs *regs)
                ss = regs->xss & 0xffff;
        }
        print_modules();
-       printk("CPU:    %d\nEIP:    %04x:[<%08lx>]    %s\nEFLAGS: %08lx"
+       printk("CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\nEFLAGS: %08lx"
                        "   (%s) \n",
                smp_processor_id(), 0xffff & regs->xcs, regs->eip,
                print_tainted(), regs->eflags, UTS_RELEASE);
@@ -234,23 +248,25 @@ void show_registers(struct pt_regs *regs)
         * time of the fault..
         */
        if (in_kernel) {
+               u8 *eip;
 
                printk("\nStack: ");
                show_stack(NULL, (unsigned long*)esp);
 
                printk("Code: ");
-               if(regs->eip < PAGE_OFFSET)
-                       goto bad;
 
-               for(i=0;i<20;i++)
-               {
+               eip = (u8 *)regs->eip - 43;
+               for (i = 0; i < 64; i++, eip++) {
                        unsigned char c;
-                       if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
-bad:
+
+                       if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
                                printk(" Bad EIP value.");
                                break;
                        }
-                       printk("%02x ", c);
+                       if (eip == (u8 *)regs->eip)
+                               printk("<%02x> ", c);
+                       else
+                               printk("%02x ", c);
                }
        }
        printk("\n");
@@ -292,35 +308,53 @@ bug:
        printk("Kernel BUG\n");
 }
 
-spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
-
 void die(const char * str, struct pt_regs * regs, long err)
 {
+       static struct {
+               spinlock_t lock;
+               u32 lock_owner;
+               int lock_owner_depth;
+       } die = {
+               .lock =                 SPIN_LOCK_UNLOCKED,
+               .lock_owner =           -1,
+               .lock_owner_depth =     0
+       };
        static int die_counter;
-       int nl = 0;
 
-       console_verbose();
-       spin_lock_irq(&die_lock);
-       bust_spinlocks(1);
-       handle_BUG(regs);
-       printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+       if (die.lock_owner != smp_processor_id()) {
+               console_verbose();
+               spin_lock_irq(&die.lock);
+               die.lock_owner = smp_processor_id();
+               die.lock_owner_depth = 0;
+               bust_spinlocks(1);
+       }
+
+       if (++die.lock_owner_depth < 3) {
+               int nl = 0;
+               handle_BUG(regs);
+               printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
 #ifdef CONFIG_PREEMPT
-       printk("PREEMPT ");
-       nl = 1;
+               printk("PREEMPT ");
+               nl = 1;
 #endif
 #ifdef CONFIG_SMP
-       printk("SMP ");
-       nl = 1;
+               printk("SMP ");
+               nl = 1;
 #endif
 #ifdef CONFIG_DEBUG_PAGEALLOC
-       printk("DEBUG_PAGEALLOC");
-       nl = 1;
+               printk("DEBUG_PAGEALLOC");
+               nl = 1;
 #endif
-       if (nl)
-               printk("\n");
-       show_registers(regs);
+               if (nl)
+                       printk("\n");
+       notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
+               show_registers(regs);
+       } else
+               printk(KERN_ERR "Recursive die() failure, output suppressed\n");
+
        bust_spinlocks(0);
-       spin_unlock_irq(&die_lock);
+       die.lock_owner = -1;
+       spin_unlock_irq(&die.lock);
        if (in_interrupt())
                panic("Fatal exception in interrupt");
 
@@ -387,6 +421,9 @@ static inline void do_trap(int trapnr, int signr, char *str, int vm86,
 #define DO_ERROR(trapnr, signr, str, name) \
 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
 { \
+       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+                                               == NOTIFY_STOP) \
+               return; \
        do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
 }
 
@@ -398,12 +435,18 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
        info.si_errno = 0; \
        info.si_code = sicode; \
        info.si_addr = (void __user *)siaddr; \
+       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+                                               == NOTIFY_STOP) \
+               return; \
        do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
 }
 
 #define DO_VM86_ERROR(trapnr, signr, str, name) \
 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
 { \
+       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+                                               == NOTIFY_STOP) \
+               return; \
        do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
 }
 
@@ -415,11 +458,16 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
        info.si_errno = 0; \
        info.si_code = sicode; \
        info.si_addr = (void __user *)siaddr; \
+       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+                                               == NOTIFY_STOP) \
+               return; \
        do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
 }
 
 DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+#ifndef CONFIG_KPROBES
 DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
+#endif
 DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
 DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
 DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
@@ -427,13 +475,40 @@ DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
-       if (regs->eflags & X86_EFLAGS_IF)
-               local_irq_enable();
+       int cpu = get_cpu();
+       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct thread_struct *thread = &current->thread;
+
+       /*
+        * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
+        * invalid offset set (the LAZY one) and the faulting thread has
+        * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
+        * and we set the offset field correctly. Then we let the CPU to
+        * restart the faulting instruction.
+        */
+       if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+           thread->io_bitmap_ptr) {
+               memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
+                      thread->io_bitmap_max);
+               /*
+                * If the previously set map was extending to higher ports
+                * than the current one, pad extra space with 0xff (no access).
+                */
+               if (thread->io_bitmap_max < tss->io_bitmap_max)
+                       memset((char *) tss->io_bitmap +
+                               thread->io_bitmap_max, 0xff,
+                               tss->io_bitmap_max - thread->io_bitmap_max);
+               tss->io_bitmap_max = thread->io_bitmap_max;
+               tss->io_bitmap_base = IO_BITMAP_OFFSET;
+               put_cpu();
+               return;
+       }
+       put_cpu();
+
        if (regs->eflags & VM_MASK)
                goto gp_in_vm86;
 
@@ -451,8 +526,12 @@ gp_in_vm86:
        return;
 
 gp_in_kernel:
-       if (!fixup_exception(regs))
+       if (!fixup_exception(regs)) {
+               if (notify_die(DIE_GPF, "general protection fault", regs,
+                               error_code, 13, SIGSEGV) == NOTIFY_STOP);
+                       return;
                die("general protection fault", regs, error_code);
+       }
 }
 
 static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
@@ -496,11 +575,35 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
        printk("Do you have a strange power saving mode enabled?\n");
 }
 
+static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+
+void die_nmi (struct pt_regs *regs, const char *msg)
+{
+       spin_lock(&nmi_print_lock);
+       /*
+       * We are in trouble anyway, lets at least try
+       * to get a message out.
+       */
+       bust_spinlocks(1);
+       printk(msg);
+       printk(" on CPU%d, eip %08lx, registers:\n",
+               smp_processor_id(), regs->eip);
+       show_registers(regs);
+       printk("console shuts up ...\n");
+       console_silent();
+       spin_unlock(&nmi_print_lock);
+       bust_spinlocks(0);
+       do_exit(SIGSEGV);
+}
+
 static void default_do_nmi(struct pt_regs * regs)
 {
        unsigned char reason = get_nmi_reason();
  
        if (!(reason & 0xc0)) {
+               if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+                                                       == NOTIFY_STOP)
+                       return;
 #ifdef CONFIG_X86_LOCAL_APIC
                /*
                 * Ok, so this is none of the documented NMI sources,
@@ -514,6 +617,8 @@ static void default_do_nmi(struct pt_regs * regs)
                unknown_nmi_error(reason, regs);
                return;
        }
+       if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+               return;
        if (reason & 0x80)
                mem_parity_error(reason, regs);
        if (reason & 0x40)
@@ -557,6 +662,20 @@ void unset_nmi_callback(void)
        nmi_callback = dummy_nmi_callback;
 }
 
+#ifdef CONFIG_KPROBES
+asmlinkage int do_int3(struct pt_regs *regs, long error_code)
+{
+       if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+                       == NOTIFY_STOP)
+               return 1;
+       /* This is an interrupt gate, because kprobes wants interrupts
+       disabled.  Normal trap handlers don't. */
+       restore_interrupts(regs);
+       do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+       return 0;
+}
+#endif
+
 /*
  * Our handling of the processor debug registers is non-trivial.
  * We do not clear them on entry and exit from the kernel. Therefore
@@ -587,6 +706,9 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
 
        __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
 
+       if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+                                       SIGTRAP) == NOTIFY_STOP)
+               return;
        /* It's safe to allow irq's after DR6 has been saved */
        if (regs->eflags & X86_EFLAGS_IF)
                local_irq_enable();
@@ -875,6 +997,14 @@ void set_intr_gate(unsigned int n, void *addr)
        _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
 }
 
+/*
+ * This routine sets up an interrupt gate at directory privilege level 3.
+ */
+static inline void set_system_intr_gate(unsigned int n, void *addr)
+{
+       _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
+}
+
 static void __init set_trap_gate(unsigned int n, void *addr)
 {
        _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
@@ -911,7 +1041,7 @@ void __init trap_init(void)
        set_trap_gate(0,&divide_error);
        set_intr_gate(1,&debug);
        set_intr_gate(2,&nmi);
-       set_system_gate(3,&int3);       /* int3-5 can be called from all */
+       set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
        set_system_gate(4,&overflow);
        set_system_gate(5,&bounds);
        set_trap_gate(6,&invalid_op);