2 * linux/arch/x86-64/traps.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000
10 * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
14 * 'Traps.c' handles hardware traps and faults after we have saved some
17 #include <linux/config.h>
18 #include <linux/sched.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/ptrace.h>
23 #include <linux/timer.h>
25 #include <linux/init.h>
26 #include <linux/delay.h>
27 #include <linux/spinlock.h>
28 #include <linux/interrupt.h>
29 #include <linux/module.h>
30 #include <linux/moduleparam.h>
32 #include <asm/system.h>
33 #include <asm/uaccess.h>
35 #include <asm/atomic.h>
36 #include <asm/debugreg.h>
39 #include <asm/kdebug.h>
40 #include <asm/processor.h>
43 #include <asm/pgalloc.h>
45 #include <asm/proto.h>
47 #include <linux/irq.h>
/*
 * Declarations used by the trap code below: the 256-entry gate table and
 * the exception entry points (most of them are installed as gate handlers
 * by trap_init()). None of them is defined in the visible part of this file.
 */
49 extern struct gate_struct idt_table[256];
51 asmlinkage void divide_error(void);
52 asmlinkage void debug(void);
53 asmlinkage void nmi(void);
54 asmlinkage void int3(void);
55 asmlinkage void overflow(void);
56 asmlinkage void bounds(void);
57 asmlinkage void invalid_op(void);
58 asmlinkage void device_not_available(void);
59 asmlinkage void double_fault(void);
60 asmlinkage void coprocessor_segment_overrun(void);
61 asmlinkage void invalid_TSS(void);
62 asmlinkage void segment_not_present(void);
63 asmlinkage void stack_segment(void);
64 asmlinkage void general_protection(void);
65 asmlinkage void page_fault(void);
66 asmlinkage void coprocessor_error(void);
67 asmlinkage void simd_coprocessor_error(void);
68 asmlinkage void reserved(void);
69 asmlinkage void alignment_check(void);
70 asmlinkage void machine_check(void);
71 asmlinkage void spurious_interrupt_bug(void);
72 asmlinkage void call_debug(void);
/* Notifier list head. NOTE(review): presumably the chain walked by notify_die() -- confirm. */
74 struct notifier_block *die_chain;
/*
 * Tests the interrupt-enable flag (X86_EFLAGS_IF) in the saved eflags of
 * the interrupted context.
 * NOTE(review): the statement guarded by this test is not visible in this
 * listing; judging by the name it re-enables interrupts -- confirm.
 */
76 static inline void conditional_sti(struct pt_regs *regs)
78 if (regs->eflags & X86_EFLAGS_IF)
/* Upper bound of the word-dump loop in show_stack(). */
82 static int kstack_depth_to_print = 10;
/*
 * printk_address(): print one code address and return the printk() result.
 *
 * With CONFIG_KALLSYMS the address is resolved through kallsyms_lookup()
 * and printed together with module name, symbol name and signed offset;
 * one visible path still prints only the bracketed raw address
 * (presumably when the lookup finds nothing -- confirm).
 * The second definition always prints the bracketed raw address.
 * NOTE(review): it is presumably the non-KALLSYMS variant; the #else and
 * #endif lines are not visible in this listing.
 */
84 #ifdef CONFIG_KALLSYMS
85 #include <linux/kallsyms.h>
86 int printk_address(unsigned long address)
88 unsigned long offset = 0, symsize;
94 symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf);
96 return printk("[<%016lx>]", address);
99 return printk("<%016lx>{%s%s%s%s%+ld}",
100 address,delim,modname,delim,symname,offset);
103 int printk_address(unsigned long address)
105 return printk("[<%016lx>]", address);
/*
 * Look through the N_EXCEPTION_STACKS entries of init_tss[cpu].ist[] for a
 * range [ist[k], ist[k] + EXCEPTION_STKSZ] (both bounds inclusive) that
 * contains `stack`. On a match the end of that range is returned as a
 * pointer.
 * NOTE(review): the value returned when nothing matches is not visible in
 * this listing.
 */
109 unsigned long *in_exception_stack(int cpu, unsigned long stack)
112 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
113 unsigned long end = init_tss[cpu].ist[k] + EXCEPTION_STKSZ;
115 if (stack >= init_tss[cpu].ist[k] && stack <= end)
116 return (unsigned long *)end;
122 * x86-64 can have up to three kernel stacks:
125 * severe exception (double fault, nmi, stack fault) hardware stack
126 * Check and process them in order.
/*
 * Print "Call Trace:" followed by every stack word that
 * __kernel_text_address() accepts, each formatted by printk_address().
 *
 * Three scan loops are visible:
 *   1. from `stack` up to the exception-stack end returned by
 *      in_exception_stack(); afterwards an " <EOE> " marker is printed and
 *      scanning resumes from the pointer stored at estack_end[-2];
 *   2. inside the per-CPU IRQ stack (bounds derived from
 *      cpu_pda[cpu].irqstackptr and IRQSTACKSIZE); afterwards scanning
 *      resumes from the pointer stored at irqstack_end[-1];
 *   3. until the stack pointer reaches a THREAD_SIZE-aligned address.
 * The printk return values are accumulated in `i`.
 * NOTE(review): the use of `i` (presumably line wrapping) and the guards
 * around loop 1 are not visible in this listing -- confirm.
 */
129 void show_trace(unsigned long *stack)
132 unsigned long *irqstack, *irqstack_end, *estack_end;
133 const int cpu = safe_smp_processor_id();
136 printk("\nCall Trace:");
139 estack_end = in_exception_stack(cpu, (unsigned long)stack);
141 while (stack < estack_end) {
143 if (__kernel_text_address(addr)) {
144 i += printk_address(addr);
152 i += printk(" <EOE> ");
154 stack = (unsigned long *) estack_end[-2];
157 irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
158 irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
160 if (stack >= irqstack && stack < irqstack_end) {
162 while (stack < irqstack_end) {
165 * If the address is either in the text segment of the
166 * kernel, or in the region which contains vmalloc'ed
167 * memory, it *may* be the address of a calling
168 * routine; if so, print it so that someone tracing
169 * down the cause of the crash will be able to figure
170 * out the call path that was taken.
172 if (__kernel_text_address(addr)) {
173 i += printk_address(addr);
181 stack = (unsigned long *) (irqstack_end[-1]);
186 while (((long) stack & (THREAD_SIZE-1)) != 0) {
188 if (__kernel_text_address(addr)) {
189 i += printk_address(addr);
/*
 * Dump raw stack words as %016lx and then call show_trace() on the same
 * starting pointer.
 *
 * tsk: task whose saved thread.rsp may be used as the starting point.
 * rsp: explicit starting point; two fallback assignments are visible
 *      (tsk->thread.rsp, or the address of the local argument itself).
 *      NOTE(review): the conditions selecting them are not visible here.
 *
 * The dump loop runs at most kstack_depth_to_print times. When the pointer
 * reaches the end of the per-CPU IRQ stack it continues from the pointer
 * stored at irqstack_end[-1]. A THREAD_SIZE-aligned pointer is tested for
 * (presumably to stop the dump), and `i % 4` is tested for non-zero `i`
 * (presumably to break the output into rows of four) -- confirm both.
 */
200 void show_stack(struct task_struct *tsk, unsigned long * rsp)
202 unsigned long *stack;
204 const int cpu = safe_smp_processor_id();
205 unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
206 unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE);
208 // debugging aid: "show_stack(NULL, NULL);" prints the
209 // back trace for this cpu.
213 rsp = (unsigned long *)tsk->thread.rsp;
215 rsp = (unsigned long *)&rsp;
219 for(i=0; i < kstack_depth_to_print; i++) {
220 if (stack >= irqstack && stack <= irqstack_end) {
221 if (stack == irqstack_end) {
222 stack = (unsigned long *) (irqstack_end[-1]);
226 if (((long) stack & (THREAD_SIZE-1)) == 0)
229 if (i && ((i % 4) == 0))
231 printk("%016lx ", *stack++);
233 show_trace((unsigned long *)rsp);
237 * The architecture-independent dump_stack generator
/*
 * dump_stack(): takes no arguments and is made available to modules via
 * EXPORT_SYMBOL.
 * NOTE(review): the body is not visible in this listing.
 */
239 void dump_stack(void)
245 EXPORT_SYMBOL(dump_stack);
/*
 * Print the state captured in `regs` for the current CPU: CPU number and
 * the current task (comm, pid, thread_info pointer, task pointer, taken
 * from cpu_pda[cpu].pcurrent).
 *
 * in_kernel is set when the low two bits of the saved cs are zero. For that
 * case the existing comment says the stack and code are printed as well:
 * show_stack() is called with the saved rsp, and code bytes at regs->rip
 * are fetched one at a time with __get_user(); a failed fetch prints
 * " Bad RIP value.".
 * NOTE(review): the consequence of the `rip < PAGE_OFFSET` test is not
 * visible in this listing.
 */
247 void show_registers(struct pt_regs *regs)
250 int in_kernel = (regs->cs & 3) == 0;
252 const int cpu = safe_smp_processor_id();
253 struct task_struct *cur = cpu_pda[cpu].pcurrent;
257 printk("CPU %d ", cpu);
259 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
260 cur->comm, cur->pid, cur->thread_info, cur);
263 * When in-kernel, we also print out the stack and code at the
264 * time of the fault..
269 show_stack(NULL, (unsigned long*)rsp);
272 if(regs->rip < PAGE_OFFSET)
278 if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
280 printk(" Bad RIP value.");
/*
 * Decode a struct bug_frame located at regs->rip and print the
 * "Kernel BUG at <file>:<line>" banner.
 *
 * Visible checks, in order:
 *   - the frame must be copyable from regs->rip;
 *   - the filename pointer must not be below __PAGE_OFFSET and the two ud2
 *     bytes must be 0x0f 0x0b;
 *   - if the first byte of the filename cannot be read, the text
 *     "unmapped filename" is substituted.
 * NOTE(review): the statements taken when the first two checks fail are
 * not visible in this listing (presumably early returns) -- confirm.
 */
289 void handle_BUG(struct pt_regs *regs)
296 if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
297 sizeof(struct bug_frame)))
299 if ((unsigned long)f.filename < __PAGE_OFFSET ||
300 f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
302 if (__get_user(tmp, f.filename))
303 f.filename = "unmapped filename";
304 printk("----------- [cut here ] --------- [please bite here ] ---------\n");
305 printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
/* NOTE(review): only the signature is visible in this listing; the body must be checked in the full source. */
308 void out_of_line_bug(void)
/*
 * die_lock / die_owner / oops_begin(): locking around oops output.
 *
 * oops_begin() first tries die_lock without blocking. If that fails and
 * this CPU equals die_owner, the existing comment treats it as a nested
 * oops and does nothing; the blocking spin_lock() is presumably the
 * alternative branch (the `else` line is not visible -- confirm).
 * NOTE(review): the place where die_owner is assigned is not visible in
 * this listing.
 */
313 static spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
314 static int die_owner = -1;
316 void oops_begin(void)
318 int cpu = safe_smp_processor_id();
319 /* racy, but better than risking deadlock. */
321 if (!spin_trylock(&die_lock)) {
322 if (cpu == die_owner)
323 /* nested oops. should stop eventually */;
325 spin_lock(&die_lock);
/* NOTE(review): the two lines below release the lock and re-enable interrupts; they probably belong to a separate function whose header line is missing from this listing -- confirm. */
336 spin_unlock(&die_lock);
337 local_irq_enable(); /* make sure back scroll still works */
/*
 * Emit an oops report:
 *   - header line with `str`, the low 16 bits of `err` and a static,
 *     pre-incremented die counter;
 *   - configuration tags (only the DEBUG_PAGEALLOC one is visible here);
 *   - a DIE_OOPS notification via notify_die();
 *   - the register dump from show_registers();
 *   - a closing RIP/RSP summary line.
 */
345 void __die(const char * str, struct pt_regs * regs, long err)
347 static int die_counter;
348 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
349 #ifdef CONFIG_PREEMPT
355 #ifdef CONFIG_DEBUG_PAGEALLOC
356 printk("DEBUG_PAGEALLOC");
359 notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
360 show_registers(regs);
361 /* Executive summary in case the oops scrolled away */
362 printk(KERN_ALERT "RIP ");
363 printk_address(regs->rip);
364 printk(" RSP <%016lx>\n", regs->rsp);
/*
 * Report a fatal fault by calling __die() with the same arguments.
 * NOTE(review): the statements before and after the __die() call are not
 * visible in this listing.
 */
367 void die(const char * str, struct pt_regs * regs, long err)
371 __die(str, regs, err);
/*
 * Tests whether VM_MASK is clear in the saved eflags and the saved cs
 * equals __KERNEL_CS.
 * NOTE(review): the guarded statement is not visible in this listing;
 * judging by the name it calls die() -- confirm.
 */
377 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
379 if (!(regs->eflags & VM_MASK) && (regs->cs == __KERNEL_CS))
/*
 * Read control register CR2 into a local with a movq instruction.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the value read is returned -- confirm.
 */
383 static inline unsigned long get_cr2(void)
385 unsigned long address;
387 /* get the address */
388 __asm__("movq %%cr2,%0":"=r" (address));
/*
 * Common handling for the traps generated by the DO_ERROR* macros.
 *
 * trapnr:     vector number, stored in the task's thread.trap_no.
 * signr:      signal to deliver for a user-mode trap.
 * str:        trap description used in log messages and passed to die().
 * regs:       saved register state of the interrupted context.
 * error_code: stored in the task's thread.error_code.
 * info:       siginfo for force_sig_info(); the macros pass NULL when
 *             there is none.
 *
 * Flow of the visible statements:
 *   - conditional_sti(regs);
 *   - under CONFIG_CHECKING, MSR_GS_BASE is compared with the address of
 *     this CPU's pda entry; on mismatch it is rewritten and a message is
 *     logged;
 *   - if the low two bits of cs are non-zero: optionally log the trap
 *     (exception_trace and unhandled_signal()), record error_code/trapnr
 *     in the task and send the signal with force_sig_info() or force_sig()
 *     (presumably chosen by whether info is NULL -- confirm);
 *   - otherwise search the exception tables for regs->rip; a fixup
 *     redirects regs->rip, and die() is the remaining visible outcome.
 */
392 static void do_trap(int trapnr, int signr, char *str,
393 struct pt_regs * regs, long error_code, siginfo_t *info)
395 conditional_sti(regs);
397 #ifdef CONFIG_CHECKING
400 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
401 rdmsrl(MSR_GS_BASE, gs);
402 if (gs != (unsigned long)pda) {
403 wrmsrl(MSR_GS_BASE, pda);
404 printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
410 if ((regs->cs & 3) != 0) {
411 struct task_struct *tsk = current;
413 if (exception_trace && unhandled_signal(tsk, signr))
415 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
416 tsk->comm, tsk->pid, str,
417 regs->rip,regs->rsp,error_code);
419 tsk->thread.error_code = error_code;
420 tsk->thread.trap_no = trapnr;
422 force_sig_info(signr, info, tsk);
424 force_sig(signr, tsk);
431 const struct exception_table_entry *fixup;
432 fixup = search_exception_tables(regs->rip);
434 regs->rip = fixup->fixup;
436 die(str, regs, error_code);
/*
 * DO_ERROR(trapnr, signr, str, name): define do_<name>(regs, error_code).
 * The generated handler sends a DIE_TRAP notification and compares the
 * result with NOTIFY_BAD, then hands over to do_trap() without siginfo.
 * NOTE(review): the statement taken on NOTIFY_BAD is not visible in this
 * listing (presumably a return) -- confirm.
 */
441 #define DO_ERROR(trapnr, signr, str, name) \
442 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
444 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
446 do_trap(trapnr, signr, str, regs, error_code, NULL); \
/*
 * DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr): like DO_ERROR,
 * but the generated do_<name>() fills a siginfo (si_signo, si_code and
 * si_addr from the macro arguments) and passes it to do_trap().
 * siaddr is an expression evaluated inside the handler, so it may refer to
 * `regs`.
 * NOTE(review): the statement taken on NOTIFY_BAD is not visible in this
 * listing (presumably a return) -- confirm.
 */
449 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
450 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
453 info.si_signo = signr; \
455 info.si_code = sicode; \
456 info.si_addr = (void __user *)siaddr; \
457 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
459 do_trap(trapnr, signr, str, regs, error_code, &info); \
/*
 * Handlers generated from the macros above. Columns: vector number, signal
 * sent to user space, description string, handler name suffix; the _INFO
 * form adds the si_code and the expression used for si_addr.
 */
462 DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
463 DO_ERROR( 3, SIGTRAP, "int3", int3);
464 DO_ERROR( 4, SIGSEGV, "overflow", overflow)
465 DO_ERROR( 5, SIGSEGV, "bounds", bounds)
466 DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
467 DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
468 DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
469 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
470 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
471 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
472 DO_ERROR(18, SIGSEGV, "reserved", reserved)
/*
 * DO_ERROR_STACK(trapnr, signr, str, name): define a do_<name>() that
 * returns a pointer. `pr` addresses the pt_regs-sized slot immediately
 * below current->thread.rsp0; when the low two bits of cs are non-zero the
 * incoming register frame is copied into that slot. do_trap() is then
 * called without siginfo.
 * NOTE(review): the returned value and the statement taken on NOTIFY_BAD
 * are not visible in this listing -- confirm.
 */
474 #define DO_ERROR_STACK(trapnr, signr, str, name) \
475 asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \
477 struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
478 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
480 if (regs->cs & 3) { \
481 memcpy(pr, regs, sizeof(struct pt_regs)); \
484 do_trap(trapnr, signr, str, regs, error_code, NULL); \
/* Handlers generated with DO_ERROR_STACK: vector, signal, description, handler name suffix. */
488 DO_ERROR_STACK(12, SIGBUS, "stack segment", stack_segment)
489 DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
/*
 * Handler for the general protection fault (recorded as trap number 13).
 *
 * Flow of the visible statements:
 *   - conditional_sti(regs);
 *   - under CONFIG_CHECKING, MSR_GS_BASE is compared with the address of
 *     this CPU's pda entry; on mismatch it is rewritten and a message is
 *     logged;
 *   - if the low two bits of cs are non-zero: optionally log the fault
 *     (exception_trace and unhandled_signal()), record error_code and
 *     trap number in the task and send SIGSEGV;
 *   - otherwise search the exception tables for regs->rip; a fixup
 *     redirects regs->rip; failing that a DIE_GPF notification is sent and
 *     die() is called.
 * NOTE(review): the remaining notify_die() arguments and the handling of
 * its result are not visible in this listing.
 */
491 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
493 conditional_sti(regs);
495 #ifdef CONFIG_CHECKING
498 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
499 rdmsrl(MSR_GS_BASE, gs);
500 if (gs != (unsigned long)pda) {
501 wrmsrl(MSR_GS_BASE, pda);
503 printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
509 if ((regs->cs & 3)!=0) {
510 struct task_struct *tsk = current;
512 if (exception_trace && unhandled_signal(tsk, SIGSEGV))
514 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
516 regs->rip,regs->rsp,error_code);
518 tsk->thread.error_code = error_code;
519 tsk->thread.trap_no = 13;
520 force_sig(SIGSEGV, tsk);
526 const struct exception_table_entry *fixup;
527 fixup = search_exception_tables(regs->rip);
529 regs->rip = fixup->fixup;
532 notify_die(DIE_GPF, "general protection fault", regs, error_code,
534 die("general protection fault", regs, error_code);
/*
 * Report an NMI attributed to a memory parity error, then build a new
 * control value from `reason`: its low four bits with bit 2 (value 4) set.
 * NOTE(review): the write of that value back to the hardware is not
 * visible in this listing.
 */
538 static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
540 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
541 printk("You probably have a hardware problem with your RAM chips\n");
543 /* Clear and disable the memory parity error line. */
544 reason = (reason & 0xf) | 4;
/*
 * Report an NMI attributed to an I/O check (IOCK) error and dump the
 * registers, then build a new control value from `reason`: its low four
 * bits with bit 3 (value 8) set.
 * NOTE(review): the write-back, the wait mentioned in the existing comment
 * and the final re-enable are not visible in this listing.
 */
548 static void io_check_error(unsigned char reason, struct pt_regs * regs)
550 printk("NMI: IOCK error (debug interrupt?)\n");
551 show_registers(regs);
553 /* Re-enable the IOCK line, wait for a few seconds */
554 reason = (reason & 0xf) | 8;
/* Log an NMI whose source could not be identified; `reason` is printed as two hex digits. `regs` is unused in the visible lines. */
561 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
562 { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
563 printk("Dazed and confused, but trying to continue\n");
564 printk("Do you have a strange power saving mode enabled?\n");
/*
 * Default NMI dispatcher. The reason byte is read from I/O port 0x61.
 *
 * Bits 0xc0 both clear: send a DIE_NMI_IPI notification; with
 * CONFIG_X86_LOCAL_APIC and nmi_watchdog > 0 the event is given to
 * nmi_watchdog_tick(); unknown_nmi_error() is the remaining outcome.
 *
 * Otherwise: send a DIE_NMI notification, then call mem_parity_error()
 * and/or io_check_error(), and finish with dummy reads of port 0x71 (the
 * existing comment describes this as reasserting the edge-triggered NMI).
 * NOTE(review): the bit tests selecting the two error handlers, the early
 * returns and the port writes between the dummy reads are not visible in
 * this listing.
 */
567 asmlinkage void default_do_nmi(struct pt_regs * regs)
569 unsigned char reason = inb(0x61);
571 if (!(reason & 0xc0)) {
572 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) == NOTIFY_BAD)
574 #ifdef CONFIG_X86_LOCAL_APIC
576 * Ok, so this is none of the documented NMI sources,
577 * so it must be the NMI watchdog.
579 if (nmi_watchdog > 0) {
580 nmi_watchdog_tick(regs,reason);
584 unknown_nmi_error(reason, regs);
587 if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_BAD)
590 mem_parity_error(reason, regs);
592 io_check_error(reason, regs);
595 * Reassert NMI in case it became active meanwhile
596 * as it's edge-triggered.
599 inb(0x71); /* dummy */
601 inb(0x71); /* dummy */
604 /* runs on IST stack. */
/*
 * Debug exception handler (recorded as trap number 1); returns a pointer.
 *
 * Visible steps:
 *   - `pr` addresses the pt_regs slot just below current->thread.rsp0 and
 *     the incoming frame is copied there (NOTE(review): the condition
 *     guarding the copy is not visible);
 *   - under CONFIG_CHECKING, MSR_GS_BASE is verified against this CPU's
 *     pda entry and repaired with a log message on mismatch;
 *   - debug status register DR6 is read into `condition`, then
 *     conditional_sti(regs);
 *   - breakpoint hits (DR_TRAP0..3) are checked against
 *     thread.debugreg7 == 0, which the existing comment calls spurious;
 *   - `condition` is saved in thread.debugreg6;
 *   - for DR_STEP: a trap from CPL 0 jumps to clear_TF_reenable; a task
 *     with PT_DTRACE set but PT_PTRACED clear is also special-cased
 *     (target not visible);
 *   - otherwise SIGTRAP / TRAP_BRKPT is sent with si_addr = regs->rip,
 *     DR7 is written with 0 and a DIE_DEBUG notification is sent;
 *   - the clear_TF path logs, sets TIF_SINGLESTEP, notifies ("debug2") and
 *     clears TF_MASK in the saved eflags.
 * NOTE(review): labels, returns and the returned pointer are not visible
 * in this listing.
 */
605 asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
608 unsigned long condition;
609 struct task_struct *tsk = current;
612 pr = (struct pt_regs *)(current->thread.rsp0)-1;
614 memcpy(pr, regs, sizeof(struct pt_regs));
618 #ifdef CONFIG_CHECKING
620 /* RED-PEN interaction with debugger - could destroy gs */
622 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
623 rdmsrl(MSR_GS_BASE, gs);
624 if (gs != (unsigned long)pda) {
625 wrmsrl(MSR_GS_BASE, pda);
626 printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
631 asm("movq %%db6,%0" : "=r" (condition));
633 conditional_sti(regs);
635 /* Mask out spurious debug traps due to lazy DR7 setting */
636 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
637 if (!tsk->thread.debugreg7) {
642 tsk->thread.debugreg6 = condition;
644 /* Mask out spurious TF errors due to lazy TF clearing */
645 if (condition & DR_STEP) {
647 * The TF error should be masked out only if the current
648 * process is not traced and if the TRAP flag has been set
649 * previously by a tracing process (condition detected by
650 * the PT_DTRACE flag); remember that the i386 TRAP flag
651 * can be modified by the process itself in user mode,
652 * allowing programs to debug themselves without the ptrace()
655 if ((regs->cs & 3) == 0)
656 goto clear_TF_reenable;
657 if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
661 /* Ok, finally something we can handle */
662 tsk->thread.trap_no = 1;
663 tsk->thread.error_code = error_code;
664 info.si_signo = SIGTRAP;
666 info.si_code = TRAP_BRKPT;
667 if ((regs->cs & 3) == 0)
670 info.si_addr = (void __user *)regs->rip;
671 force_sig_info(SIGTRAP, &info, tsk);
673 asm volatile("movq %0,%%db7"::"r"(0UL));
674 notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
678 printk("clear_tf_reenable\n");
679 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
682 /* RED-PEN could cause spurious errors */
683 if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP)
685 regs->eflags &= ~TF_MASK;
690 * Note that we play around with the 'TS' bit in an attempt to get
691 * the correct behaviour even in the presence of the asynchronous
/*
 * Turn an x87 floating-point exception into SIGFPE for a task.
 *
 * rip: user-space instruction address. NOTE(review): its use (presumably
 *      as si_addr) is not visible in this listing.
 *
 * The task gets trap number 16 and error code 0. si_code starts as
 * __SI_FAULT and is refined from the FPU control word (cwd) and status
 * word (swd): the switch value is the unmasked exception bits
 * (~cwd & swd & 0x3f) combined with swd bits 0x240.
 *   0x001, 0x041, 0x241 -> FPE_FLTINV
 *   0x002, 0x010        -> FPE_FLTUND
 *   0x004               -> FPE_FLTDIV
 *   0x008               -> FPE_FLTOVF
 *   0x020               -> FPE_FLTRES
 * Finally SIGFPE is sent with force_sig_info().
 * NOTE(review): how `task` is obtained and the default/break lines of the
 * switch are not visible in this listing.
 */
694 void math_error(void __user *rip)
696 struct task_struct * task;
698 unsigned short cwd, swd;
700 * Save the info for the exception handler and clear the error.
704 task->thread.trap_no = 16;
705 task->thread.error_code = 0;
706 info.si_signo = SIGFPE;
708 info.si_code = __SI_FAULT;
711 * (~cwd & swd) will mask out exceptions that are not set to unmasked
712 * status. 0x3f is the exception bits in these regs, 0x200 is the
713 * C1 reg you need in case of a stack fault, 0x040 is the stack
714 * fault bit. We should only be taking one exception at a time,
715 * so if this combination doesn't produce any single exception,
716 * then we have a bad program that isn't synchronizing its FPU usage
717 * and it will suffer the consequences since we won't be able to
718 * fully reproduce the context of the exception
720 cwd = get_fpu_cwd(task);
721 swd = get_fpu_swd(task);
722 switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
726 case 0x001: /* Invalid Op */
727 case 0x041: /* Stack Fault */
728 case 0x241: /* Stack Fault | Direction */
729 info.si_code = FPE_FLTINV;
731 case 0x002: /* Denormalize */
732 case 0x010: /* Underflow */
733 info.si_code = FPE_FLTUND;
735 case 0x004: /* Zero Divide */
736 info.si_code = FPE_FLTDIV;
738 case 0x008: /* Overflow */
739 info.si_code = FPE_FLTOVF;
741 case 0x020: /* Precision */
742 info.si_code = FPE_FLTRES;
745 force_sig_info(SIGFPE, &info, task);
/* Entry point for the coprocessor error trap: calls conditional_sti(regs), then hands the saved rip to math_error(). */
748 asmlinkage void do_coprocessor_error(struct pt_regs * regs)
750 conditional_sti(regs);
751 math_error((void __user *)regs->rip);
/* Logs "bad interrupt" (no log level and no trailing newline in the format string). */
754 asmlinkage void bad_intr(void)
756 printk("bad interrupt");
/*
 * Turn a SIMD floating-point exception into SIGFPE for a task.
 *
 * rip: user-space instruction address. NOTE(review): its use (presumably
 *      as si_addr) is not visible in this listing.
 *
 * The task gets trap number 19 and error code 0. si_code starts as
 * __SI_FAULT and is refined from MXCSR: the mask bits (0x1f80) are shifted
 * right by 7 and inverted, then ANDed with the flag bits (0x3f), leaving
 * the exceptions that are raised and not masked.
 *   0x001        -> FPE_FLTINV
 *   0x002, 0x010 -> FPE_FLTUND
 *   0x004        -> FPE_FLTDIV
 *   0x008        -> FPE_FLTOVF
 *   0x020        -> FPE_FLTRES
 * Finally SIGFPE is sent with force_sig_info().
 * NOTE(review): how `task` is obtained and the default/break lines of the
 * switch are not visible in this listing.
 */
759 static inline void simd_math_error(void __user *rip)
761 struct task_struct * task;
763 unsigned short mxcsr;
766 * Save the info for the exception handler and clear the error.
770 task->thread.trap_no = 19;
771 task->thread.error_code = 0;
772 info.si_signo = SIGFPE;
774 info.si_code = __SI_FAULT;
777 * The SIMD FPU exceptions are handled a little differently, as there
778 * is only a single status/control register. Thus, to determine which
779 * unmasked exception was caught we must mask the exception mask bits
780 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
782 mxcsr = get_fpu_mxcsr(task);
783 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
787 case 0x001: /* Invalid Op */
788 info.si_code = FPE_FLTINV;
790 case 0x002: /* Denormalize */
791 case 0x010: /* Underflow */
792 info.si_code = FPE_FLTUND;
794 case 0x004: /* Zero Divide */
795 info.si_code = FPE_FLTDIV;
797 case 0x008: /* Overflow */
798 info.si_code = FPE_FLTOVF;
800 case 0x020: /* Precision */
801 info.si_code = FPE_FLTRES;
804 force_sig_info(SIGFPE, &info, task);
/* Entry point for the SIMD coprocessor error trap: calls conditional_sti(regs), then hands the saved rip to simd_math_error(). */
807 asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs)
809 conditional_sti(regs);
810 simd_math_error((void __user *)regs->rip);
/* NOTE(review): only the signature is visible in this listing; the body must be checked in the full source. */
813 asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
818 * 'math_state_restore()' saves the current math information in the
819 * old math state array, and gets the new ones from the current task
821 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
822 * Don't touch unless you *really* know how it works.
/*
 * Reload the FPU state of the current task: clts() is called first (see the
 * existing comment), the saved fxsave image in me->thread.i387 is restored
 * with restore_fpu_checking(), and TS_USEDFPU is set in the thread_info
 * status word.
 * NOTE(review): statements between clts() and the restore are not visible
 * in this listing.
 */
824 asmlinkage void math_state_restore(void)
826 struct task_struct *me = current;
827 clts(); /* Allow maths ops (or we recurse) */
831 restore_fpu_checking(&me->thread.i387.fxsave);
832 me->thread_info->status |= TS_USEDFPU;
/* Sends a DIE_CALL notification ("debug call", trap 255, SIGINT) for the given register state. */
835 void do_call_debug(struct pt_regs *regs)
837 notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
/*
 * Install the handlers for CPU exception vectors 0-19, plus the IA32
 * syscall vector (CONFIG_IA32_EMULATION) and the KDB vector.
 *
 *   set_intr_gate():     ordinary exception gates; each takes the address
 *                        of its entry point, vector 0 included.
 *   set_intr_gate_ist(): vectors 1, 2, 8, 12 and 18 additionally name a
 *                        dedicated stack (DEBUG_STACK, NMI_STACK,
 *                        DOUBLEFAULT_STACK, STACKFAULT_STACK, MCE_STACK).
 *   set_system_gate():   vectors 3-5, which can be raised from any
 *                        privilege level.
 *
 * NOTE(review): short lines (braces, #endif, the guard around the KDB
 * gate and whatever follows the final comment) are absent from this
 * listing and are left as found.
 */
840 void __init trap_init(void)
842 set_intr_gate(0,&divide_error);
843 set_intr_gate_ist(1,&debug,DEBUG_STACK);
844 set_intr_gate_ist(2,&nmi,NMI_STACK);
845 set_system_gate(3,&int3); /* int3-5 can be called from all */
846 set_system_gate(4,&overflow);
847 set_system_gate(5,&bounds);
848 set_intr_gate(6,&invalid_op);
849 set_intr_gate(7,&device_not_available);
850 set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
851 set_intr_gate(9,&coprocessor_segment_overrun);
852 set_intr_gate(10,&invalid_TSS);
853 set_intr_gate(11,&segment_not_present);
854 set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
855 set_intr_gate(13,&general_protection);
856 set_intr_gate(14,&page_fault);
857 set_intr_gate(15,&spurious_interrupt_bug);
858 set_intr_gate(16,&coprocessor_error);
859 set_intr_gate(17,&alignment_check);
860 set_intr_gate_ist(18,&machine_check, MCE_STACK);
861 set_intr_gate(19,&simd_coprocessor_error);
863 #ifdef CONFIG_IA32_EMULATION
864 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
867 set_intr_gate(KDB_VECTOR, call_debug);
870 * Should be a barrier for any external CPU state.
876 /* Actual parsing is done early in setup.c. */
/*
 * Handler registered for the "oops=" boot parameter through __setup().
 * NOTE(review): the body is not visible in this listing.
 */
877 static int __init oops_dummy(char *s)
882 __setup("oops=", oops_dummy);