2 * linux/arch/x86-64/traps.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000
10 * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
14 * 'Traps.c' handles hardware traps and faults after we have saved some
17 #include <linux/config.h>
18 #include <linux/sched.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/ptrace.h>
23 #include <linux/timer.h>
25 #include <linux/init.h>
26 #include <linux/delay.h>
27 #include <linux/spinlock.h>
28 #include <linux/interrupt.h>
29 #include <linux/module.h>
30 #include <linux/moduleparam.h>
32 #include <asm/system.h>
33 #include <asm/uaccess.h>
35 #include <asm/atomic.h>
36 #include <asm/debugreg.h>
39 #include <asm/kdebug.h>
40 #include <asm/processor.h>
43 #include <asm/pgalloc.h>
45 #include <asm/proto.h>
47 #include <linux/irq.h>
/*
 * Forward declarations. idt_table is the 256-entry gate table; the
 * asmlinkage prototypes name the low-level trap entry points, most of
 * which trap_init() installs as gate handlers further down in this file.
 */
49 extern struct gate_struct idt_table[256];
51 asmlinkage void divide_error(void);
52 asmlinkage void debug(void);
53 asmlinkage void nmi(void);
54 asmlinkage void int3(void);
55 asmlinkage void overflow(void);
56 asmlinkage void bounds(void);
57 asmlinkage void invalid_op(void);
58 asmlinkage void device_not_available(void);
59 asmlinkage void double_fault(void);
60 asmlinkage void coprocessor_segment_overrun(void);
61 asmlinkage void invalid_TSS(void);
62 asmlinkage void segment_not_present(void);
63 asmlinkage void stack_segment(void);
64 asmlinkage void general_protection(void);
65 asmlinkage void page_fault(void);
66 asmlinkage void coprocessor_error(void);
67 asmlinkage void simd_coprocessor_error(void);
68 asmlinkage void reserved(void);
69 asmlinkage void alignment_check(void);
70 asmlinkage void machine_check(void);
71 asmlinkage void spurious_interrupt_bug(void);
72 asmlinkage void call_debug(void);
/* Notifier chain head; presumably the chain walked by notify_die() - TODO confirm. */
74 struct notifier_block *die_chain;
/*
 * Helper used at the top of several trap handlers. It tests whether the
 * interrupted context had X86_EFLAGS_IF set in its saved eflags.
 * NOTE(review): the statement guarded by this test is not visible in this
 * excerpt; presumably it re-enables interrupts - confirm against the full
 * source.
 */
76 static inline void conditional_sti(struct pt_regs *regs)
78 if (regs->eflags & X86_EFLAGS_IF)
/*
 * Upper bound on the number of raw stack words show_stack() prints.
 * Defaults to 10 and is overwritten by kstack_setup() for the kstack=
 * boot option.
 */
82 static int kstack_depth_to_print = 10;
/*
 * printk_address - print one kernel address and return the printk() result.
 *
 * CONFIG_KALLSYMS variant: looks the address up with kallsyms_lookup() and
 * prints the address followed by module name, symbol name and signed
 * offset. One visible path prints only the bracketed 16-digit hex address;
 * the condition selecting it is not visible in this excerpt (presumably a
 * failed lookup - confirm).
 */
84 #ifdef CONFIG_KALLSYMS
85 #include <linux/kallsyms.h>
86 int printk_address(unsigned long address)
88 unsigned long offset = 0, symsize;
94 symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf);
96 return printk("[<%016lx>]", address);
99 return printk("<%016lx>{%s%s%s%s%+ld}",
100 address,delim,modname,delim,symname,offset);
/*
 * Second definition: prints only the bracketed hex address. Presumably the
 * variant used without CONFIG_KALLSYMS; the preprocessor lines separating
 * the two definitions are not visible here.
 */
103 int printk_address(unsigned long address)
105 return printk("[<%016lx>]", address);
/*
 * in_exception_stack - test whether an address lies on an exception stack.
 *
 * For each of the N_EXCEPTION_STACKS entries in the ist[] array of the
 * given cpu's init_tss, the range from ist[k] to ist[k] + EXCEPTION_STKSZ
 * (both bounds inclusive) is compared against stack. On a match the upper
 * bound is returned as a pointer. The value returned when no entry matches
 * is not visible in this excerpt.
 */
109 unsigned long *in_exception_stack(int cpu, unsigned long stack)
112 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
113 struct tss_struct *tss = &per_cpu(init_tss, cpu);
114 unsigned long end = tss->ist[k] + EXCEPTION_STKSZ;
116 if (stack >= tss->ist[k] && stack <= end)
117 return (unsigned long *)end;
123 * x86-64 can have up to three kernel stacks:
126 * severe exception (double fault, nmi, stack fault) hardware stack
127 * Check and process them in order.
/*
 * show_trace - print a Call Trace starting at the given stack pointer.
 *
 * Every stack word for which __kernel_text_address() holds is printed with
 * printk_address(); the running count i accumulates the printk lengths
 * (its use for line wrapping is not visible here). Three walks are visible:
 *   1. an exception stack, bounded by the value in_exception_stack() returns,
 *      after which the walk continues from the pointer saved at
 *      estack_end[-2];
 *   2. the per-cpu IRQ stack taken from cpu_pda[cpu].irqstackptr, after
 *      which the walk continues from the pointer saved at irqstack_end[-1];
 *   3. the remaining stack, until the pointer becomes THREAD_SIZE aligned.
 * The guards around the first walk are not visible in this excerpt.
 */
130 void show_trace(unsigned long *stack)
133 unsigned long *irqstack, *irqstack_end, *estack_end;
134 const int cpu = safe_smp_processor_id();
137 printk("\nCall Trace:");
140 estack_end = in_exception_stack(cpu, (unsigned long)stack);
142 while (stack < estack_end) {
144 if (__kernel_text_address(addr)) {
145 i += printk_address(addr);
153 i += printk(" <EOE> ");
155 stack = (unsigned long *) estack_end[-2];
158 irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
159 irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
161 if (stack >= irqstack && stack < irqstack_end) {
163 while (stack < irqstack_end) {
166 * If the address is either in the text segment of the
167 * kernel, or in the region which contains vmalloc'ed
168 * memory, it *may* be the address of a calling
169 * routine; if so, print it so that someone tracing
170 * down the cause of the crash will be able to figure
171 * out the call path that was taken.
173 if (__kernel_text_address(addr)) {
174 i += printk_address(addr);
182 stack = (unsigned long *) (irqstack_end[-1]);
187 while (((long) stack & (THREAD_SIZE-1)) != 0) {
189 if (__kernel_text_address(addr)) {
190 i += printk_address(addr);
/*
 * show_stack - dump raw stack words, then the call trace.
 *
 * tsk: task whose saved thread.rsp may be used as the starting point.
 * rsp: explicit starting stack pointer.
 * The conditions choosing between tsk->thread.rsp, the address of the
 * local rsp variable and the caller supplied value are not visible in this
 * excerpt.
 *
 * At most kstack_depth_to_print words are printed as 16-digit hex values.
 * The visible checks deal with reaching the end of the per-cpu IRQ stack
 * (the walk continues from the pointer stored at irqstack_end[-1]), with a
 * THREAD_SIZE aligned pointer, and with every fourth word (presumably a
 * line break - confirm). show_trace() is then called on rsp.
 */
201 void show_stack(struct task_struct *tsk, unsigned long * rsp)
203 unsigned long *stack;
205 const int cpu = safe_smp_processor_id();
206 unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
207 unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE);
209 // debugging aid: "show_stack(NULL, NULL);" prints the
210 // back trace for this cpu.
214 rsp = (unsigned long *)tsk->thread.rsp;
216 rsp = (unsigned long *)&rsp;
220 for(i=0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) {
222 if (stack == irqstack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]);
227 if (((long) stack & (THREAD_SIZE-1)) == 0)
230 if (i && ((i % 4) == 0))
232 printk("%016lx ", *stack++);
234 show_trace((unsigned long *)rsp);
238 * The architecture-independent dump_stack generator
/*
 * dump_stack - stack dump entry point, exported to modules through
 * EXPORT_SYMBOL. NOTE(review): the function body is not visible in this
 * excerpt; presumably it forwards to show_trace() - confirm.
 */
240 void dump_stack(void)
246 EXPORT_SYMBOL(dump_stack);
/*
 * show_registers - print the cpu, current process and fault context.
 *
 * in_kernel is set when the low two bits of the saved cs are zero. The
 * visible lines print the cpu number and the comm, pid, thread_info and
 * task pointer of the task recorded in cpu_pda[cpu].pcurrent, then dump
 * the stack with show_stack(). Bytes at regs->rip are read with
 * __get_user(); a failed read prints the Bad RIP value message. The
 * register dump itself and the branch conditions are not visible in this
 * excerpt.
 */
248 void show_registers(struct pt_regs *regs)
251 int in_kernel = (regs->cs & 3) == 0;
253 const int cpu = safe_smp_processor_id();
254 struct task_struct *cur = cpu_pda[cpu].pcurrent;
258 printk("CPU %d ", cpu);
260 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
261 cur->comm, cur->pid, cur->thread_info, cur);
264 * When in-kernel, we also print out the stack and code at the
265 * time of the fault..
270 show_stack(NULL, (unsigned long*)rsp);
273 if(regs->rip < PAGE_OFFSET)
279 if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
281 printk(" Bad RIP value.");
/*
 * handle_BUG - report the file and line of a BUG site.
 *
 * Copies a struct bug_frame from regs->rip with __copy_from_user(), checks
 * that the filename pointer is not below __PAGE_OFFSET and that the two
 * ud2 bytes are 0x0f 0x0b, and probes the filename with __get_user(),
 * substituting a placeholder string when that read fails. It then prints
 * the cut-here banner and the Kernel BUG at file:line message (filename
 * limited to 50 characters). What happens when a check fails is not
 * visible in this excerpt (presumably an early return - confirm).
 */
290 void handle_BUG(struct pt_regs *regs)
297 if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
298 sizeof(struct bug_frame)))
300 if ((unsigned long)f.filename < __PAGE_OFFSET ||
301 f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
303 if (__get_user(tmp, f.filename))
304 f.filename = "unmapped filename";
305 printk("----------- [cut here ] --------- [please bite here ] ---------\n");
306 printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
/*
 * out_of_line_bug - NOTE(review): body not visible in this excerpt;
 * presumably a non-inline wrapper that triggers BUG() - confirm.
 */
309 void out_of_line_bug(void)
/*
 * die_lock is the spinlock taken in oops_begin(). die_owner is compared
 * there against the current cpu to recognise a nested oops; -1 is its
 * initial value.
 */
314 static spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
315 static int die_owner = -1;
/*
 * oops_begin - take die_lock before oops output is produced.
 *
 * spin_trylock() is attempted first. When it fails and this cpu already is
 * die_owner the oops is nested and, per the existing comment, nothing is
 * done in that branch; the visible spin_lock() call is presumably the
 * alternative branch (the else line is not visible - confirm).
 */
317 void oops_begin(void)
319 int cpu = safe_smp_processor_id();
320 /* racy, but better than risking deadlock. */
322 if (!spin_trylock(&die_lock)) {
323 if (cpu == die_owner)
324 /* nested oops. should stop eventually */;
326 spin_lock(&die_lock);
/*
 * NOTE(review): the lines between the spin_lock() above and the unlock
 * below are not visible in this excerpt. The unlock / irq-enable pair
 * presumably belongs to a separate release function whose header is
 * missing here - confirm against the full source.
 */
337 spin_unlock(&die_lock);
338 local_irq_enable(); /* make sure back scroll still works */
/*
 * __die - print the oops report for a fatal trap.
 *
 * str:  description printed at the start of the report.
 * regs: register frame at the time of the fault.
 * err:  error code; only its low 16 bits are printed.
 *
 * Prints the header with a running die_counter, build option tags selected
 * by CONFIG_PREEMPT and CONFIG_DEBUG_PAGEALLOC, raises DIE_OOPS through
 * notify_die(), dumps the registers and finally repeats RIP and RSP as a
 * short summary.
 */
343 void __die(const char * str, struct pt_regs * regs, long err)
345 static int die_counter;
346 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
347 #ifdef CONFIG_PREEMPT
353 #ifdef CONFIG_DEBUG_PAGEALLOC
354 printk("DEBUG_PAGEALLOC");
357 notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
358 show_registers(regs);
359 /* Executive summary in case the oops scrolled away */
360 printk(KERN_ALERT "RIP ");
361 printk_address(regs->rip);
362 printk(" RSP <%016lx>\n", regs->rsp);
/*
 * die - report a fatal kernel fault by calling __die() with the same
 * arguments. NOTE(review): the statements surrounding the __die() call are
 * not visible in this excerpt.
 */
365 void die(const char * str, struct pt_regs * regs, long err)
369 __die(str, regs, err);
/*
 * die_if_kernel - the visible condition holds when VM_MASK is clear in the
 * saved eflags and the saved cs equals __KERNEL_CS. NOTE(review): the
 * guarded statement is not visible in this excerpt; presumably die() -
 * confirm.
 */
373 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
375 if (!(regs->eflags & VM_MASK) && (regs->cs == __KERNEL_CS))
/*
 * get_cr2 - read the CR2 control register into a local with inline asm.
 * The return statement is not visible in this excerpt; presumably the
 * value read is returned - confirm.
 */
379 static inline unsigned long get_cr2(void)
381 unsigned long address;
383 /* get the address */
384 __asm__("movq %%cr2,%0":"=r" (address));
/*
 * do_trap - common tail of the handlers generated by the DO_ERROR* macros.
 *
 * trapnr:     trap number, stored in tsk->thread.trap_no.
 * signr:      signal raised for a user-mode fault.
 * str:        description used in log messages and handed to die().
 * regs:       saved register frame.
 * error_code: stored in tsk->thread.error_code.
 * info:       siginfo for force_sig_info(); the DO_ERROR callers pass NULL.
 *
 * Visible flow: for a user-mode frame (low two cs bits non-zero) the trap
 * is optionally logged, recorded in the task and delivered as a signal.
 * Otherwise the exception tables are searched for a fixup for regs->rip,
 * and die() is called. The conditions separating force_sig_info() from
 * force_sig(), and the fixup from die(), are not visible in this excerpt.
 */
388 static void do_trap(int trapnr, int signr, char *str,
389 struct pt_regs * regs, long error_code, siginfo_t *info)
391 conditional_sti(regs);
/* Debug check: compare MSR_GS_BASE with this cpu pda and rewrite it on mismatch. */
393 #ifdef CONFIG_CHECKING
396 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
397 rdmsrl(MSR_GS_BASE, gs);
398 if (gs != (unsigned long)pda) {
399 wrmsrl(MSR_GS_BASE, pda);
400 printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
406 if ((regs->cs & 3) != 0) {
407 struct task_struct *tsk = current;
409 if (exception_trace && unhandled_signal(tsk, signr))
411 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
412 tsk->comm, tsk->pid, str,
413 regs->rip,regs->rsp,error_code);
415 tsk->thread.error_code = error_code;
416 tsk->thread.trap_no = trapnr;
418 force_sig_info(signr, info, tsk);
420 force_sig(signr, tsk);
427 const struct exception_table_entry *fixup;
428 fixup = search_exception_tables(regs->rip);
430 regs->rip = fixup->fixup;
432 die(str, regs, error_code);
/*
 * DO_ERROR - generate do_<name>(regs, error_code) for a trap that carries
 * no siginfo. The handler raises DIE_TRAP through notify_die() and then
 * calls do_trap() with a NULL info pointer. The value the notify_die()
 * result is compared against is on a line not visible in this excerpt.
 */
437 #define DO_ERROR(trapnr, signr, str, name) \
438 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
440 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
443 do_trap(trapnr, signr, str, regs, error_code, NULL); \
/*
 * DO_ERROR_INFO - like DO_ERROR, but the generated handler fills a siginfo
 * (si_signo from signr, si_code from sicode, si_addr from siaddr) and
 * passes its address to do_trap(). The siaddr expression is expanded
 * inside the handler, so it may refer to regs.
 */
446 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
447 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
450 info.si_signo = signr; \
452 info.si_code = sicode; \
453 info.si_addr = (void __user *)siaddr; \
454 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
457 do_trap(trapnr, signr, str, regs, error_code, &info); \
/*
 * Handler instantiations. Arguments: trap number, signal, description,
 * handler name suffix and, for the _INFO form, si_code and fault address.
 */
460 DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
461 DO_ERROR( 3, SIGTRAP, "int3", int3);
462 DO_ERROR( 4, SIGSEGV, "overflow", overflow)
463 DO_ERROR( 5, SIGSEGV, "bounds", bounds)
464 DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
465 DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
466 DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
467 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
468 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
469 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
470 DO_ERROR(18, SIGSEGV, "reserved", reserved)
/*
 * DO_ERROR_STACK - variant of DO_ERROR whose generated handler returns a
 * pointer. pr points one struct pt_regs below current->thread.rsp0; for a
 * user-mode frame (low two cs bits set) the register frame is copied
 * there before do_trap() is called. The return statements are not visible
 * in this excerpt; presumably the returned pointer is the frame the entry
 * code continues on - confirm.
 */
472 #define DO_ERROR_STACK(trapnr, signr, str, name) \
473 asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \
475 struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
476 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
479 if (regs->cs & 3) { \
480 memcpy(pr, regs, sizeof(struct pt_regs)); \
483 do_trap(trapnr, signr, str, regs, error_code, NULL); \
/* Handlers for traps 12 and 8, both installed with IST stacks in trap_init(). */
487 DO_ERROR_STACK(12, SIGBUS, "stack segment", stack_segment)
488 DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
/*
 * do_general_protection - handler for trap 13.
 *
 * For a user-mode frame (low two cs bits non-zero) the fault is optionally
 * logged, recorded in tsk->thread (trap_no 13) and delivered as SIGSEGV.
 * Otherwise the exception tables are searched for a fixup for regs->rip;
 * the remaining visible statements raise DIE_GPF through notify_die() and
 * call die(). The conditions between those statements are not visible in
 * this excerpt.
 */
490 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
492 conditional_sti(regs);
/* Debug check: compare MSR_GS_BASE with this cpu pda and rewrite it on mismatch. */
494 #ifdef CONFIG_CHECKING
497 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
498 rdmsrl(MSR_GS_BASE, gs);
499 if (gs != (unsigned long)pda) {
500 wrmsrl(MSR_GS_BASE, pda);
502 printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
508 if ((regs->cs & 3)!=0) {
509 struct task_struct *tsk = current;
511 if (exception_trace && unhandled_signal(tsk, SIGSEGV))
513 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
515 regs->rip,regs->rsp,error_code);
517 tsk->thread.error_code = error_code;
518 tsk->thread.trap_no = 13;
519 force_sig(SIGSEGV, tsk);
525 const struct exception_table_entry *fixup;
526 fixup = search_exception_tables(regs->rip);
528 regs->rip = fixup->fixup;
531 notify_die(DIE_GPF, "general protection fault", regs, error_code,
533 die("general protection fault", regs, error_code);
/*
 * mem_parity_error - NMI reason handler called from default_do_nmi().
 * Logs a RAM hardware warning, then keeps the low four bits of reason and
 * sets bit value 4. The statement that writes the new value back is not
 * visible in this excerpt.
 */
537 static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
539 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
540 printk("You probably have a hardware problem with your RAM chips\n");
542 /* Clear and disable the memory parity error line. */
543 reason = (reason & 0xf) | 4;
/*
 * io_check_error - NMI reason handler called from default_do_nmi().
 * Logs the IOCK error, dumps the registers, then keeps the low four bits
 * of reason and sets bit value 8. The port writes and the delay mentioned
 * by the existing comment are not visible in this excerpt.
 */
547 static void io_check_error(unsigned char reason, struct pt_regs * regs)
549 printk("NMI: IOCK error (debug interrupt?)\n");
550 show_registers(regs);
552 /* Re-enable the IOCK line, wait for a few seconds */
553 reason = (reason & 0xf) | 8;
/*
 * unknown_nmi_error - log an NMI whose reason byte matched none of the
 * cases handled in default_do_nmi(). Only prints messages; reason is shown
 * as two hex digits.
 */
560 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
561 { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
562 printk("Dazed and confused, but trying to continue\n");
563 printk("Do you have a strange power saving mode enabled?\n");
/*
 * default_do_nmi - dispatch an NMI according to the reason byte read from
 * I/O port 0x61.
 *
 * When neither of the bits in 0xc0 is set, DIE_NMI_IPI is raised through
 * notify_die(); with CONFIG_X86_LOCAL_APIC and nmi_watchdog > 0 the
 * watchdog tick runs; unknown_nmi_error() is the remaining visible call.
 * Otherwise DIE_NMI is raised and mem_parity_error() / io_check_error()
 * are called; the bit tests selecting each are not visible in this
 * excerpt. The function ends with two dummy reads of port 0x71; the
 * writes that presumably surround them are not visible.
 */
566 asmlinkage void default_do_nmi(struct pt_regs * regs)
568 unsigned char reason = inb(0x61);
570 if (!(reason & 0xc0)) {
571 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
574 #ifdef CONFIG_X86_LOCAL_APIC
576 * Ok, so this is none of the documented NMI sources,
577 * so it must be the NMI watchdog.
579 if (nmi_watchdog > 0) {
580 nmi_watchdog_tick(regs,reason);
584 unknown_nmi_error(reason, regs);
587 if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
590 mem_parity_error(reason, regs);
592 io_check_error(reason, regs);
595 * Reassert NMI in case it became active meanwhile
596 * as it's edge-triggered.
599 inb(0x71); /* dummy */
601 inb(0x71); /* dummy */
/*
 * do_debug - handler for trap 1; returns a pointer.
 *
 * Visible steps: the register frame is copied to the slot one struct
 * pt_regs below current->thread.rsp0 (the guarding condition is not
 * visible); DR6 is read into condition; hardware breakpoint hits are
 * examined when thread.debugreg7 is zero; condition is saved in
 * thread.debugreg6; single-step traps from kernel mode jump to
 * clear_TF_reenable; otherwise a SIGTRAP with TRAP_BRKPT and the faulting
 * rip is forced on the task. Later visible lines zero DR7, raise DIE_DEBUG
 * notifications, set TIF_SINGLESTEP and clear TF_MASK in the saved eflags.
 * Labels, branch targets and return statements are not visible in this
 * excerpt.
 */
604 /* runs on IST stack. */
605 asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
608 unsigned long condition;
609 struct task_struct *tsk = current;
612 pr = (struct pt_regs *)(current->thread.rsp0)-1;
614 memcpy(pr, regs, sizeof(struct pt_regs));
618 #ifdef CONFIG_CHECKING
620 /* RED-PEN interaction with debugger - could destroy gs */
622 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
623 rdmsrl(MSR_GS_BASE, gs);
624 if (gs != (unsigned long)pda) {
625 wrmsrl(MSR_GS_BASE, pda);
626 printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
631 asm("movq %%db6,%0" : "=r" (condition));
633 conditional_sti(regs);
635 /* Mask out spurious debug traps due to lazy DR7 setting */
636 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
637 if (!tsk->thread.debugreg7) {
642 tsk->thread.debugreg6 = condition;
644 /* Mask out spurious TF errors due to lazy TF clearing */
645 if (condition & DR_STEP) {
647 * The TF error should be masked out only if the current
648 * process is not traced and if the TRAP flag has been set
649 * previously by a tracing process (condition detected by
650 * the PT_DTRACE flag); remember that the i386 TRAP flag
651 * can be modified by the process itself in user mode,
652 * allowing programs to debug themselves without the ptrace()
655 if ((regs->cs & 3) == 0)
656 goto clear_TF_reenable;
657 if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
661 /* Ok, finally something we can handle */
662 tsk->thread.trap_no = 1;
663 tsk->thread.error_code = error_code;
664 info.si_signo = SIGTRAP;
666 info.si_code = TRAP_BRKPT;
667 if ((regs->cs & 3) == 0)
670 info.si_addr = (void __user *)regs->rip;
671 force_sig_info(SIGTRAP, &info, tsk);
673 asm volatile("movq %0,%%db7"::"r"(0UL));
674 notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
678 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
681 /* RED-PEN could cause spurious errors */
682 if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP)
684 regs->eflags &= ~TF_MASK;
/*
 * kernel_math_error - handle a math exception raised in kernel mode.
 *
 * Searches the exception tables for a fixup for regs->rip and redirects
 * rip to it; the remaining visible lines raise DIE_GPF through
 * notify_die() with trap number 16 and SIGFPE, and log the task name, str
 * and the faulting address at KERN_DEBUG level. The return values (tested
 * by the callers for non-zero) are not visible in this excerpt.
 */
688 static int kernel_math_error(struct pt_regs *regs, char *str)
690 const struct exception_table_entry *fixup;
691 fixup = search_exception_tables(regs->rip);
693 regs->rip = fixup->fixup;
696 notify_die(DIE_GPF, str, regs, 0, 16, SIGFPE);
698 /* This should be a die, but warn only for now */
701 printk(KERN_DEBUG "%s: %s at ", current->comm, str);
702 printk_address(regs->rip);
709 * Note that we play around with the 'TS' bit in an attempt to get
710 * the correct behaviour even in the presence of the asynchronous
/*
 * do_coprocessor_error - handler for trap 16 (x87 math error).
 *
 * A fault from kernel mode is first offered to kernel_math_error().
 * Otherwise trap_no 16 and error_code 0 are recorded in task->thread and a
 * SIGFPE siginfo is built. si_code starts as __SI_FAULT and is refined
 * from the unmasked exception bits ((~cwd) & swd & 0x3f) combined with the
 * status bits in 0x240, using the case table below. The assignment of
 * task, the break / default lines and the si_addr setup are not visible in
 * this excerpt.
 */
713 asmlinkage void do_coprocessor_error(struct pt_regs *regs)
715 void __user *rip = (void __user *)(regs->rip);
716 struct task_struct * task;
718 unsigned short cwd, swd;
720 conditional_sti(regs);
721 if ((regs->cs & 3) == 0 &&
722 kernel_math_error(regs, "kernel x87 math error"))
726 * Save the info for the exception handler and clear the error.
730 task->thread.trap_no = 16;
731 task->thread.error_code = 0;
732 info.si_signo = SIGFPE;
734 info.si_code = __SI_FAULT;
737 * (~cwd & swd) will mask out exceptions that are not set to unmasked
738 * status. 0x3f is the exception bits in these regs, 0x200 is the
739 * C1 reg you need in case of a stack fault, 0x040 is the stack
740 * fault bit. We should only be taking one exception at a time,
741 * so if this combination doesn't produce any single exception,
742 * then we have a bad program that isn't synchronizing its FPU usage
743 * and it will suffer the consequences since we won't be able to
744 * fully reproduce the context of the exception
746 cwd = get_fpu_cwd(task);
747 swd = get_fpu_swd(task);
748 switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
752 case 0x001: /* Invalid Op */
753 case 0x041: /* Stack Fault */
754 case 0x241: /* Stack Fault | Direction */
755 info.si_code = FPE_FLTINV;
757 case 0x002: /* Denormalize */
758 case 0x010: /* Underflow */
759 info.si_code = FPE_FLTUND;
761 case 0x004: /* Zero Divide */
762 info.si_code = FPE_FLTDIV;
764 case 0x008: /* Overflow */
765 info.si_code = FPE_FLTOVF;
767 case 0x020: /* Precision */
768 info.si_code = FPE_FLTRES;
771 force_sig_info(SIGFPE, &info, task);
/* bad_intr - logs a fixed message; no other visible effect. */
774 asmlinkage void bad_intr(void)
776 printk("bad interrupt");
/*
 * do_simd_coprocessor_error - handler for trap 19 (SIMD math error).
 *
 * A fault from kernel mode is first offered to kernel_math_error().
 * Otherwise trap_no 19 and error_code 0 are recorded in task->thread and a
 * SIGFPE siginfo is built. si_code starts as __SI_FAULT and is refined
 * from MXCSR: the mask bits (0x1f80 shifted right by 7) are inverted and
 * applied to the exception flags in 0x3f, then matched against the case
 * table below. The assignment of task, the break / default lines and the
 * si_addr setup are not visible in this excerpt.
 */
779 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
781 void __user *rip = (void __user *)(regs->rip);
782 struct task_struct * task;
784 unsigned short mxcsr;
786 conditional_sti(regs);
787 if ((regs->cs & 3) == 0 &&
788 kernel_math_error(regs, "simd math error"))
792 * Save the info for the exception handler and clear the error.
796 task->thread.trap_no = 19;
797 task->thread.error_code = 0;
798 info.si_signo = SIGFPE;
800 info.si_code = __SI_FAULT;
803 * The SIMD FPU exceptions are handled a little differently, as there
804 * is only a single status/control register. Thus, to determine which
805 * unmasked exception was caught we must mask the exception mask bits
806 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
808 mxcsr = get_fpu_mxcsr(task);
809 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
813 case 0x001: /* Invalid Op */
814 info.si_code = FPE_FLTINV;
816 case 0x002: /* Denormalize */
817 case 0x010: /* Underflow */
818 info.si_code = FPE_FLTUND;
820 case 0x004: /* Zero Divide */
821 info.si_code = FPE_FLTDIV;
823 case 0x008: /* Overflow */
824 info.si_code = FPE_FLTOVF;
826 case 0x020: /* Precision */
827 info.si_code = FPE_FLTRES;
830 force_sig_info(SIGFPE, &info, task);
/*
 * do_spurious_interrupt_bug - C handler for the spurious_interrupt_bug
 * entry, which trap_init() installs on vector 15. NOTE(review): the body
 * is not visible in this excerpt.
 */
833 asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
838 * 'math_state_restore()' saves the current math information in the
839 * old math state array, and gets the new ones from the current task
841 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
842 * Don't touch unless you *really* know how it works.
/*
 * math_state_restore - reload the FPU state of the current task.
 *
 * Runs clts() first (see the existing note on recursion), restores the
 * fxsave area of current with restore_fpu_checking() and sets TS_USEDFPU
 * in the thread_info status. The lines between clts() and the restore are
 * not visible in this excerpt.
 */
844 asmlinkage void math_state_restore(void)
846 struct task_struct *me = current;
847 clts(); /* Allow maths ops (or we recurse) */
851 restore_fpu_checking(&me->thread.i387.fxsave);
852 me->thread_info->status |= TS_USEDFPU;
/* do_call_debug - raise a DIE_CALL notification with trap number 255 and SIGINT. */
855 void do_call_debug(struct pt_regs *regs)
857 notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
/*
 * trap_init - install the exception gates.
 *
 * Vectors 0-19 are bound to the low-level entry stubs declared at the top
 * of this file. Vectors 1, 2, 8, 12 and 18 are installed with
 * set_intr_gate_ist() and a dedicated stack index; vectors 3-5 use
 * set_system_gate(); the rest use set_intr_gate(). With
 * CONFIG_IA32_EMULATION the IA32 syscall vector is installed as well, and
 * KDB_VECTOR is bound to call_debug.
 *
 * Vector 0 is the divide error trap and takes the address of the
 * divide_error stub, in the same form as every other gate below.
 * The final statements of the function are not visible in this excerpt.
 */
860 void __init trap_init(void)
862 set_intr_gate(0,&divide_error);
863 set_intr_gate_ist(1,&debug,DEBUG_STACK);
864 set_intr_gate_ist(2,&nmi,NMI_STACK);
865 set_system_gate(3,&int3); /* int3-5 can be called from all */
866 set_system_gate(4,&overflow);
867 set_system_gate(5,&bounds);
868 set_intr_gate(6,&invalid_op);
869 set_intr_gate(7,&device_not_available);
870 set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
871 set_intr_gate(9,&coprocessor_segment_overrun);
872 set_intr_gate(10,&invalid_TSS);
873 set_intr_gate(11,&segment_not_present);
874 set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
875 set_intr_gate(13,&general_protection);
876 set_intr_gate(14,&page_fault);
877 set_intr_gate(15,&spurious_interrupt_bug);
878 set_intr_gate(16,&coprocessor_error);
879 set_intr_gate(17,&alignment_check);
880 set_intr_gate_ist(18,&machine_check, MCE_STACK);
881 set_intr_gate(19,&simd_coprocessor_error);
883 #ifdef CONFIG_IA32_EMULATION
884 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
887 set_intr_gate(KDB_VECTOR, call_debug);
890 * Should be a barrier for any external CPU state.
/*
 * oops_dummy - registered for the oops= boot option. Per the existing
 * comment the option is parsed elsewhere; the body of this handler is not
 * visible in this excerpt.
 */
896 /* Actual parsing is done early in setup.c. */
897 static int __init oops_dummy(char *s)
902 __setup("oops=", oops_dummy);
/*
 * kstack_setup - handler for the kstack= boot option. Parses s with
 * simple_strtoul() (base 0, so the prefix selects the radix) and stores
 * the result in kstack_depth_to_print. The return statement is not visible
 * in this excerpt.
 */
904 static int __init kstack_setup(char *s)
906 kstack_depth_to_print = simple_strtoul(s,NULL,0);
909 __setup("kstack=", kstack_setup);