2 * linux/arch/i386/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
41 * "current" is in register %ebx during any slow entries.
44 #include <linux/linkage.h>
45 #include <asm/thread_info.h>
46 #include <asm/irqflags.h>
47 #include <asm/errno.h>
48 #include <asm/segment.h>
52 #include <asm/percpu.h>
53 #include <asm/dwarf2.h>
54 #include "irq_vectors.h"
55 #include <xen/interface/xen.h>
# NOTE(review): this file is a non-contiguous excerpt — original source line
# numbers are fused at the start of each line and intervening lines are
# missing throughout. Comments below are hedged accordingly.
58 * We use macros for low-level operations which need to be overridden
59 * for paravirtualization. The following will never clobber any registers:
60 * INTERRUPT_RETURN (aka. "iret")
61 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
62 * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
64 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
65 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
66 * Allowing a register to be clobbered can shrink the paravirt replacement
67 * enough to patch inline, increasing performance.
# Number of syscalls = size of sys_call_table in bytes / 4 bytes per
# 32-bit function pointer (syscall_table_size is computed at end of file).
70 #define nr_syscalls ((syscall_table_size)/4)
# Xen: byte offsets of the event-channel upcall flags within a vcpu_info
# struct inside shared_info_t. evtchn_upcall_pending's value token is
# missing here (excerpt shows only the /* 0 */ remnant) — TODO confirm
# against the original source; it is offset 0 per the inline comment.
82 /* Offsets into shared_info_t. */
83 #define evtchn_upcall_pending /* 0 */
84 #define evtchn_upcall_mask 1
# log2 of sizeof(vcpu_info): used to index the per-vcpu array by shifting.
86 #define sizeof_vcpu_shift 6
# Two GET_VCPU_INFO variants: the first indexes shared_info by this CPU's
# number (from the PDA), the second uses shared_info directly. The
# selecting #if/#else lines are missing from this excerpt — presumably
# SMP vs UP; verify against the full source.
89 #define GET_VCPU_INFO movl %gs:PDA_cpu,%esi ; \
90 shl $sizeof_vcpu_shift,%esi ; \
91 addl HYPERVISOR_shared_info,%esi
93 #define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
# Xen "interrupt" control: event delivery is masked/unmasked by writing
# the per-vcpu upcall mask byte; pending events are tested the same way.
# All three assume %esi was loaded by GET_VCPU_INFO.
96 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
97 #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
98 #define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
# With CONFIG_PREEMPT, entering a trap handler disables interrupts (and
# records it for irq-flag tracing); otherwise preempt_stop is a no-op and
# returning to kernel space skips the work check (resume_kernel aliases
# restore_nocheck). The #else between 102 and 104 is missing here.
101 #ifdef CONFIG_PREEMPT
102 #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
104 #define preempt_stop(clobbers)
105 #define resume_kernel restore_nocheck
# Emit TRACE_IRQS_OFF/ON bookkeeping before an iret, keyed on the saved
# IF bit in the to-be-restored EFLAGS. Body and .endm are missing from
# this excerpt.
108 .macro TRACE_IRQS_IRET
109 #ifdef CONFIG_TRACE_IRQFLAGS
110 testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
# Signal-return resume target: two alternative definitions (the selecting
# #ifdef — presumably CONFIG_VM86 — is missing from this excerpt).
118 #define resume_userspace_sig check_userspace
120 #define resume_userspace_sig resume_userspace
# Fragment of the SAVE_ALL macro (the #define line and the interleaved
# pushl instructions are missing from this excerpt): pt_regs layout is
# built by pushing gs, es, ds, then eax/ebp/edi/esi/edx/ecx/ebx, each
# push annotated for the DWARF unwinder. Segment-register CFI
# annotations stay commented out. Ends by loading __USER_DS (for
# ds/es) and __KERNEL_PDA (for gs) into %edx; the mov-to-segment
# instructions are among the missing lines.
126 CFI_ADJUST_CFA_OFFSET 4;\
127 /*CFI_REL_OFFSET gs, 0;*/\
129 CFI_ADJUST_CFA_OFFSET 4;\
130 /*CFI_REL_OFFSET es, 0;*/\
132 CFI_ADJUST_CFA_OFFSET 4;\
133 /*CFI_REL_OFFSET ds, 0;*/\
135 CFI_ADJUST_CFA_OFFSET 4;\
136 CFI_REL_OFFSET eax, 0;\
138 CFI_ADJUST_CFA_OFFSET 4;\
139 CFI_REL_OFFSET ebp, 0;\
141 CFI_ADJUST_CFA_OFFSET 4;\
142 CFI_REL_OFFSET edi, 0;\
144 CFI_ADJUST_CFA_OFFSET 4;\
145 CFI_REL_OFFSET esi, 0;\
147 CFI_ADJUST_CFA_OFFSET 4;\
148 CFI_REL_OFFSET edx, 0;\
150 CFI_ADJUST_CFA_OFFSET 4;\
151 CFI_REL_OFFSET ecx, 0;\
153 CFI_ADJUST_CFA_OFFSET 4;\
154 CFI_REL_OFFSET ebx, 0;\
155 movl $(__USER_DS), %edx; \
158 movl $(__KERNEL_PDA), %edx; \
# RESTORE_INT_REGS pops the seven general registers pushed by SAVE_ALL
# (the pop instructions themselves are missing from this excerpt; only
# the CFI adjustments remain). RESTORE_REGS additionally reloads the
# ds/es/gs segment registers with exception-table fixups so a faulting
# segment load zeroes the register instead of oopsing.
161 #define RESTORE_INT_REGS \
163 CFI_ADJUST_CFA_OFFSET -4;\
166 CFI_ADJUST_CFA_OFFSET -4;\
169 CFI_ADJUST_CFA_OFFSET -4;\
172 CFI_ADJUST_CFA_OFFSET -4;\
175 CFI_ADJUST_CFA_OFFSET -4;\
178 CFI_ADJUST_CFA_OFFSET -4;\
181 CFI_ADJUST_CFA_OFFSET -4;\
184 #define RESTORE_REGS \
187 CFI_ADJUST_CFA_OFFSET -4;\
190 CFI_ADJUST_CFA_OFFSET -4;\
193 CFI_ADJUST_CFA_OFFSET -4;\
195 .pushsection .fixup,"ax"; \
202 .section __ex_table,"a";\
# CFI frame-start macros for ring-0 entry points:
# - RING0_INT_FRAME: hardware pushed eip/cs/eflags (3 words).
# - RING0_EC_FRAME: as above plus an error code (4 words).
# - RING0_PTREGS_FRAME: a full pt_regs already on the stack; register
#   locations expressed as PT_* offsets relative to PT_OLDESP.
# The cs annotations stay commented out. Several lines of each macro
# (e.g. the eip CFI_OFFSET of the first two) are missing from this
# excerpt; no comments are inserted mid-macro to avoid breaking the
# backslash continuations.
209 #define RING0_INT_FRAME \
210 CFI_STARTPROC simple;\
212 CFI_DEF_CFA esp, 3*4;\
213 /*CFI_OFFSET cs, -2*4;*/\
216 #define RING0_EC_FRAME \
217 CFI_STARTPROC simple;\
219 CFI_DEF_CFA esp, 4*4;\
220 /*CFI_OFFSET cs, -2*4;*/\
223 #define RING0_PTREGS_FRAME \
224 CFI_STARTPROC simple;\
226 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
227 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
228 CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
229 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
230 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
231 CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
232 CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
233 CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
234 CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
235 CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
236 CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
237 CFI_OFFSET ebx, PT_EBX-PT_OLDESP
# Fragment — presumably the body of ret_from_fork (the ENTRY line and
# the push/call schedule_tail/pop instructions are missing from this
# excerpt): the child resets kernel eflags to a sane 0x0202 (IF set)
# via push/popf before falling into the syscall-exit path.
242 CFI_ADJUST_CFA_OFFSET 4
244 GET_THREAD_INFO(%ebp)
246 CFI_ADJUST_CFA_OFFSET -4
247 pushl $0x0202 # Reset kernel eflags
248 CFI_ADJUST_CFA_OFFSET 4
250 CFI_ADJUST_CFA_OFFSET -4
255 * Return to user mode is not as complex as all this looks,
256 * but we want the default path for a system call return to
257 * go as quickly as possible which is why some of this is
258 * less clear than it otherwise should be.
# ret_from_exception / ret_from_intr (entry labels missing from this
# excerpt): decide whether we are returning to kernel or to
# user/v8086 mode by combining the low bits of saved CS (RPL) with the
# VM flag from saved EFLAGS, then either resume the kernel (possibly
# preempting) or check for user-visible work.
261 # userspace resumption stub bypassing syscall exit tracing
265 preempt_stop(CLBR_ANY)
267 GET_THREAD_INFO(%ebp)
# Fold CS's low byte into EFLAGS' low byte so one andl can test both
# the VM86 flag and the return privilege level.
269 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
270 movb PT_CS(%esp), %al
271 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
# The cmpl against USER_RPL that sets the flags for this jb is among
# the missing lines.
273 jb resume_kernel # not returning to v8086 or userspace
275 ENTRY(resume_userspace)
# Interrupts must be off from here to the iret so a flag set by an
# interrupt after the TI_flags sample is not lost.
276 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
277 # setting need_resched or sigpending
278 # between sampling and the iret
279 movl TI_flags(%ebp), %ecx
280 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
281 # int/exception return?
# resume_kernel (CONFIG_PREEMPT): only preempt when preempt_count is
# zero, need_resched is set, and the interrupted context had
# interrupts enabled; loop back after preempt_schedule_irq (jump
# targets are among the missing lines).
285 #ifdef CONFIG_PREEMPT
287 DISABLE_INTERRUPTS(CLBR_ANY)
288 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
291 movl TI_flags(%ebp), %ecx # need_resched set ?
292 testb $_TIF_NEED_RESCHED, %cl
294 testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
296 call preempt_schedule_irq
301 /* SYSENTER_RETURN points to after the "sysenter" instruction in
302 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
304 # sysenter call handler stub
# On sysenter the CPU loads ESP from the TSS sysenter fields; we arrive
# with the user ESP in %ebp and switch to the real kernel stack, then
# hand-build the iret-style frame (ss/esp/eflags/cs/eip) that a normal
# int 0x80 entry would have. Many interleaved pushes are missing from
# this excerpt; only their CFI annotations remain.
305 ENTRY(sysenter_entry)
309 CFI_REGISTER esp, ebp
310 movl SYSENTER_stack_esp0(%esp),%esp
313 * No need to follow this irqs on/off section: the syscall
314 * disabled irqs and here we enable it straight after entry:
316 ENABLE_INTERRUPTS(CLBR_NONE)
318 CFI_ADJUST_CFA_OFFSET 4
319 /*CFI_REL_OFFSET ss, 0*/
321 CFI_ADJUST_CFA_OFFSET 4
322 CFI_REL_OFFSET esp, 0
324 CFI_ADJUST_CFA_OFFSET 4
326 CFI_ADJUST_CFA_OFFSET 4
327 /*CFI_REL_OFFSET cs, 0*/
# Return EIP: normally read from thread_info->sysenter_return (reached
# relative to the kernel stack top); with COMPAT_VDSO the fixed
# SYSENTER_RETURN address is used instead (#else line missing here).
328 #ifndef CONFIG_COMPAT_VDSO
330 * Push current_thread_info()->sysenter_return to the stack.
331 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
332 * pushed above; +8 corresponds to copy_thread's esp0 setting.
334 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
336 pushl $SYSENTER_RETURN
338 CFI_ADJUST_CFA_OFFSET 4
339 CFI_REL_OFFSET eip, 0
342 * Load the potential sixth argument from user stack.
343 * Careful about security.
# %ebp still holds the user stack pointer; -3 allows the 4-byte read of
# arg6 to end exactly at __PAGE_OFFSET. The guarded "1:" load itself is
# among the missing lines; its exception-table entry follows.
345 cmpl $__PAGE_OFFSET-3,%ebp
348 .section __ex_table,"a"
350 .long 1b,syscall_fault
354 CFI_ADJUST_CFA_OFFSET 4
356 GET_THREAD_INFO(%ebp)
358 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
359 testw $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
360 jnz syscall_trace_entry
361 cmpl $(nr_syscalls), %eax
363 call *sys_call_table(,%eax,4)
364 movl %eax,PT_EAX(%esp)
365 DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
367 movl TI_flags(%ebp), %ecx
368 testw $_TIF_ALLWORK_MASK, %cx
369 jne syscall_exit_work
# Fast sysexit return: user EIP in %edx, user ESP in %ecx as sysexit
# requires; gs reload is fixup-protected (fixup "2:" below zeroes it).
370 /* if something modifies registers it must also disable sysexit */
371 movl PT_EIP(%esp), %edx
372 movl PT_OLDESP(%esp), %ecx
375 1: mov PT_GS(%esp), %gs
# Xen variant of the sysexit tail: events must be re-enabled and
# re-checked atomically-ish; the region between scrit/ecrit is patched
# up by the callback's critical-region fixup if an upcall lands inside.
378 sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
380 jnz 14f # process more events if necessary...
381 movl PT_ESI(%esp), %esi
383 14: __DISABLE_INTERRUPTS
384 sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
386 CFI_ADJUST_CFA_OFFSET 4
387 call evtchn_do_upcall
389 CFI_ADJUST_CFA_OFFSET -4
392 ENABLE_INTERRUPTS_SYSEXIT
393 #endif /* !CONFIG_XEN */
# Fixup for the "1:" gs reload above: a faulting user gs is replaced
# with 0 rather than killing the task.
395 .pushsection .fixup,"ax"
396 2: movl $0,PT_GS(%esp)
398 .section __ex_table,"a"
# int 0x80 system-call entry (ENTRY(system_call) line missing from this
# excerpt). Saves orig_eax, builds pt_regs, dispatches through
# sys_call_table, then falls into the common exit/work-check path.
403 # system call handler stub
405 RING0_INT_FRAME # can't unwind into user space anyway
406 pushl %eax # save orig_eax
407 CFI_ADJUST_CFA_OFFSET 4
409 GET_THREAD_INFO(%ebp)
# If the caller single-steps into the syscall (TF set in saved
# EFLAGS), record it as _TIF_SINGLESTEP so the exit path re-arms the
# trap for the debugger.
410 testl $TF_MASK,PT_EFLAGS(%esp)
412 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
414 # system call tracing in operation / emulation
415 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
416 testw $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
417 jnz syscall_trace_entry
418 cmpl $(nr_syscalls), %eax
421 call *sys_call_table(,%eax,4)
422 movl %eax,PT_EAX(%esp) # store the return value
424 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
425 # setting need_resched or sigpending
426 # between sampling and the iret
428 movl TI_flags(%ebp), %ecx
429 testw $_TIF_ALLWORK_MASK, %cx # current->work
430 jne syscall_exit_work
# restore_all: decide between the plain iret and the 16-bit-LDT-stack
# (espfix) path by classifying saved EFLAGS.VM, SS's table bit and CS's
# RPL in one 32-bit compare.
434 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
435 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
436 # are returning to the kernel.
437 # See comments in process.c:copy_thread() for details.
438 movb PT_OLDSS(%esp), %ah
439 movb PT_CS(%esp), %al
440 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
441 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
443 je ldt_ss # returning to user-space with LDT SS
# Xen: re-enable event delivery on return only when the frame being
# restored had IF set; EAX bit 0 ends up as (IF && !event_mask).
447 movl PT_EFLAGS(%esp), %eax
448 testl $(VM_MASK|NMI_MASK), %eax
450 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
452 andb evtchn_upcall_mask(%esi),%al
453 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
454 jnz restore_all_enable_events # != 0 => enable event delivery
# Common restore: pop pt_regs, discard orig_eax/error code, iret (the
# RESTORE_REGS / iret instructions are among the missing lines; the
# iret's exception-table entry routes faults to the fixup below).
458 restore_nocheck_notrace:
460 addl $4, %esp # skip orig_eax/error_code
461 CFI_ADJUST_CFA_OFFSET -4
467 ENABLE_INTERRUPTS(CLBR_NONE)
469 pushl $0 # no error code
473 .section __ex_table,"a"
# ldt_ss: returning to user space on a 16-bit stack segment. The lar
# check (missing #ifndef context) confirms whether SS is actually a
# 32-bit stack; bit 22 of the descriptor's access rights is the D/B
# (default size) bit.
481 larl PT_OLDSS(%esp), %eax
483 testl $0x00400000, %eax # returning to 32bit stack?
484 jnz restore_nocheck # allright, normal return
486 #ifdef CONFIG_PARAVIRT
488 * The kernel can't run on a non-flat stack if paravirt mode
489 * is active. Rather than try to fixup the high bits of
490 * ESP, bypass this code entirely. This may break DOSemu
491 * and/or Wine support in a paravirt VM, although the option
492 * is still available to implement the setting of the high
493 * 16-bits in the INTERRUPT_RETURN paravirt-op.
495 cmpl $0, paravirt_ops+PARAVIRT_enabled
499 /* If returning to userspace with 16bit stack,
500 * try to fix the higher word of ESP, as the CPU
502 * This is an "official" bug of all the x86-compatible
503 * CPUs, which we can try to work around to make
504 * dosemu and wine happy. */
# Build an espfix descriptor for the user ESP and lss onto it; the
# push/lss instructions around these CFI lines are missing from the
# excerpt.
505 movl PT_OLDESP(%esp), %eax
507 call patch_espfix_desc
509 CFI_ADJUST_CFA_OFFSET 4
511 CFI_ADJUST_CFA_OFFSET 4
512 DISABLE_INTERRUPTS(CLBR_EAX)
515 CFI_ADJUST_CFA_OFFSET -8
# Xen return path: clear the pseudo-NMI mask bit in the saved EFLAGS
# and jump into the hypercall page's iret hypercall stub (each
# hypercall slot is 32 bytes).
519 andl $~NMI_MASK, PT_EFLAGS(%esp)
522 jmp hypercall_page + (__HYPERVISOR_iret * 32)
# work_pending / work_resched: loop calling schedule() while
# need_resched stays set, re-disabling interrupts and re-sampling
# TI_flags each pass (the schedule call and loop jumps are among the
# missing lines).
526 # perform work that needs to be done immediately before resumption
528 RING0_PTREGS_FRAME # can't unwind into user space anyway
530 testb $_TIF_NEED_RESCHED, %cl
534 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
535 # setting need_resched or sigpending
536 # between sampling and the iret
538 movl TI_flags(%ebp), %ecx
539 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
540 # than syscall tracing?
542 testb $_TIF_NEED_RESCHED, %cl
# Signal/notify-resume delivery; the v8086 case first saves the vm86
# state so do_notify_resume sees a normal pt_regs.
545 work_notifysig: # deal with pending signals and
546 # notify-resume requests
548 testl $VM_MASK, PT_EFLAGS(%esp)
550 jne work_notifysig_v86 # returning to kernel-space or
553 call do_notify_resume
554 jmp resume_userspace_sig
558 pushl %ecx # save ti_flags for do_notify_resume
559 CFI_ADJUST_CFA_OFFSET 4
560 call save_v86_state # %eax contains pt_regs pointer
562 CFI_ADJUST_CFA_OFFSET -4
568 call do_notify_resume
569 jmp resume_userspace_sig
# syscall_trace_entry: report entry to the tracer with EAX preset to
# -ENOSYS (the value seen if the tracer skips the call), then
# re-validate the possibly-rewritten syscall number.
571 # perform syscall exit tracing
574 movl $-ENOSYS,PT_EAX(%esp)
577 call do_syscall_trace
578 movl PT_ORIG_EAX(%esp), %eax
579 cmpl $(nr_syscalls), %eax
# syscall_exit_work: exit-side tracing/audit/single-step reporting,
# run with interrupts enabled.
583 # perform syscall exit tracing
586 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
589 ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
593 call do_syscall_trace
# syscall_fault / syscall_badsys: arg-copy fault yields -EFAULT, an
# out-of-range syscall number yields -ENOSYS.
597 RING0_INT_FRAME # can't unwind into user space anyway
599 pushl %eax # save orig_eax
600 CFI_ADJUST_CFA_OFFSET 4
602 GET_THREAD_INFO(%ebp)
603 movl $-EFAULT,PT_EAX(%esp)
607 movl $-ENOSYS,PT_EAX(%esp)
# Recover the true kernel ESP while running on the tiny espfix stack:
# read this CPU's GDT, decode the ESPFIX_SS descriptor base into %eax,
# then lss back onto the real stack (the lss itself is among the
# missing lines).
612 #define FIXUP_ESPFIX_STACK \
613 /* since we are on a wrong stack, we cant make it a C code :( */ \
614 movl %gs:PDA_cpu, %ebx; \
615 PER_CPU(cpu_gdt_descr, %ebx); \
616 movl GDS_address(%ebx), %ebx; \
617 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
619 pushl $__KERNEL_DS; \
620 CFI_ADJUST_CFA_OFFSET 4; \
622 CFI_ADJUST_CFA_OFFSET 4; \
624 CFI_ADJUST_CFA_OFFSET -8;
# Used at exception entry: if SS is the espfix segment, switch back to
# the normal stack first; otherwise just normalize %eax to KERNEL_DS.
625 #define UNWIND_ESPFIX_STACK \
627 /* see if on espfix stack */ \
628 cmpw $__ESPFIX_SS, %ax; \
630 movl $__KERNEL_DS, %eax; \
633 /* switch to normal stack */ \
634 FIXUP_ESPFIX_STACK; \
638 * Build the entry stubs and pointer table with
639 * some assembler magic.
# Per-vector stubs: each pushes (vector - 256) and jumps to common_interrupt
# (the .rept/vector arithmetic lines are missing from this excerpt).
646 ENTRY(irq_entries_start)
651 CFI_ADJUST_CFA_OFFSET -4
654 CFI_ADJUST_CFA_OFFSET 4
663 * the CPU automatically disables interrupts when executing an IRQ vector,
664 * so IRQ-flags tracing has to follow that:
# Stub generator for named (SMP/APIC) interrupts; body largely missing.
675 #define BUILD_INTERRUPT(name, nr) \
679 CFI_ADJUST_CFA_OFFSET 4; \
687 /* The include is where all of the SMP etc. interrupts come from */
688 #include "entry_arch.h"
# CONFIG_XEN build: espfix never applies, so the unwind helper is empty
# (the surrounding #ifdef is among the missing lines — verify).
690 #define UNWIND_ESPFIX_STACK
# page_fault + common error_code path: the handler address is pushed
# where gs normally lives in pt_regs, the remaining registers are
# saved (pushes interleaved with these CFI lines are missing from the
# excerpt), then the C handler is called with pt_regs in %eax and the
# error code in %edx.
693 KPROBE_ENTRY(page_fault)
696 CFI_ADJUST_CFA_OFFSET 4
699 /* the function address is in %gs's slot on the stack */
701 CFI_ADJUST_CFA_OFFSET 4
702 /*CFI_REL_OFFSET es, 0*/
704 CFI_ADJUST_CFA_OFFSET 4
705 /*CFI_REL_OFFSET ds, 0*/
707 CFI_ADJUST_CFA_OFFSET 4
708 CFI_REL_OFFSET eax, 0
710 CFI_ADJUST_CFA_OFFSET 4
711 CFI_REL_OFFSET ebp, 0
713 CFI_ADJUST_CFA_OFFSET 4
714 CFI_REL_OFFSET edi, 0
716 CFI_ADJUST_CFA_OFFSET 4
717 CFI_REL_OFFSET esi, 0
719 CFI_ADJUST_CFA_OFFSET 4
720 CFI_REL_OFFSET edx, 0
722 CFI_ADJUST_CFA_OFFSET 4
723 CFI_REL_OFFSET ecx, 0
725 CFI_ADJUST_CFA_OFFSET 4
726 CFI_REL_OFFSET ebx, 0
729 CFI_ADJUST_CFA_OFFSET 4
730 /*CFI_REL_OFFSET gs, 0*/
731 movl $(__KERNEL_PDA), %ecx
735 CFI_ADJUST_CFA_OFFSET -4
736 /*CFI_REGISTER es, ecx*/
737 movl PT_GS(%esp), %edi # get the function address
738 movl PT_ORIG_EAX(%esp), %edx # get the error code
# orig_eax is set to -1 so signal code knows there is no syscall to
# restart from this frame.
739 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
740 mov %ecx, PT_GS(%esp)
741 /*CFI_REL_OFFSET gs, ES*/
742 movl $(__USER_DS), %ecx
745 movl %esp,%eax # pt_regs pointer
747 jmp ret_from_exception
749 KPROBE_END(page_fault)
752 # A note on the "critical region" in our callback handler.
753 # We want to avoid stacking callback handlers due to events occurring
754 # during handling of the last event. To do this, we keep events disabled
755 # until we've done all processing. HOWEVER, we must enable events before
756 # popping the stack frame (can't be done atomically) and so it would still
757 # be possible to get enough handler activations to overflow the stack.
758 # Although unlikely, bugs of that kind are hard to track down, so we'd
759 # like to avoid the possibility.
760 # So, on entry to the handler we detect whether we interrupted an
761 # existing activation in its critical region -- if so, we pop the current
762 # activation and restart the handler using the previous one.
764 # The sysexit critical region is slightly different. sysexit
765 # atomically removes the entire stack frame. If we interrupt in the
766 # critical region we know that the entire frame is present and correct
767 # so we can simply throw away the new one.
# Entry for Xen event upcalls: classify the interrupted EIP against the
# scrit/ecrit and sysexit_scrit/sysexit_ecrit ranges (compare/branch
# lines partially missing from this excerpt).
768 ENTRY(hypervisor_callback)
771 CFI_ADJUST_CFA_OFFSET 4
773 movl PT_EIP(%esp),%eax
777 jb critical_region_fixup
778 cmpl $sysexit_scrit,%eax
780 cmpl $sysexit_ecrit,%eax
# Interrupted inside the sysexit critical region: the old frame is
# complete, so drop the new cs..ebx (0x38 bytes) and reuse it.
782 # interrupted in sysexit critical
783 addl $0x38,%esp # Remove cs...ebx from stack frame.
784 # this popped off new frame to reuse the old one, therefore no
785 # CFI_ADJUST_CFA_OFFSET here
787 CFI_ADJUST_CFA_OFFSET 4
788 call evtchn_do_upcall
790 CFI_ADJUST_CFA_OFFSET -4
# restore_all_enable_events .. ecrit: re-enable event delivery, recheck
# for pending events, and pop the frame; an upcall landing anywhere in
# scrit..ecrit is repaired by critical_region_fixup below. The pops and
# the guarded iret ("1:"-style label feeding the __ex_table entry) are
# among the lines missing from this excerpt.
794 restore_all_enable_events:
796 scrit: /**** START OF CRITICAL REGION ****/
798 jnz 14f # process more events if necessary...
801 CFI_ADJUST_CFA_OFFSET -4
803 .section __ex_table,"a"
807 14: __DISABLE_INTERRUPTS
809 ecrit: /**** END OF CRITICAL REGION ****/
810 # [How we do the fixup]. We want to merge the current stack frame with the
811 # just-interrupted frame. How we do this depends on where in the critical
812 # region the interrupted handler was executing, and so how many saved
813 # registers are in each frame. We do this quickly using the lookup table
814 # 'critical_fixup_table'. For each byte offset in the critical region, it
815 # provides the number of bytes which have already been popped from the
816 # interrupted stack frame.
817 critical_region_fixup:
# Table lookup: %eax = bytes already popped from the interrupted frame
# at the faulting offset; 0xff flags the vcpu_info write region.
818 addl $critical_fixup_table-scrit,%eax
819 movzbl (%eax),%eax # %eax contains num bytes popped
820 cmpb $0xff,%al # 0xff => vcpu_info critical region
822 GET_THREAD_INFO(%ebp)
# Merge frames with a backwards word copy; src/dst setup lines are
# partially missing (0x38 = size of the cs..ebx frame).
825 add %eax,%esi # %esi points at end of src region
827 add $0x38,%edi # %edi points at end of dst region
# NOTE(review): comment says "convert words to bytes" but shr $2
# converts bytes to words — wording looks inverted in the original.
829 shr $2,%ecx # convert words to bytes
830 je 17f # skip loop if nothing to copy
831 16: subl $4,%esi # pre-decrementing copy loop
836 17: movl %edi,%esp # final %edi is top of merged stack
837 # this popped off new frame to reuse the old one, therefore no
838 # CFI_DEF_CFA_OFFSET here
# One byte per instruction byte of the scrit..ecrit region: how much of
# the frame had been popped at that point (rows for some pops are
# missing from this excerpt).
842 critical_fixup_table:
843 .byte 0xff,0xff,0xff # testb $0xff,(%esi) = __TEST_PENDING
844 .byte 0xff,0xff # jnz 14f
845 .byte 0x00 # pop %ebx
846 .byte 0x04 # pop %ecx
847 .byte 0x08 # pop %edx
848 .byte 0x0c # pop %esi
849 .byte 0x10 # pop %edi
850 .byte 0x14 # pop %ebp
851 .byte 0x18 # pop %eax
855 .byte 0x28,0x28,0x28 # add $4,%esp
857 .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
858 .byte 0x00,0x00 # jmp 11b
860 # Hypervisor uses this for application faults while it executes.
861 # We get here for two reasons:
862 # 1. Fault while reloading DS, ES, FS or GS
863 # 2. Fault while executing IRET
864 # Category 1 we fix up by reattempting the load, and zeroing the segment
865 # register if the load fails.
866 # Category 2 we fix up by jumping to do_iret_error. We cannot use the
867 # normal Linux return path in this case because if we use the IRET hypercall
868 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
869 # We distinguish between categories by maintaining a status value in EAX.
# The segment reload sequence and branches between these lines are
# missing from this excerpt; 16 bytes = the four saved segment slots.
870 ENTRY(failsafe_callback)
873 CFI_ADJUST_CFA_OFFSET 4
881 CFI_ADJUST_CFA_OFFSET -4
883 addl $16,%esp # EAX != 0 => Category 2 (Bad IRET)
884 CFI_ADJUST_CFA_OFFSET -16
886 CFI_ADJUST_CFA_OFFSET 16
887 5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment)
888 CFI_ADJUST_CFA_OFFSET -16
890 CFI_ADJUST_CFA_OFFSET 4
892 jmp ret_from_exception
# Fixup bodies for the guarded segment reloads: store the sanitized
# value back into the saved frame slots (12/16 bytes up the stack).
893 .section .fixup,"ax"; \
901 movl %eax,12(%esp); \
904 movl %eax,16(%esp); \
907 .section __ex_table,"a"; \
# FPU exception stubs: push a zero error code (push missing from this
# excerpt) and the C handler address, then fall into the common error
# path.
917 ENTRY(coprocessor_error)
920 CFI_ADJUST_CFA_OFFSET 4
921 pushl $do_coprocessor_error
922 CFI_ADJUST_CFA_OFFSET 4
926 ENTRY(simd_coprocessor_error)
929 CFI_ADJUST_CFA_OFFSET 4
930 pushl $do_simd_coprocessor_error
931 CFI_ADJUST_CFA_OFFSET 4
# #NM (device not available): if CR0.EM is set, route to the math
# emulator (the GET_CR0_INTO_EAX and call-emulator lines are among the
# missing lines); otherwise restore the FPU state lazily.
935 ENTRY(device_not_available)
937 pushl $-1 # mark this as an int
938 CFI_ADJUST_CFA_OFFSET 4
942 testl $0x4, %eax # EM (math emulation bit)
943 je device_available_emulate
944 pushl $0 # temporary storage for ORIG_EIP
945 CFI_ADJUST_CFA_OFFSET 4
948 CFI_ADJUST_CFA_OFFSET -4
949 jmp ret_from_exception
950 device_available_emulate:
952 preempt_stop(CLBR_ANY)
953 call math_state_restore
954 jmp ret_from_exception
959 * Debug traps and NMI can happen at the one SYSENTER instruction
960 * that sets up the real kernel stack. Check here, since we can't
961 * allow the wrong stack to be used.
963 * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
964 * already pushed 3 words if it hits on the sysenter instruction:
965 * eflags, cs and eip.
967 * We just load the right stack, and push the three (known) values
968 * by hand onto the new stack - while updating the return eip past
969 * the instruction that would have done it for sysenter.
# FIX_STACK(offset, ok, label): verify we came from kernel CS, switch
# to the real sysenter kernel stack, and rebuild eflags/cs/eip with
# eip pointed past the faulting esp load (some interior lines of the
# macro are missing from this excerpt).
971 #define FIX_STACK(offset, ok, label) \
972 cmpw $__KERNEL_CS,4(%esp); \
975 movl SYSENTER_stack_esp0+offset(%esp),%esp; \
976 CFI_DEF_CFA esp, 0; \
979 CFI_ADJUST_CFA_OFFSET 4; \
980 pushl $__KERNEL_CS; \
981 CFI_ADJUST_CFA_OFFSET 4; \
982 pushl $sysenter_past_esp; \
983 CFI_ADJUST_CFA_OFFSET 4; \
984 CFI_REL_OFFSET eip, 0
985 #endif /* CONFIG_XEN */
# #DB handler fragment (KPROBE_ENTRY(debug) line missing from this
# excerpt): if the trap hit exactly on the sysenter entry instruction,
# repair the stack before building pt_regs and calling do_debug.
990 cmpl $sysenter_entry,(%esp)
991 jne debug_stack_correct
992 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
994 #endif /* !CONFIG_XEN */
995 pushl $-1 # mark this as an int
996 CFI_ADJUST_CFA_OFFSET 4
998 xorl %edx,%edx # error code 0
999 movl %esp,%eax # pt_regs pointer
1001 jmp ret_from_exception
1007 * NMI is doubly nasty. It can happen _while_ we're handling
1008 * a debug fault, and the debug fault hasn't yet been able to
1009 * clear up the stack. So we first check whether we got an
1010 * NMI on the sysenter entry path, but after that we need to
1011 * check whether we got an NMI on the debug path where the debug
1012 * fault happened on the sysenter path.
# NMI entry: first rule out the espfix stack (SS check), then the
# sysenter entry point, then the debug-on-sysenter window. Several
# push/pop and branch lines are missing from this excerpt.
1017 CFI_ADJUST_CFA_OFFSET 4
1019 cmpw $__ESPFIX_SS, %ax
1021 CFI_ADJUST_CFA_OFFSET -4
1023 cmpl $sysenter_entry,(%esp)
1026 CFI_ADJUST_CFA_OFFSET 4
1028 /* Do not access memory above the end of our stack page,
1029 * it might not exist.
# Bounds check: only peek at 12(%esp) if it is still inside this
# THREAD_SIZE stack page.
1031 andl $(THREAD_SIZE-1),%eax
1032 cmpl $(THREAD_SIZE-20),%eax
1034 CFI_ADJUST_CFA_OFFSET -4
1035 jae nmi_stack_correct
1036 cmpl $sysenter_entry,12(%esp)
1037 je nmi_debug_stack_check
# nmi_stack_correct: normal case — build pt_regs, call do_nmi, and
# return WITHOUT the work-check path (restore_nocheck_notrace) since
# NMIs must not schedule.
1039 /* We have a RING0_INT_FRAME here */
1041 CFI_ADJUST_CFA_OFFSET 4
1043 xorl %edx,%edx # zero error code
1044 movl %esp,%eax # pt_regs pointer
1046 jmp restore_nocheck_notrace
1051 FIX_STACK(12,nmi_stack_correct, 1)
1052 jmp nmi_stack_correct
# NMI arrived while the debug handler was mid-FIX_STACK on the
# sysenter path: frame is 3 words deeper, hence offset 24.
1054 nmi_debug_stack_check:
1055 /* We have a RING0_INT_FRAME here */
1056 cmpw $__KERNEL_CS,16(%esp)
1057 jne nmi_stack_correct
1059 jb nmi_stack_correct
1060 cmpl $debug_esp_fix_insn,(%esp)
1061 ja nmi_stack_correct
1062 FIX_STACK(24,nmi_stack_correct, 1)
1063 jmp nmi_stack_correct
# NMI hit while on the 16-bit espfix stack: copy the 12-byte iret
# frame onto the real stack, run do_nmi there, then lss back to the
# espfix stack for the final iret (interleaved pushes/calls are
# missing from this excerpt).
1066 /* We have a RING0_INT_FRAME here.
1068 * create the pointer to lss back
1071 CFI_ADJUST_CFA_OFFSET 4
1073 CFI_ADJUST_CFA_OFFSET 4
1075 /* copy the iret frame of 12 bytes */
1078 CFI_ADJUST_CFA_OFFSET 4
1081 CFI_ADJUST_CFA_OFFSET 4
1083 FIXUP_ESPFIX_STACK # %eax == %esp
1084 xorl %edx,%edx # zero error code
1087 lss 12+4(%esp), %esp # back to espfix stack
1088 CFI_ADJUST_CFA_OFFSET -24
1091 .section __ex_table,"a"
# Xen NMI variant / paravirt iret fragment, followed by what is
# presumably the int3 (KPROBE) entry: pt_regs built with -1 orig_eax
# marker and a zero error code. The ENTRY/KPROBE_ENTRY labels and the
# call to the C handler are among the missing lines — verify against
# the full source. NMI_MASK is OR'ed into saved EFLAGS on the Xen NMI
# return path to block nested event delivery.
1105 orl $NMI_MASK, PT_EFLAGS(%esp)
1111 #ifdef CONFIG_PARAVIRT
1114 .section __ex_table,"a"
1119 ENTRY(native_irq_enable_sysexit)
1126 pushl $-1 # mark this as an int
1127 CFI_ADJUST_CFA_OFFSET 4
1129 xorl %edx,%edx # zero error code
1130 movl %esp,%eax # pt_regs pointer
1132 jmp ret_from_exception
# Remaining trap stubs. Each follows the same shape: push error code
# (or $0 when the CPU supplies none), push the C handler address, jump
# to the common error_code path. ENTRY labels and jumps for several of
# these (overflow/bounds/invalid_op openers at 1139-1150) are among
# the missing lines.
1139 CFI_ADJUST_CFA_OFFSET 4
1141 CFI_ADJUST_CFA_OFFSET 4
1148 CFI_ADJUST_CFA_OFFSET 4
1150 CFI_ADJUST_CFA_OFFSET 4
1157 CFI_ADJUST_CFA_OFFSET 4
1158 pushl $do_invalid_op
1159 CFI_ADJUST_CFA_OFFSET 4
1163 ENTRY(coprocessor_segment_overrun)
1166 CFI_ADJUST_CFA_OFFSET 4
1167 pushl $do_coprocessor_segment_overrun
1168 CFI_ADJUST_CFA_OFFSET 4
# These traps push a hardware error code, so only the handler address
# is pushed here.
1174 pushl $do_invalid_TSS
1175 CFI_ADJUST_CFA_OFFSET 4
1179 ENTRY(segment_not_present)
1181 pushl $do_segment_not_present
1182 CFI_ADJUST_CFA_OFFSET 4
1186 ENTRY(stack_segment)
1188 pushl $do_stack_segment
1189 CFI_ADJUST_CFA_OFFSET 4
1193 KPROBE_ENTRY(general_protection)
1195 pushl $do_general_protection
1196 CFI_ADJUST_CFA_OFFSET 4
1199 KPROBE_END(general_protection)
1201 ENTRY(alignment_check)
1203 pushl $do_alignment_check
1204 CFI_ADJUST_CFA_OFFSET 4
1210 pushl $0 # no error code
1211 CFI_ADJUST_CFA_OFFSET 4
1212 pushl $do_divide_error
1213 CFI_ADJUST_CFA_OFFSET 4
1217 #ifdef CONFIG_X86_MCE
# machine_check dispatches through the machine_check_vector pointer
# (no $ — the handler address is loaded from memory, not an immediate).
1218 ENTRY(machine_check)
1221 CFI_ADJUST_CFA_OFFSET 4
1222 pushl machine_check_vector
1223 CFI_ADJUST_CFA_OFFSET 4
# Xen: trap for the 4GB-segment fixup used by segmented userspace.
1228 ENTRY(fixup_4gb_segment)
1230 pushl $do_fixup_4gb_segment
1231 CFI_ADJUST_CFA_OFFSET 4
# Trampoline for kernel threads: fake a return address so the unwinder
# terminates, then call the thread function (call/push of the function
# and do_exit are among the missing lines).
1235 ENTRY(kernel_thread_helper)
1236 pushl $0 # fake return address for unwinder
1240 CFI_ADJUST_CFA_OFFSET 4
1243 CFI_ADJUST_CFA_OFFSET 4
1246 ENDPROC(kernel_thread_helper)
# The syscall pointer table lives in .rodata; its byte size is captured
# here so nr_syscalls (top of file) can be derived from it.
1248 .section .rodata,"a"
1249 #include "syscall_table.S"
1251 syscall_table_size=(.-sys_call_table)