/*
 *  linux/arch/i386/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after a timer interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16-byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'ret_from_system_call':
 *	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 * "current" is in register %ebx during any slow entries.
 */
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include "irq_vectors.h"
#include <xen/interface/xen.h>
#define nr_syscalls ((syscall_table_size)/4)
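/*
 * syscall_table_size is computed at the bottom of this file as
 * (.-sys_call_table); each entry is a 4-byte function pointer on i386
 * (see the "call *sys_call_table(,%eax,4)" dispatch below), so dividing
 * the byte size by 4 yields the number of syscall table entries.
 */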
#ifndef CONFIG_XEN
#define DISABLE_INTERRUPTS	cli
#define ENABLE_INTERRUPTS	sti
#else
/* Offsets into shared_info_t. */
#define evtchn_upcall_pending	/* 0 */
#define evtchn_upcall_mask	1

#define sizeof_vcpu_shift	6
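/*
 * Each vcpu_info_t slot inside shared_info_t occupies 2^6 == 64 bytes (as
 * implied by the shift below), so shifting a CPU number left by
 * sizeof_vcpu_shift turns it into a byte offset into the shared_info
 * vcpu_info array (used by the SMP variant of GET_VCPU_INFO).
 */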
#ifdef CONFIG_SMP
#define GET_VCPU_INFO		movl TI_cpu(%ebp),%esi			; \
				shl  $sizeof_vcpu_shift,%esi		; \
				addl HYPERVISOR_shared_info,%esi
#else
#define GET_VCPU_INFO		movl HYPERVISOR_shared_info,%esi
#endif
#define __DISABLE_INTERRUPTS	movb $1,evtchn_upcall_mask(%esi)
#define __ENABLE_INTERRUPTS	movb $0,evtchn_upcall_mask(%esi)
#define DISABLE_INTERRUPTS	GET_VCPU_INFO				; \
				__DISABLE_INTERRUPTS
#define ENABLE_INTERRUPTS	GET_VCPU_INFO				; \
				__ENABLE_INTERRUPTS
#define __TEST_PENDING		testb $0xFF,evtchn_upcall_pending(%esi)
#endif
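/*
 * Under Xen the guest must not use cli/sti directly.  These macros model
 * interrupt disabling by writing the per-vcpu evtchn_upcall_mask byte in
 * shared_info: 1 blocks event-channel upcalls, 0 allows them, and
 * __TEST_PENDING checks whether any events arrived while they were masked
 * (so delivery can be triggered when they are re-enabled).
 */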
#ifdef CONFIG_PREEMPT
#define preempt_stop		cli
#else
#define preempt_stop
#define resume_kernel		restore_nocheck
#endif
	movl $(__USER_DS), %edx; \

#define RESTORE_INT_REGS \

#define RESTORE_REGS \

	.section .fixup,"ax"; \

	.section __ex_table,"a"; \
	GET_THREAD_INFO(%ebp)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	GET_THREAD_INFO(%ebp)
	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb CS(%esp), %al
	testl $(VM_MASK | 2), %eax
ENTRY(resume_userspace)
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
#ifdef CONFIG_PREEMPT
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	testl $IF_MASK,EFLAGS(%esp)	# interrupts off (exception path) ?
	call preempt_schedule_irq
/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysenter.S, which defines the symbol. */
	# sysenter call handler stub
ENTRY(sysenter_entry)
	movl SYSENTER_stack_esp0(%esp),%esp
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
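	/*
	 * Worked through: copy_thread sets esp0 to THREAD_SIZE - 8 bytes
	 * above the thread_info, and %esp == esp0 - 4*4 after the four
	 * words pushed above, so
	 *   thread_info + TI_sysenter_return
	 *     == %esp + (TI_sysenter_return - THREAD_SIZE + 8 + 4*4).
	 */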
	/*
	 * Load the potential sixth argument from user stack.
	 * Careful about security.
	 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
.section __ex_table,"a"
	.long 1b,syscall_fault
.previous
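	/*
	 * The cmpl/jae above keep the 4-byte load at (%ebp) from crossing
	 * into kernel space: %ebp <= __PAGE_OFFSET-4 means its last byte is
	 * still below __PAGE_OFFSET.  If the load faults on an unmapped user
	 * page anyway, the __ex_table entry redirects to syscall_fault.
	 */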
	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
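	/*
	 * (testb would only reach bits 0-7 of TI_flags; _TIF_SECCOMP is
	 * bit 8, i.e. in the second byte, hence the 16-bit testw above.)
	 */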
	cmpl $(nr_syscalls), %eax
	call *sys_call_table(,%eax,4)
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
	jne syscall_exit_work
/* if something modifies registers it must also disable sysexit */
	movl OLDESP(%esp), %ecx
sysexit_scrit:	/**** START OF SYSEXIT CRITICAL REGION ****/
	jnz  14f			# process more events if necessary...
14:	__DISABLE_INTERRUPTS
sysexit_ecrit:	/**** END OF SYSEXIT CRITICAL REGION ****/
	call evtchn_do_upcall
#endif /* !CONFIG_XEN */
	# system call handler stub
	pushl %eax			# save orig_eax
	GET_THREAD_INFO(%ebp)
	testl $TF_MASK,EFLAGS(%esp)
	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
	# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	call *sys_call_table(,%eax,4)
	movl %eax,EAX(%esp)		# store the return value
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx	# current->work
	jne syscall_exit_work
	movl EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	# Warning: OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb OLDSS(%esp), %ah
	movb CS(%esp), %al
	andl $(VM_MASK | (4 << 8) | 3), %eax
	cmpl $((4 << 8) | 3), %eax
	je ldt_ss			# returning to user-space with LDT SS
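	# How the test works: %al holds CS and %ah holds OLDSS, so bits 0-1
	# of %eax carry the return CPL, and bit 2 of SS -- the TI (LDT) bit --
	# lands at bit 10, the (4 << 8) position.  Matching ((4 << 8) | 3)
	# therefore means "CPL-3 return on an LDT-based stack segment".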
	movl EFLAGS(%esp), %eax
	testl $(VM_MASK|NMI_MASK), %eax
	shr  $9, %eax			# EAX[0] == IRET_EFLAGS.IF
	GET_VCPU_INFO
	andb evtchn_upcall_mask(%esi),%al
	andb $1,%al			# EAX[0] == IRET_EFLAGS.IF & event_mask
	jnz restore_all_enable_events	# != 0 => enable event delivery
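	# That is: take the slow path only when the frame being restored has
	# EFLAGS.IF set (bit 9, hence the shr $9) while evtchn_upcall_mask is
	# still 1, so events must be unmasked (and pending ones delivered)
	# before the final return.  If either bit is 0, a plain restore is safe.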
	pushl $0			# no error code
.section __ex_table,"a"
ldt_ss:
	larl OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return
	/* If returning to userspace with 16bit stack,
	 * try to fix the higher word of ESP, as the CPU
	 * won't restore it.
	 * This is an "official" bug of all the x86-compatible
	 * CPUs, which we can try to work around to make
	 * dosemu and wine happy. */
	subl $8, %esp			# reserve space for switch16 pointer
	/* Set up the 16bit stack frame with switch32 pointer on top,
	 * and a switch16 pointer on top of the current frame. */
	call setup_x86_bogus_stack
	lss 20+4(%esp), %esp		# switch to 16bit stack
.section __ex_table,"a"

	andl $~NMI_MASK, EFLAGS(%esp)
	jmp  hypercall_page + (__HYPERVISOR_iret * 32)
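# The hypervisor populates the hypercall page with one 32-byte stub per
# hypercall, so stub N starts at hypercall_page + N*32; we tail-jump into
# the __HYPERVISOR_iret stub instead of executing a native iret.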
	# perform work that needs to be done immediately before resumption
	testb $_TIF_NEED_RESCHED, %cl
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	testb $_TIF_NEED_RESCHED, %cl
work_notifysig:				# deal with pending signals and
					# notify-resume requests
	testl $VM_MASK, EFLAGS(%esp)
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	call do_notify_resume

work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	call save_v86_state		# %eax contains pt_regs pointer
	call do_notify_resume
	# perform syscall entry tracing
syscall_trace_entry:
	movl $-ENOSYS,EAX(%esp)
	call do_syscall_trace
	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
					# so must skip actual syscall
	movl ORIG_EAX(%esp), %eax
	cmpl $(nr_syscalls), %eax
	# perform syscall exit tracing
syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
					# schedule() instead
	call do_syscall_trace
syscall_fault:
	pushl %eax			# save orig_eax
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,EAX(%esp)

syscall_badsys:
	movl $-ENOSYS,EAX(%esp)
#define FIXUP_ESPFIX_STACK \
	movl %esp, %eax; \
	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
	/* copy data from 16bit stack to 32bit stack */ \
	call fixup_x86_bogus_stack; \
	/* put ESP to the proper location */ \
	movl %eax, %esp;
#define UNWIND_ESPFIX_STACK \
	/* see if on 16bit stack */ \
	cmpw $__ESPFIX_SS, %ax; \
	movl $__KERNEL_DS, %edx; \
	/* switch to 32bit stack */ \
/*
 * Build the entry stubs and pointer table with
 * some assembler magic.
 */
ENTRY(irq_entries_start)
#define BUILD_INTERRUPT(name, nr) \

/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
#define UNWIND_ESPFIX_STACK

	pushl $0			# no error code
	pushl $do_divide_error
	movl ES(%esp), %edi		# get the function address
	movl ORIG_EAX(%esp), %edx	# get the error code
	movl %eax, ORIG_EAX(%esp)
	movl $(__USER_DS), %ecx
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
#
# The sysexit critical region is slightly different. sysexit
# atomically removes the entire stack frame. If we interrupt in the
# critical region we know that the entire frame is present and correct
# so we can simply throw away the new one.
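# The EIP range checks below implement that classification: an interrupt
# inside [scrit, ecrit) goes to critical_region_fixup to merge the two
# frames, while one inside [sysexit_scrit, sysexit_ecrit) simply discards
# the new frame, as described above.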
ENTRY(hypervisor_callback)
	jb   critical_region_fixup
	cmpl $sysexit_scrit,%eax
	cmpl $sysexit_ecrit,%eax
	addl $0x34,%esp			# Remove cs...ebx from stack frame.
	call evtchn_do_upcall
restore_all_enable_events:
scrit:	/**** START OF CRITICAL REGION ****/
	jnz  14f			# process more events if necessary...
.section __ex_table,"a"
14:	__DISABLE_INTERRUPTS
ecrit:	/**** END OF CRITICAL REGION ****/
# [How we do the fixup]. We want to merge the current stack frame with the
# just-interrupted frame. How we do this depends on where in the critical
# region the interrupted handler was executing, and so how many saved
# registers are in each frame. We do this quickly using the lookup table
# 'critical_fixup_table'. For each byte offset in the critical region, it
# provides the number of bytes which have already been popped from the
# interrupted stack frame.
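# Worked example: an event arriving at the "pop %edx" in the region above
# looks up 0x08 in the table (%ebx and %ecx already popped).  The fixup
# below then copies the bottom 8 bytes of the new frame -- the freshly
# re-saved %ebx and %ecx -- to just beneath the remainder of the
# interrupted frame, points %esp at the result, and restarts event
# processing on a single complete frame.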
critical_region_fixup:
	addl $critical_fixup_table-scrit,%eax
	movzbl (%eax),%eax		# %eax contains num bytes popped
	cmpb $0xff,%al			# 0xff => vcpu_info critical region
	GET_THREAD_INFO(%ebp)
	add  %eax,%esi			# %esi points at end of src region
	add  $0x34,%edi			# %edi points at end of dst region
	shr  $2,%ecx			# convert byte count to dword count
	je   17f			# skip loop if nothing to copy
16:	subl $4,%esi			# pre-decrementing copy loop
17:	movl %edi,%esp			# final %edi is top of merged stack
critical_fixup_table:
	.byte 0xff,0xff,0xff		# testb $0xff,(%esi) = __TEST_PENDING
	.byte 0xff,0xff			# jnz  14f
	.byte 0x00			# pop  %ebx
	.byte 0x04			# pop  %ecx
	.byte 0x08			# pop  %edx
	.byte 0x0c			# pop  %esi
	.byte 0x10			# pop  %edi
	.byte 0x14			# pop  %ebp
	.byte 0x18			# pop  %eax
	.byte 0x24,0x24,0x24		# add  $4,%esp
	.byte 0xff,0xff,0xff,0xff	# movb $1,1(%esi)
	.byte 0x00,0x00			# jmp  11b
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
ENTRY(failsafe_callback)
	addl $16,%esp			# EAX != 0 => Category 2 (Bad IRET)
5:	addl $16,%esp			# EAX == 0 => Category 1 (Bad segment)
	jmp ret_from_exception
.section .fixup,"ax";		\
	movl %eax,12(%esp);	\
	movl %eax,16(%esp);	\
.section __ex_table,"a";	\
ENTRY(coprocessor_error)
	pushl $do_coprocessor_error

ENTRY(simd_coprocessor_error)
	pushl $do_simd_coprocessor_error
ENTRY(device_not_available)
	pushl $-1			# mark this as an int
	testl $0x4, %eax		# EM (math emulation bit)
	je device_available_emulate
	pushl $0			# temporary storage for ORIG_EIP
	jmp ret_from_exception
device_available_emulate:
	call math_state_restore
	jmp ret_from_exception
/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
#define FIX_STACK(offset, ok, label)		\
	cmpw $__KERNEL_CS,4(%esp);		\
	jne ok;					\
label:						\
	movl SYSENTER_stack_esp0+offset(%esp),%esp; \
	pushfl;					\
	pushl $__KERNEL_CS;			\
	pushl $sysenter_past_esp
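/*
 * The offset argument encodes how far %esp has sunk below the point where
 * SYSENTER_stack_esp0 is addressable: 12 when a single 3-word trap frame
 * (eflags, cs, eip) is on the stack -- the debug and plain NMI cases --
 * and 24 in the NMI-during-debug-fixup case, where two such frames are
 * stacked (see the FIX_STACK(24, ...) use below).
 */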
#endif /* CONFIG_XEN */

	cmpl $sysenter_entry,(%esp)
	jne debug_stack_correct
	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
#endif /* !CONFIG_XEN */
	pushl $-1			# mark this as an int
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	jmp ret_from_exception
/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
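# The checks below run in that order: first "are we on the 16-bit espfix
# stack?", then "did the NMI hit the sysenter path directly?" (fixed with
# offset 12), and last "did it hit the debug handler's own stack fixup
# while that was handling a sysenter-path debug trap?" (fixed with
# offset 24).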
	cmpw $__ESPFIX_SS, %ax
	cmpl $sysenter_entry,(%esp)
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	jae nmi_stack_correct
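	# THREAD_SIZE-20 leaves headroom for the deepest probe made below
	# (the cmpw at 16(%esp)); any closer to the top of the stack page
	# and those reads could run off the end of the page.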
	cmpl $sysenter_entry,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	xorl %edx,%edx			# zero error code
	movl %esp,%eax			# pt_regs pointer

nmi_stack_fixup:
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct
nmi_debug_stack_check:
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	FIX_STACK(24,nmi_stack_correct, 1)
	jmp nmi_stack_correct
	/* create the pointer to lss back */
	/* copy the iret frame of 12 bytes */
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	lss 12+4(%esp), %esp		# back to 16bit stack
.section __ex_table,"a"
	xorl %edx,%edx			# zero error code
	movl %esp,%eax			# pt_regs pointer
	orl  $NMI_MASK, EFLAGS(%esp)

	pushl $-1			# mark this as an int
	xorl %edx,%edx			# zero error code
	movl %esp,%eax			# pt_regs pointer
	jmp ret_from_exception
ENTRY(coprocessor_segment_overrun)
	pushl $do_coprocessor_segment_overrun

	pushl $do_invalid_TSS

ENTRY(segment_not_present)
	pushl $do_segment_not_present

	pushl $do_stack_segment

KPROBE_ENTRY(general_protection)
	pushl $do_general_protection

ENTRY(alignment_check)
	pushl $do_alignment_check

KPROBE_ENTRY(page_fault)
#ifdef CONFIG_X86_MCE
	pushl machine_check_vector
#endif

ENTRY(fixup_4gb_segment)
	pushl $do_fixup_4gb_segment
#include "syscall_table.S"

syscall_table_size=(.-sys_call_table)