2 * linux/arch/i386/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
40 * "current" is in register %ebx during any slow entries.
43 #include <linux/config.h>
44 #include <linux/linkage.h>
45 #include <asm/thread_info.h>
46 #include <asm/asm_offsets.h>
47 #include <asm/errno.h>
48 #include <asm/segment.h>
52 #include "irq_vectors.h"
/* Table entries are 4 bytes each (.long), so size/4 = number of syscalls. */
54 #define nr_syscalls ((syscall_table_size)/4)
/* NOTE(review): these two look like the !CONFIG_PREEMPT variants --
 * preempt_stop is a plain cli and resume_kernel collapses straight to
 * restore_all.  The surrounding #ifdef is elided; confirm in full file. */
80 #define preempt_stop cli
83 #define resume_kernel restore_all
/*
 * CONFIG_X86_HIGH_ENTRY (exec-shield style entry): kernel entry code may
 * arrive on a "virtual" stack mapping and must switch to the kernel
 * pagetables and relocate %esp onto the real stack, and back on exit.
 * NOTE(review): many lines are elided in this excerpt -- the macros below
 * are incomplete as shown; consult the full file before editing.
 */
86 #ifdef CONFIG_X86_HIGH_ENTRY
88 #ifdef CONFIG_X86_SWITCH_PAGETABLES
90 #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
92 * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu,
93 * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
94 * left stale, so we must check whether to repeat the real stack calculation.
96 #define repeat_if_esp_changed \
98 testl $-THREAD_SIZE, %ebp; \
101 #define repeat_if_esp_changed
104 /* clobbers ebx, edx and ebp */
106 #define __SWITCH_KERNELSPACE \
107 cmpl $0xff000000, %esp; \
111 * switch pagetables and load the real stack, \
112 * keep the stack offset: \
115 movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \
117 /* GET_THREAD_INFO(%ebp) intermixed */ \
/* %ebp = thread_info base, %ebx = offset within the stack area: */ \
121 andl $(-THREAD_SIZE), %ebp; \
122 andl $(THREAD_SIZE-1), %ebx; \
/* rebase that offset onto the task's real (direct-mapped) stack: */ \
123 orl TI_real_stack(%ebp), %ebx; \
124 repeat_if_esp_changed; \
133 #define __SWITCH_USERSPACE \
134 /* interrupted any of the user return paths? */ \
136 movl EIP(%esp), %eax; \
/* was EIP inside the int80 return sequence? (range check) */ \
138 cmpl $int80_ret_start_marker, %eax; \
139 jb 33f; /* nope - continue with sysexit check */\
140 cmpl $int80_ret_end_marker, %eax; \
141 jb 22f; /* yes - switch to virtual stack */ \
/* was EIP inside the sysexit return sequence? */ \
143 cmpl $sysexit_ret_start_marker, %eax; \
144 jb 44f; /* nope - continue with user check */ \
145 cmpl $sysexit_ret_end_marker, %eax; \
146 jb 22f; /* yes - switch to virtual stack */ \
147 /* return to userspace? */ \
/* VM flag or CPL!=0 => user/vm86 (CS is mixed into %ecx on an elided line) */ \
149 movl EFLAGS(%esp),%ecx; \
151 testl $(VM_MASK | 3),%ecx; \
155 * switch to the virtual stack, then switch to \
156 * the userspace pagetables. \
159 GET_THREAD_INFO(%ebp); \
160 movl TI_virtual_stack(%ebp), %edx; \
161 movl TI_user_pgd(%ebp), %ecx; \
/* keep only the offset within the stack area: */ \
164 andl $(THREAD_SIZE-1), %ebx; \
166 int80_ret_start_marker: \
171 int80_ret_end_marker: \
174 #else /* !CONFIG_X86_HIGH_ENTRY */
/* Without high-entry support the kernel already runs on the real stack
 * and pagetables, so both switch helpers compile away to nothing. */
176 #define __SWITCH_KERNELSPACE
177 #define __SWITCH_USERSPACE
192 movl $(__USER_DS), %edx; \
196 #define __RESTORE_INT_REGS \
205 #define __RESTORE_REGS \
206 __RESTORE_INT_REGS; \
/* popping user segment registers can fault on bogus selectors; the \
   .fixup stubs below replace the faulting saved value with 0: */ \
209 .section .fixup,"ax"; \
210 444: movl $0,(%esp); \
212 555: movl $0,(%esp); \
215 .section __ex_table,"a";\
221 #define __RESTORE_ALL \
225 .section .fixup,"ax"; \
227 movl $(__USER_DS), %edx; \
233 .section __ex_table,"a";\
240 __SWITCH_KERNELSPACE;
/* RESTORE_ALL: the user-return path -- first switch back to the user
 * pagetables/virtual stack (no-op unless CONFIG_X86_HIGH_ENTRY), then
 * restore the saved register frame.  Elided lines complete the macro. */
242 #define RESTORE_ALL \
243 __SWITCH_USERSPACE; \
246 .section .entry.text,"ax"
# Call-gate entry (fragment -- its ENTRY label is elided; presumably the
# lcall7 handler).  An lcall through a call gate pushes a different frame
# layout, so eflags/eip/cs land in the wrong pt_regs slots and are
# rotated into their proper places below.
249 pushfl # We get a different stack layout with call
250 # gates, which has to be cleaned up later..
257 movl EIP(%ebp), %eax # due to call gates, this is eflags, not eip..
258 movl CS(%ebp), %edx # this is eip..
259 movl EFLAGS(%ebp), %ecx # and this is cs..
260 movl %eax,EFLAGS(%ebp) #
# (the store of %ecx -- the real cs -- into CS(%ebp) is on an elided line)
261 movl %edx,EIP(%ebp) # Now we move them to their "normal" places
263 GET_THREAD_INFO_WITH_ESP(%ebp) # GET_THREAD_INFO
264 movl TI_exec_domain(%ebp), %edx # Get the execution domain
265 call *EXEC_DOMAIN_handler(%edx) # Call the handler for the domain
# Second call-gate entry (fragment -- label elided; presumably the lcall27
# handler): begins the same call-gate frame cleanup. TODO confirm against
# the full file which entry this pushfl belongs to.
271 pushfl # We get a different stack layout with call
272 # gates, which has to be cleaned up later..
# (fragment -- surrounding label and code elided)
284 GET_THREAD_INFO(%ebp)
289 * Return to user mode is not as complex as all this looks,
290 * but we want the default path for a system call return to
291 * go as quickly as possible which is why some of this is
292 * less clear than it otherwise should be.
295 # userspace resumption stub bypassing syscall exit tracing
300 GET_THREAD_INFO(%ebp)
301 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
# Low 2 bits of CS = CPL; CPL==0 and no VM flag => stay in the kernel.
# (CS is mixed into %eax on an elided line.)
303 testl $(VM_MASK | 3), %eax
304 jz resume_kernel # returning to kernel or vm86-space
305 ENTRY(resume_userspace)
306 cli # make sure we don't miss an interrupt
307 # setting need_resched or sigpending
308 # between sampling and the iret
# Sample the work flags with interrupts off; the branch to work_pending
# on a non-zero mask is on an elided line.
309 movl TI_flags(%ebp), %ecx
310 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
311 # int/exception return?
# Kernel-preemption variant of resume_kernel: only reschedule when
# preempt_count is zero and the interrupted context had interrupts
# enabled.  (The conditional branches between these tests are elided.)
315 #ifdef CONFIG_PREEMPT
317 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
320 movl TI_flags(%ebp), %ecx # need_resched set ?
321 testb $_TIF_NEED_RESCHED, %cl
323 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
325 movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) # block recursive preemption
328 movl $0,TI_preempt_count(%ebp)
333 /* SYSENTER_RETURN points to after the "sysenter" instruction in
334 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
336 # sysenter call handler stub
337 ENTRY(sysenter_entry)
# On sysenter, %esp holds the TSS address (SYSENTER_ESP trick); load the
# real kernel stack pointer from it.
338 movl TSS_sysenter_esp0(%esp),%esp
346 * Push current_thread_info()->sysenter_return to the stack.
347 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
348 * pushed above, and the word being pushed now:
350 pushl (TI_sysenter_return-THREAD_SIZE+4*4)(%esp)
352 * No six-argument syscall is ever used with sysenter.
356 GET_THREAD_INFO(%ebp)
357 cmpl $(nr_syscalls), %eax
# NOTE(review): the jae to the bad-syscall path is elided in this excerpt.
360 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
361 jnz syscall_trace_entry
362 call *sys_call_table(,%eax,4)
365 movl TI_flags(%ebp), %ecx
366 testw $_TIF_ALLWORK_MASK, %cx
367 jne syscall_exit_work
369 #ifdef CONFIG_X86_SWITCH_PAGETABLES
# Fast sysexit return: move back to the virtual stack and user pagetables.
371 GET_THREAD_INFO(%ebp)
372 movl TI_virtual_stack(%ebp), %edx
373 movl TI_user_pgd(%ebp), %ecx
375 andl $(THREAD_SIZE-1), %ebx
377 sysexit_ret_start_marker:
381 * only ebx is not restored by the userspace sysenter vsyscall
382 * code, it assumes it to be callee-saved.
387 /* if something modifies registers it must also disable sysexit */
# sysexit expects user %esp in %ecx (and user %eip in %edx, set on an
# elided line).
389 movl OLDESP(%esp), %ecx
392 #ifdef CONFIG_X86_SWITCH_PAGETABLES
393 sysexit_ret_end_marker:
397 # system call handler stub
399 pushl %eax # save orig_eax
401 GET_THREAD_INFO(%ebp)
402 cmpl $(nr_syscalls), %eax
# NOTE(review): the jae to the bad-syscall path is elided in this excerpt.
404 # system call tracing in operation
405 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
406 jnz syscall_trace_entry
# Dispatch: %eax indexes the 4-byte-entry table of handler pointers.
408 call *sys_call_table(,%eax,4)
409 movl %eax,EAX(%esp) # store the return value
411 cli # make sure we don't miss an interrupt
412 # setting need_resched or sigpending
413 # between sampling and the iret
414 movl TI_flags(%ebp), %ecx
415 testw $_TIF_ALLWORK_MASK, %cx # current->work
416 jne syscall_exit_work
420 # perform work that needs to be done immediately before resumption
# work_pending/work_resched fragment: loop calling schedule() while
# TIF_NEED_RESCHED stays set, then fall through to the signal path.
# (Labels and the schedule() call are on elided lines.)
423 testb $_TIF_NEED_RESCHED, %cl
427 cli # make sure we don't miss an interrupt
428 # setting need_resched or sigpending
429 # between sampling and the iret
430 movl TI_flags(%ebp), %ecx
431 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
432 # than syscall tracing?
434 testb $_TIF_NEED_RESCHED, %cl
437 work_notifysig: # deal with pending signals and
438 # notify-resume requests
# vm86 frames need different handling before do_notify_resume.
439 testl $VM_MASK, EFLAGS(%esp)
441 jne work_notifysig_v86 # returning to kernel-space or
444 call do_notify_resume
# Fixed: use #ifdef for the boolean Kconfig symbol, matching the other
# CONFIG_X86_HIGH_ENTRY guards in this file; a bare #if on an undefined
# symbol is non-idiomatic and warns under -Wundef.
446 #ifdef CONFIG_X86_HIGH_ENTRY
448 * Reload db7 if necessary:
450 movl TI_flags(%ebp), %ecx
457 movl TI_task(%ebp), %edx;
458 movl task_thread_db7(%edx), %edx;
470 call do_notify_resume
473 # perform syscall entry tracing
# (comment fixed: this is the *entry* tracer -- note the reload and
# re-bounds-check of ORIG_EAX below, since ptrace may rewrite the nr)
# Default the saved return value to -ENOSYS before calling the tracer:
476 movl $-ENOSYS,EAX(%esp)
479 call do_syscall_trace
480 movl ORIG_EAX(%esp), %eax
481 cmpl $(nr_syscalls), %eax
485 # perform syscall exit tracing
488 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT), %cl
490 sti # could let do_syscall_trace() call
494 call do_syscall_trace
# Out-of-range syscall number after tracing => -ENOSYS (branch targets
# on elided lines).
499 movl $-ENOSYS,EAX(%esp)
503 * Build the entry stubs and pointer table with
504 * some assembler magic.
511 ENTRY(irq_entries_start)
# BUILD_INTERRUPT(name, nr): emits an ENTRY stub for a named SMP/APIC
# interrupt vector (macro body elided in this excerpt).
528 #define BUILD_INTERRUPT(name, nr) \
535 /* The include is where all of the SMP etc. interrupts come from */
536 #include "entry_arch.h"
# divide_error stub (ENTRY label elided): the CPU supplies no error code
# for #DE, so push a 0 placeholder plus the C handler, then fall into the
# common error path.
539 pushl $0 # no error code
540 pushl $do_divide_error
# Common exception dispatch (fragment -- its label is elided): saves the
# remaining registers, recovers the pushed error code and handler address
# from the frame, and calls the handler with a pt_regs pointer.
555 movl ORIG_EAX(%esp), %esi # get the error code
556 movl ES(%esp), %edi # get the function address
557 movl %eax, ORIG_EAX(%esp)
559 pushl %esi # push the error code
# %edx = __USER_DS; presumably loaded into %ds/%es on elided lines --
# confirm against the full file.
560 movl $(__USER_DS), %edx
564 /* clobbers edx, ebx and ebp */
567 leal 4(%esp), %edx # prepare pt_regs
568 pushl %edx # push pt_regs
572 jmp ret_from_exception
574 ENTRY(coprocessor_error)
576 pushl $do_coprocessor_error
579 ENTRY(simd_coprocessor_error)
581 pushl $do_simd_coprocessor_error
# #NM (device not available): lazily restore the FPU state, or hand off
# to the math emulator when the EM bit is set.  (The read of %cr0 into
# %eax is on an elided line.)
584 ENTRY(device_not_available)
585 pushl $-1 # mark this as an int
588 testl $0x4, %eax # EM (math emulation bit)
589 jne device_not_available_emulate
591 call math_state_restore
592 jmp ret_from_exception
593 device_not_available_emulate:
594 pushl $0 # temporary storage for ORIG_EIP
597 jmp ret_from_exception
600 * Debug traps and NMI can happen at the one SYSENTER instruction
601 * that sets up the real kernel stack. Check here, since we can't
602 * allow the wrong stack to be used.
604 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
605 * already pushed 3 words if it hits on the sysenter instruction:
606 * eflags, cs and eip.
608 * We just load the right stack, and push the three (known) values
609 * by hand onto the new stack - while updating the return eip past
610 * the instruction that would have done it for sysenter.
612 #define FIX_STACK(offset, ok, label) \
613 cmpw $__KERNEL_CS,4(%esp); \
/* non-kernel CS => stack is already fine; branch to "ok" (elided) */ \
616 movl TSS_sysenter_esp0+offset(%esp),%esp; \
618 pushl $__KERNEL_CS; \
619 pushl $sysenter_past_esp
# Debug trap (fragment): if the fault hit exactly on the sysenter
# instruction, repair the stack before doing anything else.
622 cmpl $sysenter_entry,(%esp)
623 jne debug_stack_correct
624 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
631 * NMI is doubly nasty. It can happen _while_ we're handling
632 * a debug fault, and the debug fault hasn't yet been able to
633 * clear up the stack. So we first check whether we got an
634 * NMI on the sysenter entry path, but after that we need to
635 * check whether we got an NMI on the debug path where the debug
636 * fault happened on the sysenter path.
639 cmpl $sysenter_entry,(%esp)
643 /* Do not access memory above the end of our stack page,
644 * it might not exist.
646 andl $(THREAD_SIZE-1),%eax
647 cmpl $(THREAD_SIZE-20),%eax
649 jae nmi_stack_correct
650 cmpl $sysenter_entry,12(%esp)
651 je nmi_debug_stack_check
663 FIX_STACK(12,nmi_stack_correct, 1)
664 jmp nmi_stack_correct
665 nmi_debug_stack_check:
666 cmpw $__KERNEL_CS,16(%esp)
667 jne nmi_stack_correct
# Is the interrupted EIP inside the debug handler's esp-fixup window?
668 cmpl $debug - 1,(%esp)
669 jle nmi_stack_correct
670 cmpl $debug_esp_fix_insn,(%esp)
671 jle nmi_debug_stack_fixup
# NOTE(review): as shown, the branch above targets the label that
# immediately follows, so both outcomes converge on the fixup; later
# kernels use an inverted range check (branching *away* to
# nmi_stack_correct) here -- confirm against the full file whether a
# line was elided.
672 nmi_debug_stack_fixup:
673 FIX_STACK(24,nmi_stack_correct, 1)
674 jmp nmi_stack_correct
# Remaining trap stubs: each pushes the C handler address (plus a 0
# placeholder, on elided lines, for vectors where the CPU supplies no
# error code) and joins the common error path.
696 ENTRY(coprocessor_segment_overrun)
698 pushl $do_coprocessor_segment_overrun
702 pushl $do_invalid_TSS
705 ENTRY(segment_not_present)
706 pushl $do_segment_not_present
710 pushl $do_stack_segment
713 ENTRY(general_protection)
714 pushl $do_general_protection
717 ENTRY(alignment_check)
718 pushl $do_alignment_check
725 #ifdef CONFIG_X86_MCE
# No '$' here on purpose: this pushes the *value* of the
# machine_check_vector function-pointer variable, not its address.
728 pushl machine_check_vector
732 ENTRY(spurious_interrupt_bug)
734 pushl $do_spurious_interrupt_bug
# System call dispatch table: entry i is the handler for syscall number i
# (each entry is one .long, hence nr_syscalls = table size / 4, and the
# dispatch is "call *sys_call_table(,%eax,4)").  Ordering must match the
# architecture's unistd.h numbering; the /* N */ comments mark every 5th
# number.  NOTE(review): many entries are elided in this excerpt.
740 ENTRY(sys_call_table)
741 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
746 .long sys_open /* 5 */
751 .long sys_unlink /* 10 */
756 .long sys_chmod /* 15 */
758 .long sys_ni_syscall /* old break syscall holder */
761 .long sys_getpid /* 20 */
766 .long sys_stime /* 25 */
771 .long sys_utime /* 30 */
772 .long sys_ni_syscall /* old stty syscall holder */
773 .long sys_ni_syscall /* old gtty syscall holder */
776 .long sys_ni_syscall /* 35 - old ftime syscall holder */
781 .long sys_rmdir /* 40 */
785 .long sys_ni_syscall /* old prof syscall holder */
786 .long sys_brk /* 45 */
791 .long sys_getegid16 /* 50 */
793 .long sys_umount /* recycled never used phys() */
794 .long sys_ni_syscall /* old lock syscall holder */
796 .long sys_fcntl /* 55 */
797 .long sys_ni_syscall /* old mpx syscall holder */
799 .long sys_ni_syscall /* old ulimit syscall holder */
801 .long sys_umask /* 60 */
806 .long sys_getpgrp /* 65 */
811 .long sys_setreuid16 /* 70 */
815 .long sys_sethostname
816 .long sys_setrlimit /* 75 */
817 .long sys_old_getrlimit
819 .long sys_gettimeofday
820 .long sys_settimeofday
821 .long sys_getgroups16 /* 80 */
822 .long sys_setgroups16
826 .long sys_readlink /* 85 */
831 .long old_mmap /* 90 */
836 .long sys_fchown16 /* 95 */
837 .long sys_getpriority
838 .long sys_setpriority
839 .long sys_ni_syscall /* old profil syscall holder */
841 .long sys_fstatfs /* 100 */
846 .long sys_getitimer /* 105 */
851 .long sys_iopl /* 110 */
853 .long sys_ni_syscall /* old "idle" system call */
856 .long sys_swapoff /* 115 */
861 .long sys_clone /* 120 */
862 .long sys_setdomainname
866 .long sys_mprotect /* 125 */
867 .long sys_sigprocmask
868 .long sys_ni_syscall /* old "create_module" */
869 .long sys_init_module
870 .long sys_delete_module
871 .long sys_ni_syscall /* 130: old "get_kernel_syms" */
876 .long sys_sysfs /* 135 */
877 .long sys_personality
878 .long sys_ni_syscall /* reserved for afs_syscall */
881 .long sys_llseek /* 140 */
886 .long sys_readv /* 145 */
891 .long sys_mlock /* 150 */
895 .long sys_sched_setparam
896 .long sys_sched_getparam /* 155 */
897 .long sys_sched_setscheduler
898 .long sys_sched_getscheduler
899 .long sys_sched_yield
900 .long sys_sched_get_priority_max
901 .long sys_sched_get_priority_min /* 160 */
902 .long sys_sched_rr_get_interval
905 .long sys_setresuid16
906 .long sys_getresuid16 /* 165 */
908 .long sys_ni_syscall /* Old sys_query_module */
911 .long sys_setresgid16 /* 170 */
912 .long sys_getresgid16
914 .long sys_rt_sigreturn
915 .long sys_rt_sigaction
916 .long sys_rt_sigprocmask /* 175 */
917 .long sys_rt_sigpending
918 .long sys_rt_sigtimedwait
919 .long sys_rt_sigqueueinfo
920 .long sys_rt_sigsuspend
921 .long sys_pread64 /* 180 */
926 .long sys_capset /* 185 */
927 .long sys_sigaltstack
929 .long sys_ni_syscall /* reserved for streams1 */
930 .long sys_ni_syscall /* reserved for streams2 */
931 .long sys_vfork /* 190 */
935 .long sys_ftruncate64
936 .long sys_stat64 /* 195 */
941 .long sys_getgid /* 200 */
946 .long sys_getgroups /* 205 */
951 .long sys_setresgid /* 210 */
956 .long sys_setfsuid /* 215 */
961 .long sys_getdents64 /* 220 */
966 # ifdef CONFIG_TUX_MODULE
974 .long sys_readahead /* 225 */
979 .long sys_lgetxattr /* 230 */
984 .long sys_removexattr /* 235 */
985 .long sys_lremovexattr
986 .long sys_fremovexattr
989 .long sys_futex /* 240 */
990 .long sys_sched_setaffinity
991 .long sys_sched_getaffinity
992 .long sys_set_thread_area
993 .long sys_get_thread_area
994 .long sys_io_setup /* 245 */
996 .long sys_io_getevents
999 .long sys_fadvise64 /* 250 */
1000 .long sys_ni_syscall
1001 .long sys_exit_group
1002 .long sys_lookup_dcookie
1003 .long sys_epoll_create
1004 .long sys_epoll_ctl /* 255 */
1005 .long sys_epoll_wait
1006 .long sys_remap_file_pages
1007 .long sys_set_tid_address
1008 .long sys_timer_create
1009 .long sys_timer_settime /* 260 */
1010 .long sys_timer_gettime
1011 .long sys_timer_getoverrun
1012 .long sys_timer_delete
1013 .long sys_clock_settime
1014 .long sys_clock_gettime /* 265 */
1015 .long sys_clock_getres
1016 .long sys_clock_nanosleep
1019 .long sys_tgkill /* 270 */
1021 .long sys_fadvise64_64
1022 .long sys_ni_syscall /* sys_vserver */
1024 .long sys_get_mempolicy
1025 .long sys_set_mempolicy
1028 .long sys_mq_timedsend
1029 .long sys_mq_timedreceive /* 280 */
1031 .long sys_mq_getsetattr
1032 .long sys_ni_syscall /* reserved for kexec */
# syscall_table_size feeds the nr_syscalls bounds check above.
1034 syscall_table_size=(.-sys_call_table)