2 * linux/arch/i386/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
40 * "current" is in register %ebx during any slow entries.
43 #include <linux/config.h>
44 #include <linux/linkage.h>
45 #include <asm/thread_info.h>
46 #include <asm/asm_offsets.h>
47 #include <asm/errno.h>
48 #include <asm/segment.h>
52 #include "irq_vectors.h"
/* Number of entries in sys_call_table: syscall_table_size is computed at the
   end of this file as (. - sys_call_table) and each entry is a 4-byte .long. */
54 #define nr_syscalls ((syscall_table_size)/4)
/* NOTE(review): source lines appear to be elided around here; these two look
   like the !CONFIG_PREEMPT fallbacks (stop = plain cli, resume_kernel goes
   straight to restore_all) -- confirm against the full file. */
80 #define preempt_stop cli
83 #define resume_kernel restore_all
/*
 * 4G/4G split (CONFIG_X86_HIGH_ENTRY): entry/exit pagetable and stack
 * switching between the per-task virtual stack and the real kernel stack.
 * NOTE(review): many lines of these macro bodies are elided in this view;
 * do not modify the fragments below without the complete source.
 */
86 #ifdef CONFIG_X86_HIGH_ENTRY
88 #ifdef CONFIG_X86_SWITCH_PAGETABLES
90 #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
92 * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu,
93 * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
94 * left stale, so we must check whether to repeat the real stack calculation.
96 #define repeat_if_esp_changed \
98 testl $-THREAD_SIZE, %ebp; \
/* Non-PREEMPT or non-SMP build: the re-check cannot trigger, empty fallback. */
101 #define repeat_if_esp_changed
104 /* clobbers ebx, edx and ebp */
/* Switch to swapper_pg_dir and the task's real kernel stack, keeping the
   offset within the THREAD_SIZE stack area (mask esp, OR in real_stack). */
106 #define __SWITCH_KERNELSPACE \
107 cmpl $0xff000000, %esp; \
111 * switch pagetables and load the real stack, \
112 * keep the stack offset: \
115 movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \
117 /* GET_THREAD_INFO(%ebp) intermixed */ \
121 andl $(-THREAD_SIZE), %ebp; \
122 andl $(THREAD_SIZE-1), %ebx; \
123 orl TI_real_stack(%ebp), %ebx; \
124 repeat_if_esp_changed; \
133 #define __SWITCH_USERSPACE \
134 /* interrupted any of the user return paths? */ \
136 movl EIP(%esp), %eax; \
138 cmpl $int80_ret_start_marker, %eax; \
139 jb 33f; /* nope - continue with sysexit check */\
140 cmpl $int80_ret_end_marker, %eax; \
141 jb 22f; /* yes - switch to virtual stack */ \
143 cmpl $sysexit_ret_start_marker, %eax; \
144 jb 44f; /* nope - continue with user check */ \
145 cmpl $sysexit_ret_end_marker, %eax; \
146 jb 22f; /* yes - switch to virtual stack */ \
147 /* return to userspace? */ \
149 movl EFLAGS(%esp),%ecx; \
151 testl $(VM_MASK | 3),%ecx; \
155 * switch to the virtual stack, then switch to \
156 * the userspace pagetables. \
159 GET_THREAD_INFO(%ebp); \
160 movl TI_virtual_stack(%ebp), %edx; \
161 movl TI_user_pgd(%ebp), %ecx; \
164 andl $(THREAD_SIZE-1), %ebx; \
166 int80_ret_start_marker: \
171 int80_ret_end_marker: \
174 #else /* !CONFIG_X86_HIGH_ENTRY */
/* Without the 4G/4G split the kernel already runs on the user pagetables,
   so both switch macros compile away to nothing. */
176 #define __SWITCH_KERNELSPACE
177 #define __SWITCH_USERSPACE
/*
 * Register-restore macros for the return-to-user path.  The .fixup and
 * __ex_table sections recover from faults while reloading user segment
 * registers (a bad user %ds/%es is replaced rather than oopsing).
 * NOTE(review): the macro bodies are fragmentary in this view -- interior
 * lines are elided; consult the full file before editing.
 */
192 movl $(__USER_DS), %edx; \
196 #define __RESTORE_INT_REGS \
205 #define __RESTORE_REGS \
206 __RESTORE_INT_REGS; \
209 .section .fixup,"ax"; \
210 444: movl $0,(%esp); \
212 555: movl $0,(%esp); \
215 .section __ex_table,"a";\
221 #define __RESTORE_ALL \
225 .section .fixup,"ax"; \
227 movl $(__USER_DS), %edx; \
233 .section __ex_table,"a";\
240 __SWITCH_KERNELSPACE;
/* RESTORE_ALL: switch back to the user pagetables / virtual stack first,
   then restore registers and return. */
242 #define RESTORE_ALL \
243 __SWITCH_USERSPACE; \
246 .section .entry.text,"ax"
# Call-gate entries (BSD/iBCS lcall7/lcall27 emulation -- ENTRY labels are
# elided in this view).  A call gate pushes eflags/cs/eip in a different
# order than an interrupt gate, so the saved slots are shuffled below.
249 pushfl # We get a different stack layout with call
250 # gates, which has to be cleaned up later..
257 movl EIP(%ebp), %eax # due to call gates, this is eflags, not eip..
258 movl CS(%ebp), %edx # this is eip..
259 movl EFLAGS(%ebp), %ecx # and this is cs..
260 movl %eax,EFLAGS(%ebp) #
261 movl %edx,EIP(%ebp) # Now we move them to their "normal" places
263 GET_THREAD_INFO_WITH_ESP(%ebp) # GET_THREAD_INFO
264 movl TI_exec_domain(%ebp), %edx # Get the execution domain
265 call *EXEC_DOMAIN_handler(%edx) # Call the handler for the domain
# Second call-gate entry (presumably lcall27 -- label elided here).
271 pushfl # We get a different stack layout with call
272 # gates, which has to be cleaned up later..
284 GET_THREAD_INFO(%ebp)
289 * Return to user mode is not as complex as all this looks,
290 * but we want the default path for a system call return to
291 * go as quickly as possible which is why some of this is
292 * less clear than it otherwise should be.
295 # userspace resumption stub bypassing syscall exit tracing
# ret_from_exception / ret_from_intr (labels elided in this view):
# examine the saved EFLAGS/CS to decide kernel vs user/vm86 return.
300 GET_THREAD_INFO(%ebp)
301 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
303 testl $(VM_MASK | 3), %eax
304 jz resume_kernel # returning to kernel or vm86-space
305 ENTRY(resume_userspace)
306 cli # make sure we don't miss an interrupt
307 # setting need_resched or sigpending
308 # between sampling and the iret
309 movl TI_flags(%ebp), %ecx
310 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
311 # int/exception return?
# Kernel preemption: only reschedule when preempt_count is zero and the
# interrupted context had interrupts enabled; PREEMPT_ACTIVE marks the
# preemption-induced schedule and is cleared afterwards.
315 #ifdef CONFIG_PREEMPT
317 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
320 movl TI_flags(%ebp), %ecx # need_resched set ?
321 testb $_TIF_NEED_RESCHED, %cl
323 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
325 movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp)
328 movl $0,TI_preempt_count(%ebp)
333 /* SYSENTER_RETURN points to after the "sysenter" instruction in
334 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
336 # sysenter call handler stub
337 ENTRY(sysenter_entry)
# SYSENTER loads no stack pointer for us; fetch the task's esp0 out of the
# TSS (intervening setup lines are elided in this view).
338 movl TSS_sysenter_esp0(%esp),%esp
346 * Push current_thread_info()->sysenter_return to the stack.
347 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
348 * pushed above, and the word being pushed now:
350 pushl (TI_sysenter_return-THREAD_SIZE+4*4)(%esp)
352 * No six-argument syscall is ever used with sysenter.
356 GET_THREAD_INFO(%ebp)
357 cmpl $(nr_syscalls), %eax
# Take the slow path when syscall tracing or auditing is active.
360 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
361 jnz syscall_trace_entry
362 call *sys_call_table(,%eax,4)
365 movl TI_flags(%ebp), %ecx
366 testw $_TIF_ALLWORK_MASK, %cx
367 jne syscall_exit_work
# 4G/4G: flip to the user pagetables and per-task virtual stack before
# SYSEXIT; the marker labels bound this window for __SWITCH_USERSPACE.
369 #ifdef CONFIG_X86_SWITCH_PAGETABLES
371 GET_THREAD_INFO(%ebp)
372 movl TI_virtual_stack(%ebp), %edx
373 movl TI_user_pgd(%ebp), %ecx
375 andl $(THREAD_SIZE-1), %ebx
377 sysexit_ret_start_marker:
381 * only ebx is not restored by the userspace sysenter vsyscall
382 * code, it assumes it to be callee-saved.
387 /* if something modifies registers it must also disable sysexit */
389 movl OLDESP(%esp), %ecx
392 #ifdef CONFIG_X86_SWITCH_PAGETABLES
393 sysexit_ret_end_marker:
397 # system call handler stub
# int $0x80 entry (the ENTRY(system_call) line is elided in this view).
399 pushl %eax # save orig_eax
401 GET_THREAD_INFO(%ebp)
402 cmpl $(nr_syscalls), %eax
404 # system call tracing in operation
405 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
406 jnz syscall_trace_entry
408 call *sys_call_table(,%eax,4)
409 movl %eax,EAX(%esp) # store the return value
# syscall_exit (label elided): interrupts stay off from the flags sample
# until the final iret so pending work cannot be missed.
411 cli # make sure we don't miss an interrupt
412 # setting need_resched or sigpending
413 # between sampling and the iret
414 movl TI_flags(%ebp), %ecx
415 testw $_TIF_ALLWORK_MASK, %cx # current->work
416 jne syscall_exit_work
420 # perform work that needs to be done immediately before resumption
# work_pending / work_resched (labels elided in this view): loop calling
# the scheduler until need_resched clears, then fall through to handle any
# remaining TIF work (signals / notify-resume).
423 testb $_TIF_NEED_RESCHED, %cl
427 cli # make sure we don't miss an interrupt
428 # setting need_resched or sigpending
429 # between sampling and the iret
430 movl TI_flags(%ebp), %ecx
431 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
432 # than syscall tracing?
434 testb $_TIF_NEED_RESCHED, %cl
437 work_notifysig: # deal with pending signals and
438 # notify-resume requests
439 testl $VM_MASK, EFLAGS(%esp)
441 jne work_notifysig_v86 # returning to kernel-space or
444 call do_notify_resume
# Use #ifdef for the boolean CONFIG_ symbol, matching the test at the top of
# this file; a bare #if on an undefined macro warns under -Wundef.
446 #ifdef CONFIG_X86_HIGH_ENTRY
448 * Reload db7 if necessary:
450 movl TI_flags(%ebp), %ecx
457 movl TI_task(%ebp), %edx;
458 movl task_thread_db7(%edx), %edx;
# vm86 variant (work_notifysig_v86 label elided in this view).
470 call do_notify_resume
473 # perform syscall exit tracing
# syscall_trace_entry (label elided): preset EAX to -ENOSYS, let the tracer
# run, then re-validate the (possibly ptrace-rewritten) syscall number.
476 movl $-ENOSYS,EAX(%esp)
479 call do_syscall_trace
480 movl ORIG_EAX(%esp), %eax
481 cmpl $(nr_syscalls), %eax
485 # perform syscall exit tracing
# syscall_exit_work (label elided): re-enable interrupts before calling out
# to the tracer on the exit side.
488 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT), %cl
490 sti # could let do_syscall_trace() call
494 call do_syscall_trace
# syscall_badsys (label elided): out-of-range syscall number.
499 movl $-ENOSYS,EAX(%esp)
503 * Build the entry stubs and pointer table with
504 * some assembler magic.
511 ENTRY(irq_entries_start)
528 #define BUILD_INTERRUPT(name, nr) \
535 /* The include is where all of the SMP etc. interrupts come from */
536 #include "entry_arch.h"
# divide_error (ENTRY line elided): traps without a hardware error code
# push 0 so the frame matches what the common error_code path expects.
539 pushl $0 # no error code
540 pushl $do_divide_error
# error_code common path: the per-trap stubs stashed the error code and the
# C handler address in the ORIG_EAX/ES slots; recover them here.
555 movl ORIG_EAX(%esp), %esi # get the error code
556 movl ES(%esp), %edi # get the function address
557 movl %eax, ORIG_EAX(%esp)
559 pushl %esi # push the error code
560 movl $(__USER_DS), %edx
564 /* clobbers edx, ebx and ebp */
567 leal 4(%esp), %edx # prepare pt_regs
568 pushl %edx # push pt_regs
572 jmp ret_from_exception
574 ENTRY(coprocessor_error)
576 pushl $do_coprocessor_error
579 ENTRY(simd_coprocessor_error)
581 pushl $do_simd_coprocessor_error
584 ENTRY(device_not_available)
585 pushl $-1 # mark this as an int
# CR0.EM set means no FPU hardware: go emulate; otherwise lazily restore
# the FPU state for this task.
588 testl $0x4, %eax # EM (math emulation bit)
589 jne device_not_available_emulate
591 call math_state_restore
592 jmp ret_from_exception
593 device_not_available_emulate:
594 pushl $0 # temporary storage for ORIG_EIP
597 jmp ret_from_exception
600 * Debug traps and NMI can happen at the one SYSENTER instruction
601 * that sets up the real kernel stack. Check here, since we can't
602 * allow the wrong stack to be used.
604 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
605 * already pushed 3 words if it hits on the sysenter instruction:
606 * eflags, cs and eip.
608 * We just load the right stack, and push the three (known) values
609 * by hand onto the new stack - while updating the return eip past
610 * the instruction that would have done it for sysenter.
/* NOTE(review): interior lines of this macro are elided in this view. */
612 #define FIX_STACK(offset, ok, label) \
613 cmpw $__KERNEL_CS,4(%esp); \
616 movl TSS_sysenter_esp0+offset(%esp),%esp; \
618 pushl $__KERNEL_CS; \
619 pushl $sysenter_past_esp
# debug entry (ENTRY elided): if the fault hit exactly on the sysenter
# instruction, repair the stack before running the handler.
622 cmpl $sysenter_entry,(%esp)
623 jne debug_stack_correct
624 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
631 * NMI is doubly nasty. It can happen _while_ we're handling
632 * a debug fault, and the debug fault hasn't yet been able to
633 * clear up the stack. So we first check whether we got an
634 * NMI on the sysenter entry path, but after that we need to
635 * check whether we got an NMI on the debug path where the debug
636 * fault happened on the sysenter path.
639 cmpl $sysenter_entry,(%esp)
643 /* Do not access memory above the end of our stack page,
644 * it might not exist.
646 andl $(THREAD_SIZE-1),%eax
647 cmpl $(THREAD_SIZE-20),%eax
649 jae nmi_stack_correct
650 cmpl $sysenter_entry,12(%esp)
651 je nmi_debug_stack_check
663 FIX_STACK(12,nmi_stack_correct, 1)
664 jmp nmi_stack_correct
665 nmi_debug_stack_check:
666 cmpw $__KERNEL_CS,16(%esp)
667 jne nmi_stack_correct
# Fix up only if the saved eip lies inside the debug handler's esp-fixup
# window (between the debug entry and debug_esp_fix_insn).
668 cmpl $debug - 1,(%esp)
669 jle nmi_stack_correct
670 cmpl $debug_esp_fix_insn,(%esp)
671 jle nmi_debug_stack_fixup
# NOTE(review): the jle above targets the label that immediately follows;
# intervening lines appear to be elided in this view -- confirm against the
# complete source before changing anything here.
672 nmi_debug_stack_fixup:
673 FIX_STACK(24,nmi_stack_correct, 1)
674 jmp nmi_stack_correct
# Straightforward trap stubs: each pushes its C handler (and, where the CPU
# supplies no error code, a dummy -- those lines are elided in this view)
# and falls into the common error_code path.
696 ENTRY(coprocessor_segment_overrun)
698 pushl $do_coprocessor_segment_overrun
702 pushl $do_invalid_TSS
705 ENTRY(segment_not_present)
706 pushl $do_segment_not_present
710 pushl $do_stack_segment
713 ENTRY(general_protection)
714 pushl $do_general_protection
717 ENTRY(alignment_check)
718 pushl $do_alignment_check
725 #ifdef CONFIG_X86_MCE
# No '$' prefix: this pushes the handler address *stored in* the
# machine_check_vector variable (an indirect handler), not the symbol.
728 pushl machine_check_vector
732 ENTRY(spurious_interrupt_bug)
734 pushl $do_spurious_interrupt_bug
737 #ifdef CONFIG_X86_STACK_CHECK
# Stack-overflow check: compute esp's offset within the THREAD_SIZE stack
# area; if it dips into the warning zone, switch to a dedicated overflow
# stack (guarded by the stack_overflowed latch) and report.
740 movl $(THREAD_SIZE - 1),%eax
742 cmpl $STACK_WARN,%eax /* esp reaches into STACK_WARN space */
748 lock; btsl $0,stack_overflowed
751 # switch to overflow stack
753 movl $(stack_overflow_stack + THREAD_SIZE - 4),%esp
759 # push eip then esp of error for stack_overflow_panic
763 # update the task pointer and cpu in the overflow stack's thread_info.
764 GET_THREAD_INFO_WITH_ESP(%eax)
765 movl TI_task(%eax),%ebx
766 movl %ebx,stack_overflow_stack+TI_task
767 movl TI_cpu(%eax),%ebx
768 movl %ebx,stack_overflow_stack+TI_cpu
772 # pop off call arguments
# Release the latch so the next overflow can be reported.
779 movl $0,stack_overflowed
781 #warning stack check enabled
# Storage for the overflow latch (and, per the name, presumably the
# overflow stack nearby -- lines elided in this view).
787 #ifdef CONFIG_X86_STACK_CHECK
788 .globl stack_overflowed
793 ENTRY(sys_call_table)
# i386 system-call dispatch table: entry N is the handler for syscall N.
# The order is userspace ABI -- never reorder or delete entries; retired
# slots keep sys_ni_syscall as a placeholder.  NOTE(review): many entries
# are elided in this view (gaps between the numbered comments).
794 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
799 .long sys_open /* 5 */
804 .long sys_unlink /* 10 */
809 .long sys_chmod /* 15 */
811 .long sys_ni_syscall /* old break syscall holder */
814 .long sys_getpid /* 20 */
819 .long sys_stime /* 25 */
824 .long sys_utime /* 30 */
825 .long sys_ni_syscall /* old stty syscall holder */
826 .long sys_ni_syscall /* old gtty syscall holder */
829 .long sys_ni_syscall /* 35 - old ftime syscall holder */
834 .long sys_rmdir /* 40 */
838 .long sys_ni_syscall /* old prof syscall holder */
839 .long sys_brk /* 45 */
844 .long sys_getegid16 /* 50 */
846 .long sys_umount /* recycled never used phys() */
847 .long sys_ni_syscall /* old lock syscall holder */
849 .long sys_fcntl /* 55 */
850 .long sys_ni_syscall /* old mpx syscall holder */
852 .long sys_ni_syscall /* old ulimit syscall holder */
854 .long sys_umask /* 60 */
859 .long sys_getpgrp /* 65 */
864 .long sys_setreuid16 /* 70 */
868 .long sys_sethostname
869 .long sys_setrlimit /* 75 */
870 .long sys_old_getrlimit
872 .long sys_gettimeofday
873 .long sys_settimeofday
874 .long sys_getgroups16 /* 80 */
875 .long sys_setgroups16
879 .long sys_readlink /* 85 */
884 .long old_mmap /* 90 */
889 .long sys_fchown16 /* 95 */
890 .long sys_getpriority
891 .long sys_setpriority
892 .long sys_ni_syscall /* old profil syscall holder */
894 .long sys_fstatfs /* 100 */
899 .long sys_getitimer /* 105 */
904 .long sys_iopl /* 110 */
906 .long sys_ni_syscall /* old "idle" system call */
909 .long sys_swapoff /* 115 */
914 .long sys_clone /* 120 */
915 .long sys_setdomainname
919 .long sys_mprotect /* 125 */
920 .long sys_sigprocmask
921 .long sys_ni_syscall /* old "create_module" */
922 .long sys_init_module
923 .long sys_delete_module
924 .long sys_ni_syscall /* 130: old "get_kernel_syms" */
929 .long sys_sysfs /* 135 */
930 .long sys_personality
931 .long sys_ni_syscall /* reserved for afs_syscall */
934 .long sys_llseek /* 140 */
939 .long sys_readv /* 145 */
944 .long sys_mlock /* 150 */
948 .long sys_sched_setparam
949 .long sys_sched_getparam /* 155 */
950 .long sys_sched_setscheduler
951 .long sys_sched_getscheduler
952 .long sys_sched_yield
953 .long sys_sched_get_priority_max
954 .long sys_sched_get_priority_min /* 160 */
955 .long sys_sched_rr_get_interval
958 .long sys_setresuid16
959 .long sys_getresuid16 /* 165 */
961 .long sys_ni_syscall /* Old sys_query_module */
964 .long sys_setresgid16 /* 170 */
965 .long sys_getresgid16
967 .long sys_rt_sigreturn
968 .long sys_rt_sigaction
969 .long sys_rt_sigprocmask /* 175 */
970 .long sys_rt_sigpending
971 .long sys_rt_sigtimedwait
972 .long sys_rt_sigqueueinfo
973 .long sys_rt_sigsuspend
974 .long sys_pread64 /* 180 */
979 .long sys_capset /* 185 */
980 .long sys_sigaltstack
982 .long sys_ni_syscall /* reserved for streams1 */
983 .long sys_ni_syscall /* reserved for streams2 */
984 .long sys_vfork /* 190 */
988 .long sys_ftruncate64
989 .long sys_stat64 /* 195 */
994 .long sys_getgid /* 200 */
999 .long sys_getgroups /* 205 */
1004 .long sys_setresgid /* 210 */
1009 .long sys_setfsuid /* 215 */
1011 .long sys_pivot_root
1014 .long sys_getdents64 /* 220 */
1019 # ifdef CONFIG_TUX_MODULE
1022 .long sys_ni_syscall
1025 .long sys_ni_syscall
1027 .long sys_readahead /* 225 */
1032 .long sys_lgetxattr /* 230 */
1035 .long sys_llistxattr
1036 .long sys_flistxattr
1037 .long sys_removexattr /* 235 */
1038 .long sys_lremovexattr
1039 .long sys_fremovexattr
1041 .long sys_sendfile64
1042 .long sys_futex /* 240 */
1043 .long sys_sched_setaffinity
1044 .long sys_sched_getaffinity
1045 .long sys_set_thread_area
1046 .long sys_get_thread_area
1047 .long sys_io_setup /* 245 */
1048 .long sys_io_destroy
1049 .long sys_io_getevents
1052 .long sys_fadvise64 /* 250 */
1053 .long sys_ni_syscall
1054 .long sys_exit_group
1055 .long sys_lookup_dcookie
1056 .long sys_epoll_create
1057 .long sys_epoll_ctl /* 255 */
1058 .long sys_epoll_wait
1059 .long sys_remap_file_pages
1060 .long sys_set_tid_address
1061 .long sys_timer_create
1062 .long sys_timer_settime /* 260 */
1063 .long sys_timer_gettime
1064 .long sys_timer_getoverrun
1065 .long sys_timer_delete
1066 .long sys_clock_settime
1067 .long sys_clock_gettime /* 265 */
1068 .long sys_clock_getres
1069 .long sys_clock_nanosleep
1072 .long sys_tgkill /* 270 */
1074 .long sys_fadvise64_64
1077 .long sys_get_mempolicy
1078 .long sys_set_mempolicy
1081 .long sys_mq_timedsend
1082 .long sys_mq_timedreceive /* 280 */
1084 .long sys_mq_getsetattr
1085 .long sys_ni_syscall /* reserved for kexec */
1086 .long sys_ioprio_set
1087 .long sys_ioprio_get /* 285 */
# Size of the table in bytes; nr_syscalls (top of file) divides this by 4.
1089 syscall_table_size=(.-sys_call_table)