/*
2 * linux/arch/i386/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
40 * "current" is in register %ebx during any slow entries.
*/
43 #include <linux/config.h>
44 #include <linux/linkage.h>
45 #include <asm/thread_info.h>
46 #include <asm/errno.h>
47 #include <asm/segment.h>
50 #include "irq_vectors.h"
51 #include <asm-xen/xen-public/xen.h>
53 #define nr_syscalls ((syscall_table_size)/4)
79 /* Offsets into shared_info_t. */
/* evtchn_upcall_pending lives at byte offset 0, so the macro expands empty. */
80 #define evtchn_upcall_pending /* 0 */
81 #define evtchn_upcall_mask 1
/* log2(sizeof(vcpu_info)): shift a CPU number to get its vcpu_info offset. */
83 #define sizeof_vcpu_shift 3
/* NOTE(review): these appear to be the CONFIG_SMP variants; the surrounding
   #ifdef CONFIG_SMP is not visible in this excerpt — confirm against the
   full file.  On SMP, XEN_GET_VCPU_INFO is a no-op because the vcpu_info
   pointer must be recomputed under preemption-disable (see LOCK below). */
86 #define XEN_GET_VCPU_INFO(reg)
87 #define preempt_disable(reg) incl TI_preempt_count(reg)
88 #define preempt_enable(reg) decl TI_preempt_count(reg)
/* Pin this task to its CPU's vcpu_info: disable preemption, then
   reg = HYPERVISOR_shared_info + (cpu << sizeof_vcpu_shift). */
89 #define XEN_LOCK_VCPU_INFO_SMP(reg) preempt_disable(%ebp) ; \
90 movl TI_cpu(%ebp),reg ; \
91 shl $sizeof_vcpu_shift,reg ; \
92 addl HYPERVISOR_shared_info,reg
93 #define XEN_UNLOCK_VCPU_INFO_SMP(reg) preempt_enable(%ebp)
/* Fixup-table placeholder covering the 3-byte decl above (0xff = skip). */
94 #define XEN_UNLOCK_VCPU_INFO_SMP_fixup .byte 0xff,0xff,0xff
/* Mask event (interrupt) delivery for the vcpu_info already in reg. */
96 #define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
/* Mask event delivery, locating vcpu_info under preemption-disable. */
97 #define XEN_BLOCK_EVENTS(reg) XEN_LOCK_VCPU_INFO_SMP(reg) ; \
98 XEN_LOCKED_BLOCK_EVENTS(reg) ; \
99 XEN_UNLOCK_VCPU_INFO_SMP(reg)
/* Unmask event delivery (the Xen analogue of sti). */
100 #define XEN_UNBLOCK_EVENTS(reg) XEN_LOCK_VCPU_INFO_SMP(reg) ; \
101 movb $0,evtchn_upcall_mask(reg) ; \
102 XEN_UNLOCK_VCPU_INFO_SMP(reg)
/* Copy the current upcall mask into the EVENT_MASK slot of the pt_regs
   frame at off(%esp), via scratch byte-register tmp. */
103 #define XEN_SAVE_UPCALL_MASK(reg,tmp,off) GET_THREAD_INFO(%ebp) ; \
104 XEN_LOCK_VCPU_INFO_SMP(reg) ; \
105 movb evtchn_upcall_mask(reg), tmp ; \
106 movb tmp, off(%esp) ; \
107 XEN_UNLOCK_VCPU_INFO_SMP(reg)
/* NOTE(review): uniprocessor variants; the #else separating them from the
   SMP definitions above is not visible in this excerpt — confirm. */
109 #define XEN_GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
/* On UP there is only one vcpu_info, so lock/unlock are no-ops. */
110 #define XEN_LOCK_VCPU_INFO_SMP(reg)
111 #define XEN_UNLOCK_VCPU_INFO_SMP(reg)
112 #define XEN_UNLOCK_VCPU_INFO_SMP_fixup
114 #define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
115 #define XEN_BLOCK_EVENTS(reg) XEN_LOCKED_BLOCK_EVENTS(reg)
116 #define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
/* UP variant: copy the upcall mask into the EVENT_MASK slot at off(%esp).
 * FIX: the macro body's final line (store to the frame, mirroring the SMP
 * variant above) was missing, leaving a trailing backslash that absorbed
 * the following #define into this macro. */
117 #define XEN_SAVE_UPCALL_MASK(reg,tmp,off) \
118 movb evtchn_upcall_mask(reg), tmp; \
119 movb tmp, off(%esp)
/* Non-zero iff an event upcall is pending for this vcpu (byte offset 0). */
122 #define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
124 #ifdef CONFIG_PREEMPT
/* Under CONFIG_PREEMPT, block events (Xen's cli) before exception return. */
125 #define preempt_stop XEN_BLOCK_EVENTS(%esi)
/* NOTE(review): the matching #else/#endif of this conditional are not
   visible in this excerpt. */
128 #define resume_kernel restore_all
131 #define SAVE_ALL_NO_EVENTMASK \
142 movl $(__USER_DS), %edx; \
147 SAVE_ALL_NO_EVENTMASK; \
148 XEN_GET_VCPU_INFO(%esi); \
149 XEN_SAVE_UPCALL_MASK(%esi,%dl,EVENT_MASK)
151 #define RESTORE_INT_REGS \
160 #define RESTORE_REGS \
164 .section .fixup,"ax"; \
170 .section __ex_table,"a";\
177 #define RESTORE_ALL \
181 .section .fixup,"ax"; \
182 2: movl $(__USER_DS), %edx; \
188 .section __ex_table,"a";\
197 GET_THREAD_INFO(%ebp)
199 XEN_GET_VCPU_INFO(%esi)
/*
203 * Return to user mode is not as complex as all this looks,
204 * but we want the default path for a system call return to
205 * go as quickly as possible which is why some of this is
206 * less clear than it otherwise should be.
*/
209 # userspace resumption stub bypassing syscall exit tracing
# NOTE(review): the ret_from_exception/ret_from_intr labels preceding this
# code are not visible in this excerpt; %ebp holds thread_info from here on.
214 GET_THREAD_INFO(%ebp)
215 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
217 testl $(VM_MASK | 2), %eax
218 jz resume_kernel # returning to kernel or vm86-space
219 ENTRY(resume_userspace)
220 XEN_GET_VCPU_INFO(%esi)
221 XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
222 # setting need_resched or sigpending
223 # between sampling and the iret
224 movl TI_flags(%ebp), %ecx
225 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
226 # int/exception return?
230 #ifdef CONFIG_PREEMPT
# resume_kernel under CONFIG_PREEMPT: reschedule only if preempt_count == 0
# and the saved event mask shows events were enabled at exception time.
# NOTE(review): the resume_kernel label, branch targets and closing #endif
# were elided in this excerpt.
232 XEN_GET_VCPU_INFO(%esi)
233 XEN_BLOCK_EVENTS(%esi)
234 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
237 movl TI_flags(%ebp), %ecx # need_resched set ?
238 testb $_TIF_NEED_RESCHED, %cl
240 testb $0xFF,EVENT_MASK(%esp) # interrupts off (exception path) ?
242 call preempt_schedule_irq
246 /* SYSENTER_RETURN points to after the "sysenter" instruction in
247 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
249 # sysenter call handler stub
250 ENTRY(sysenter_entry)
# On sysenter %esp points at the TSS; load the real kernel stack from it.
251 movl TSS_sysenter_esp0(%esp),%esp
# NOTE(review): the intervening pushes rebuilding the user frame (ss, esp,
# eflags, cs) were elided in this excerpt.
258 pushl $SYSENTER_RETURN_OFFSET
/*
261 * Load the potential sixth argument from user stack.
262 * Careful about security.
*/
# Bound-check the user %ebp before dereferencing it for the 6th syscall arg.
264 cmpl $__PAGE_OFFSET-3,%ebp
267 .section __ex_table,"a"
268 # exception-table entry: fault in the user load above -> syscall_fault
269 .long 1b,syscall_fault
# NOTE(review): the .align/.previous closing this section and the SAVE_ALL
# sequence were elided in this excerpt.
274 GET_THREAD_INFO(%ebp)
276 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
277 jnz syscall_trace_entry
278 cmpl $(nr_syscalls), %eax
# NOTE(review): the jae out-of-range branch after the cmpl was elided.
280 call *sys_call_table(,%eax,4)
283 movl TI_flags(%ebp), %ecx
284 testw $_TIF_ALLWORK_MASK, %cx
285 jne syscall_exit_work
286 /* if something modifies registers it must also disable sysexit */
288 movl OLDESP(%esp), %ecx
294 # system call handler stub
# NOTE(review): the ENTRY(system_call) label itself is not visible here.
296 pushl %eax # save orig_eax
298 GET_THREAD_INFO(%ebp)
299 # system call tracing in operation
300 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
301 jnz syscall_trace_entry
302 cmpl $(nr_syscalls), %eax
# NOTE(review): the jae badsys branch after the range check was elided.
305 call *sys_call_table(,%eax,4)
306 movl %eax,EAX(%esp) # store the return value
# syscall_exit: recheck work flags with events masked so no wakeup is lost.
308 XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
309 # setting need_resched or sigpending
310 # between sampling and the iret
311 movl TI_flags(%ebp), %ecx
312 testw $_TIF_ALLWORK_MASK, %cx # current->work
313 jne syscall_exit_work
# restore_all: decide whether event delivery must be re-enabled before the
# return, by comparing the current mask with the mask saved at entry.
315 testl $VM_MASK, EFLAGS(%esp)
317 movb EVENT_MASK(%esp), %al
318 notb %al # %al == ~saved_mask
319 XEN_LOCK_VCPU_INFO_SMP(%esi)
320 andb evtchn_upcall_mask(%esi),%al
321 andb $1,%al # %al == mask & ~saved_mask
322 jnz restore_all_enable_events # != 0 => reenable event delivery
323 XEN_UNLOCK_VCPU_INFO_SMP(%esi)
# NOTE(review): the RESTORE_ALL that follows here was elided in this excerpt.
327 XEN_UNBLOCK_EVENTS(%esi)
# vm86 return path: ask the hypervisor to switch to vm86 mode.
330 movl $__HYPERVISOR_switch_vm86,%eax
334 # perform work that needs to be done immediately before resumption
# NOTE(review): the work_pending/work_resched labels and several branches in
# this region were elided in this excerpt.
337 testb $_TIF_NEED_RESCHED, %cl
341 XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
342 # setting need_resched or sigpending
343 # between sampling and the iret
344 movl TI_flags(%ebp), %ecx
345 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
346 # than syscall tracing?
348 testb $_TIF_NEED_RESCHED, %cl
351 work_notifysig: # deal with pending signals and
352 # notify-resume requests
353 testl $VM_MASK, EFLAGS(%esp)
355 jne work_notifysig_v86 # returning to kernel-space or
358 call do_notify_resume
# work_notifysig_v86: switch back from the vm86 register layout first.
363 pushl %ecx # save ti_flags for do_notify_resume
364 call save_v86_state # %eax contains pt_regs pointer
368 call do_notify_resume
371 # perform syscall exit tracing
# syscall_trace_entry: report entry to the tracer, then redo the dispatch.
374 movl $-ENOSYS,EAX(%esp)
377 call do_syscall_trace
378 movl ORIG_EAX(%esp), %eax
379 cmpl $(nr_syscalls), %eax
383 # perform syscall exit tracing
386 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
388 XEN_UNBLOCK_EVENTS(%esi) # could let do_syscall_trace() call
392 call do_syscall_trace
# syscall_fault: fault while fetching syscall args from user space.
397 pushl %eax # save orig_eax
399 GET_THREAD_INFO(%ebp)
400 movl $-EFAULT,EAX(%esp)
# syscall_badsys: syscall number out of range.
405 movl $-ENOSYS,EAX(%esp)
/*
410 * Build the entry stubs and pointer table with
411 * some assembler magic.
*/
418 ENTRY(irq_entries_start)
436 #define BUILD_INTERRUPT(name, nr) \
444 /* The include is where all of the SMP etc. interrupts come from */
445 #include "entry_arch.h"
449 pushl $0 # no error code
450 pushl $do_divide_error
# error_code interior: unpack handler address and error code from the frame.
# NOTE(review): the ENTRY(error_code) label and the preceding register pushes
# were elided in this excerpt.
465 movl ES(%esp), %edi # get the function address
466 movl ORIG_EAX(%esp), %edx # get the error code
467 movl %eax, ORIG_EAX(%esp)
469 movl $(__USER_DS), %ecx
472 movl %esp,%eax # pt_regs pointer
473 XEN_GET_VCPU_INFO(%esi)
474 XEN_SAVE_UPCALL_MASK(%esi,%bl,EVENT_MASK)
# NOTE(review): the indirect call through %edi was elided in this excerpt.
476 jmp ret_from_exception
478 # A note on the "critical region" in our callback handler.
479 # We want to avoid stacking callback handlers due to events occurring
480 # during handling of the last event. To do this, we keep events disabled
481 # until we've done all processing. HOWEVER, we must enable events before
482 # popping the stack frame (can't be done atomically) and so it would still
483 # be possible to get enough handler activations to overflow the stack.
484 # Although unlikely, bugs of that kind are hard to track down, so we'd
485 # like to avoid the possibility.
486 # So, on entry to the handler we detect whether we interrupted an
487 # existing activation in its critical region -- if so, we pop the current
488 # activation and restart the handler using the previous one.
489 ENTRY(hypervisor_callback)
491 SAVE_ALL_NO_EVENTMASK
# NOTE(review): the EIP load and [scrit,ecrit) range comparison that decide
# whether we interrupted the critical region were elided in this excerpt.
496 jb critical_region_fixup
497 11: XEN_GET_VCPU_INFO(%esi)
# Record "events were blocked" in the frame so restore_all re-enables them.
498 movb $0, EVENT_MASK(%esp)
500 call evtchn_do_upcall
# Re-enable event delivery, then return; a recursive upcall landing inside
# [scrit,ecrit) is repaired by critical_region_fixup below.
505 restore_all_enable_events:
506 XEN_UNBLOCK_EVENTS(%esi)
507 scrit: /**** START OF CRITICAL REGION ****/
508 XEN_TEST_PENDING(%esi)
509 jnz 14f # process more events if necessary...
510 XEN_UNLOCK_VCPU_INFO_SMP(%esi)
# NOTE(review): the RESTORE_ALL between here and 14: was elided.
512 14: XEN_LOCKED_BLOCK_EVENTS(%esi)
513 XEN_UNLOCK_VCPU_INFO_SMP(%esi)
515 ecrit: /**** END OF CRITICAL REGION ****/
516 # [How we do the fixup]. We want to merge the current stack frame with the
517 # just-interrupted frame. How we do this depends on where in the critical
518 # region the interrupted handler was executing, and so how many saved
519 # registers are in each frame. We do this quickly using the lookup table
520 # 'critical_fixup_table'. For each byte offset in the critical region, it
521 # provides the number of bytes which have already been popped from the
522 # interrupted stack frame.
523 critical_region_fixup:
# %eax = offset of the interrupted EIP within [scrit,ecrit); index the table.
524 addl $critical_fixup_table-scrit,%eax
525 movzbl (%eax),%eax # %eax contains num bytes popped
# NOTE(review): the check for a fully-popped frame (cmp $0x24) and the
# initial %esi/%edi setup were elided in this excerpt.
530 GET_THREAD_INFO(%ebp)
531 XEN_UNLOCK_VCPU_INFO_SMP(%esi)
535 add %eax,%esi # %esi points at end of src region
537 add $0x34,%edi # %edi points at end of dst region
539 shr $2,%ecx # convert bytes to words
540 je 17f # skip loop if nothing to copy
541 16: subl $4,%esi # pre-decrementing copy loop
# NOTE(review): the word-copy body and loop branch were elided.
546 17: movl %edi,%esp # final %edi is top of merged stack
549 critical_fixup_table:
# For each byte offset into [scrit,ecrit), the number of bytes already popped
# from the interrupted frame at that point.
# FIX: the garbled "Ux00" entries are restored to the literal byte 0x00,
# consistent with the intact 0x04..0x24 entries below ("Ux00" is an undefined
# symbol and would not assemble).
550 .byte 0x00,0x00,0x00 # testb $0xff,(%esi) = XEN_TEST_PENDING
551 .byte 0x00,0x00 # jnz 14f
552 XEN_UNLOCK_VCPU_INFO_SMP_fixup
553 .byte 0x00 # pop %ebx
554 .byte 0x04 # pop %ecx
555 .byte 0x08 # pop %edx
556 .byte 0x0c # pop %esi
557 .byte 0x10 # pop %edi
558 .byte 0x14 # pop %ebp
559 .byte 0x18 # pop %eax
562 .byte 0x24,0x24,0x24 # add $4,%esp
564 .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi)
565 XEN_UNLOCK_VCPU_INFO_SMP_fixup
566 .byte 0x00,0x00 # jmp 11b
568 # Hypervisor uses this for application faults while it executes.
569 ENTRY(failsafe_callback)
576 jmp ret_from_exception
577 .section .fixup,"ax"; \
587 .section __ex_table,"a";\
# Simple trap stubs: push a zero error code (where applicable) and the C
# handler address, then fall into the common error_code path.
# NOTE(review): the pushl $0 / jmp error_code lines of each stub were elided
# in this excerpt.
595 ENTRY(coprocessor_error)
597 pushl $do_coprocessor_error
600 ENTRY(simd_coprocessor_error)
602 pushl $do_simd_coprocessor_error
605 ENTRY(device_not_available)
606 pushl $-1 # mark this as an int
609 call math_state_restore
610 jmp ret_from_exception
/*
613 * Debug traps and NMI can happen at the one SYSENTER instruction
614 * that sets up the real kernel stack. Check here, since we can't
615 * allow the wrong stack to be used.
617 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
618 * already pushed 3 words if it hits on the sysenter instruction:
619 * eflags, cs and eip.
621 * We just load the right stack, and push the three (known) values
622 * by hand onto the new stack - while updating the return eip past
623 * the instruction that would have done it for sysenter.
*/
/* Switch to the real kernel stack and re-push eflags/cs/eip so the handler
 * sees a consistent frame even when it hit on the sysenter instruction.
 * NOTE(review): some body lines of this macro (the ok-branch and eflags
 * push) were elided in this excerpt. */
625 #define FIX_STACK(offset, ok, label) \
626 cmpw $__KERNEL_CS,4(%esp); \
629 movl TSS_sysenter_esp0+offset(%esp),%esp; \
631 pushl $__KERNEL_CS; \
632 pushl $sysenter_past_esp
# debug handler: divert to the stack fixup if we hit on the sysenter insn.
635 cmpl $sysenter_entry,(%esp)
636 jne debug_stack_correct
637 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
639 pushl $-1 # mark this as an int
641 xorl %edx,%edx # error code 0
642 movl %esp,%eax # pt_regs pointer
# NOTE(review): the call to do_debug was elided in this excerpt.
646 jmp ret_from_exception
/*
650 * NMI is doubly nasty. It can happen _while_ we're handling
651 * a debug fault, and the debug fault hasn't yet been able to
652 * clear up the stack. So we first check whether we got an
653 * NMI on the sysenter entry path, but after that we need to
654 * check whether we got an NMI on the debug path where the debug
655 * fault happened on the sysenter path.
*/
658 cmpl $sysenter_entry,(%esp)
662 /* Do not access memory above the end of our stack page,
663 * it might not exist. */
# Check how much room is left on the current thread stack before probing it.
665 andl $(THREAD_SIZE-1),%eax
666 cmpl $(THREAD_SIZE-20),%eax
668 jae nmi_stack_correct
669 cmpl $sysenter_entry,12(%esp)
670 je nmi_debug_stack_check
# nmi_stack_correct: normal NMI dispatch on a sane stack.
# NOTE(review): the label and SAVE_ALL preceding these lines were elided.
674 xorl %edx,%edx # zero error code
675 movl %esp,%eax # pt_regs pointer
# nmi_stack_fixup: NMI hit on the sysenter instruction itself.
680 FIX_STACK(12,nmi_stack_correct, 1)
681 jmp nmi_stack_correct
# NMI arrived while the debug handler was fixing up a sysenter stack:
# was the interrupted EIP inside the debug handler's fixup window?
682 nmi_debug_stack_check:
683 cmpw $__KERNEL_CS,16(%esp)
684 jne nmi_stack_correct
685 cmpl $debug - 1,(%esp)
686 jle nmi_stack_correct
687 cmpl $debug_esp_fix_insn,(%esp)
# NOTE(review): this jle falls through into the very label it targets, so it
# is redundant as written — possibly a branch to nmi_stack_correct was
# elided between the jle and the label; confirm against the full file.
688 jle nmi_debug_stack_fixup
689 nmi_debug_stack_fixup:
690 FIX_STACK(24,nmi_stack_correct, 1)
691 jmp nmi_stack_correct
# nmi_16bit_stack / common NMI tail (label elided in this excerpt).
695 pushl $-1 # mark this as an int
697 xorl %edx,%edx # zero error code
698 movl %esp,%eax # pt_regs pointer
# NOTE(review): the call to do_nmi was elided in this excerpt.
702 jmp ret_from_exception
# Exception stubs: each pushes its C handler and joins the common error_code
# path. NOTE(review): the pushl $0 / jmp error_code lines of each stub were
# elided in this excerpt.
719 ENTRY(coprocessor_segment_overrun)
721 pushl $do_coprocessor_segment_overrun
725 pushl $do_invalid_TSS
728 ENTRY(segment_not_present)
729 pushl $do_segment_not_present
733 pushl $do_stack_segment
736 ENTRY(general_protection)
737 pushl $do_general_protection
740 ENTRY(alignment_check)
741 pushl $do_alignment_check
744 # This handler is special, because it gets an extra value on its stack,
745 # which is the linear faulting address.
746 # fastcall register usage: %eax = pt_regs, %edx = error code,
747 # %ecx = fault address
# page_fault interior (ENTRY label and register saves elided in excerpt).
756 decl %eax /* eax = -1 */
761 movl ES(%esp), %ecx /* get the faulting address */
762 movl ORIG_EAX(%esp), %edx /* get the error code */
763 movl %eax, ORIG_EAX(%esp)
765 movl $(__KERNEL_DS),%eax
768 movl %esp,%eax /* pt_regs pointer */
769 XEN_GET_VCPU_INFO(%esi)
770 XEN_SAVE_UPCALL_MASK(%esi,%bl,EVENT_MASK)
# NOTE(review): the call to do_page_fault was elided in this excerpt.
772 jmp ret_from_exception
774 #ifdef CONFIG_X86_MCE
# machine_check vector is patched at runtime, hence the indirect push.
777 pushl machine_check_vector
# NOTE(review): the #endif closing CONFIG_X86_MCE was elided.
781 ENTRY(fixup_4gb_segment)
782 pushl $do_fixup_4gb_segment
# Table of syscall handler addresses, indexed by syscall number (4 bytes per
# entry; nr_syscalls is derived from syscall_table_size below).
# NOTE(review): many entries were elided in this excerpt — the /* N */
# markers no longer correspond to contiguous slots; confirm against the
# full file before editing.
786 ENTRY(sys_call_table)
787 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
792 .long sys_open /* 5 */
797 .long sys_unlink /* 10 */
802 .long sys_chmod /* 15 */
804 .long sys_ni_syscall /* old break syscall holder */
807 .long sys_getpid /* 20 */
812 .long sys_stime /* 25 */
817 .long sys_utime /* 30 */
818 .long sys_ni_syscall /* old stty syscall holder */
819 .long sys_ni_syscall /* old gtty syscall holder */
822 .long sys_ni_syscall /* 35 - old ftime syscall holder */
827 .long sys_rmdir /* 40 */
831 .long sys_ni_syscall /* old prof syscall holder */
832 .long sys_brk /* 45 */
837 .long sys_getegid16 /* 50 */
839 .long sys_umount /* recycled never used phys() */
840 .long sys_ni_syscall /* old lock syscall holder */
842 .long sys_fcntl /* 55 */
843 .long sys_ni_syscall /* old mpx syscall holder */
845 .long sys_ni_syscall /* old ulimit syscall holder */
847 .long sys_umask /* 60 */
852 .long sys_getpgrp /* 65 */
857 .long sys_setreuid16 /* 70 */
861 .long sys_sethostname
862 .long sys_setrlimit /* 75 */
863 .long sys_old_getrlimit
865 .long sys_gettimeofday
866 .long sys_settimeofday
867 .long sys_getgroups16 /* 80 */
868 .long sys_setgroups16
872 .long sys_readlink /* 85 */
877 .long old_mmap /* 90 */
882 .long sys_fchown16 /* 95 */
883 .long sys_getpriority
884 .long sys_setpriority
885 .long sys_ni_syscall /* old profil syscall holder */
887 .long sys_fstatfs /* 100 */
892 .long sys_getitimer /* 105 */
897 .long sys_iopl /* 110 */
899 .long sys_ni_syscall /* old "idle" system call */
902 .long sys_swapoff /* 115 */
907 .long sys_clone /* 120 */
908 .long sys_setdomainname
912 .long sys_mprotect /* 125 */
913 .long sys_sigprocmask
914 .long sys_ni_syscall /* old "create_module" */
915 .long sys_init_module
916 .long sys_delete_module
917 .long sys_ni_syscall /* 130: old "get_kernel_syms" */
922 .long sys_sysfs /* 135 */
923 .long sys_personality
924 .long sys_ni_syscall /* reserved for afs_syscall */
927 .long sys_llseek /* 140 */
932 .long sys_readv /* 145 */
937 .long sys_mlock /* 150 */
941 .long sys_sched_setparam
942 .long sys_sched_getparam /* 155 */
943 .long sys_sched_setscheduler
944 .long sys_sched_getscheduler
945 .long sys_sched_yield
946 .long sys_sched_get_priority_max
947 .long sys_sched_get_priority_min /* 160 */
948 .long sys_sched_rr_get_interval
951 .long sys_setresuid16
952 .long sys_getresuid16 /* 165 */
954 .long sys_ni_syscall /* Old sys_query_module */
957 .long sys_setresgid16 /* 170 */
958 .long sys_getresgid16
960 .long sys_rt_sigreturn
961 .long sys_rt_sigaction
962 .long sys_rt_sigprocmask /* 175 */
963 .long sys_rt_sigpending
964 .long sys_rt_sigtimedwait
965 .long sys_rt_sigqueueinfo
966 .long sys_rt_sigsuspend
967 .long sys_pread64 /* 180 */
972 .long sys_capset /* 185 */
973 .long sys_sigaltstack
975 .long sys_ni_syscall /* reserved for streams1 */
976 .long sys_ni_syscall /* reserved for streams2 */
977 .long sys_vfork /* 190 */
981 .long sys_ftruncate64
982 .long sys_stat64 /* 195 */
987 .long sys_getgid /* 200 */
992 .long sys_getgroups /* 205 */
997 .long sys_setresgid /* 210 */
1002 .long sys_setfsuid /* 215 */
1004 .long sys_pivot_root
1007 .long sys_getdents64 /* 220 */
1009 .long sys_ni_syscall /* reserved for TUX */
1010 .long sys_ni_syscall
1012 .long sys_readahead /* 225 */
1017 .long sys_lgetxattr /* 230 */
1020 .long sys_llistxattr
1021 .long sys_flistxattr
1022 .long sys_removexattr /* 235 */
1023 .long sys_lremovexattr
1024 .long sys_fremovexattr
1026 .long sys_sendfile64
1027 .long sys_futex /* 240 */
1028 .long sys_sched_setaffinity
1029 .long sys_sched_getaffinity
1030 .long sys_set_thread_area
1031 .long sys_get_thread_area
1032 .long sys_io_setup /* 245 */
1033 .long sys_io_destroy
1034 .long sys_io_getevents
1037 .long sys_fadvise64 /* 250 */
1038 .long sys_ni_syscall
1039 .long sys_exit_group
1040 .long sys_lookup_dcookie
1041 .long sys_epoll_create
1042 .long sys_epoll_ctl /* 255 */
1043 .long sys_epoll_wait
1044 .long sys_remap_file_pages
1045 .long sys_set_tid_address
1046 .long sys_timer_create
1047 .long sys_timer_settime /* 260 */
1048 .long sys_timer_gettime
1049 .long sys_timer_getoverrun
1050 .long sys_timer_delete
1051 .long sys_clock_settime
1052 .long sys_clock_gettime /* 265 */
1053 .long sys_clock_getres
1054 .long sys_clock_nanosleep
1057 .long sys_tgkill /* 270 */
1059 .long sys_fadvise64_64
1062 .long sys_get_mempolicy
1063 .long sys_set_mempolicy
1066 .long sys_mq_timedsend
1067 .long sys_mq_timedreceive /* 280 */
1069 .long sys_mq_getsetattr
1070 .long sys_ni_syscall /* reserved for kexec */
1072 .long sys_ni_syscall /* 285 */ /* available */
1074 .long sys_request_key
# Total table size in bytes; nr_syscalls = syscall_table_size / 4.
1077 syscall_table_size=(.-sys_call_table)