2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
12 * entry.S contains the system-call and fault low-level handling routines.
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for syscall tracing, signals or fork/exec et.al.
20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack.
23 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all register saved.
27 * - schedule it carefully for the final hardware.
31 #include <linux/config.h>
32 #include <linux/linkage.h>
33 #include <asm/segment.h>
35 #include <asm/cache.h>
36 #include <asm/errno.h>
37 #include <asm/dwarf2.h>
38 #include <asm/calling.h>
39 #include <asm/offset.h>
41 #include <asm/unistd.h>
42 #include <asm/thread_info.h>
43 #include <asm/hw_irq.h>
44 #include <asm/errno.h>
# preempt_stop: disable interrupts (cli) before return-path work checks.
49 #define preempt_stop cli
# Non-preemptible kernels alias the kernel-return label straight to the
# register-restore path, skipping any preemption check.
52 #define retint_kernel retint_restore_args
56 * C code is not supposed to know about undefined top of stack. Every time
57 * a C function with a pt_regs argument is called from the SYSCALL based
58 * fast path FIXUP_TOP_OF_STACK is needed.
59 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
63 /* %rsp:at FRAMEEND */
# FIXUP_TOP_OF_STACK: fill in the iret-frame fields (SS, CS, EFLAGS, and the
# user RSP kept in %gs:pda_oldrsp) that the SYSCALL fast path leaves undefined,
# so C code taking a pt_regs sees a complete frame.  \tmp is a scratch register.
# NOTE(review): intervening lines (including .endm) are elided in this view.
64 .macro FIXUP_TOP_OF_STACK tmp
65 movq %gs:pda_oldrsp,\tmp
67 movq $__USER_DS,SS(%rsp)
68 movq $__USER_CS,CS(%rsp)
70 movq R11(%rsp),\tmp /* get eflags */
71 movq \tmp,EFLAGS(%rsp)
# RESTORE_TOP_OF_STACK: inverse of the above — copy the (possibly ptrace-
# modified) RSP back to pda_oldrsp and EFLAGS back into the R11 slot that
# SYSRET consumes.  \offset adjusts for an extra displacement from %rsp.
74 .macro RESTORE_TOP_OF_STACK tmp,offset=0
75 movq RSP-\offset(%rsp),\tmp
76 movq \tmp,%gs:pda_oldrsp
77 movq EFLAGS-\offset(%rsp),\tmp
78 movq \tmp,R11-\offset(%rsp)
# FAKE_STACK_FRAME: build a synthetic interrupt frame (ss, rsp, eflags, cs,
# rip = \child_rip, then orig_rax) so a kernel-created context can exit
# through the normal return paths.  Each push is matched by a CFI annotation.
# NOTE(review): the ss/rsp pushes themselves are elided in this view.
81 .macro FAKE_STACK_FRAME child_rip
82 /* push in order ss, rsp, eflags, cs, rip */
85 CFI_ADJUST_CFA_OFFSET 8
87 CFI_ADJUST_CFA_OFFSET 8
89 pushq $(1<<9) /* eflags - interrupts on */
90 CFI_ADJUST_CFA_OFFSET 8
91 pushq $__KERNEL_CS /* cs */
92 CFI_ADJUST_CFA_OFFSET 8
93 pushq \child_rip /* rip */
94 CFI_ADJUST_CFA_OFFSET 8
96 pushq %rax /* orig rax */
97 CFI_ADJUST_CFA_OFFSET 8
# UNFAKE_STACK_FRAME: drop the six quadwords pushed above.
100 .macro UNFAKE_STACK_FRAME
102 CFI_ADJUST_CFA_OFFSET -(6*8)
# CFI_DEFAULT_STACK: declare the dwarf2 unwind locations of every saved
# register relative to a full pt_regs frame (offsets are the asm-offsets
# constants, biased by SS = frame size).
105 .macro CFI_DEFAULT_STACK
106 CFI_ADJUST_CFA_OFFSET (SS)
107 CFI_OFFSET r15,R15-SS
108 CFI_OFFSET r14,R14-SS
109 CFI_OFFSET r13,R13-SS
110 CFI_OFFSET r12,R12-SS
111 CFI_OFFSET rbp,RBP-SS
112 CFI_OFFSET rbx,RBX-SS
113 CFI_OFFSET r11,R11-SS
114 CFI_OFFSET r10,R10-SS
# NOTE(review): r9/r8 entries appear to be elided in this view.
117 CFI_OFFSET rax,RAX-SS
118 CFI_OFFSET rcx,RCX-SS
119 CFI_OFFSET rdx,RDX-SS
120 CFI_OFFSET rsi,RSI-SS
121 CFI_OFFSET rdi,RDI-SS
122 CFI_OFFSET rsp,RSP-SS
123 CFI_OFFSET rip,RIP-SS
126 * A newly forked process directly context switches into this.
# Check whether the tracer flags require the slow (ptrace-aware) exit.
133 GET_THREAD_INFO(%rcx)
134 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
# Kernel threads (CPL 0 in saved CS) and 32-bit tasks must leave via the
# IRET path; only a plain 64-bit user child may use the SYSRET fast path.
138 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
139 je int_ret_from_sys_call
140 testl $_TIF_IA32,threadinfo_flags(%rcx)
141 jnz int_ret_from_sys_call
142 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
143 jmp ret_from_sys_call
# Traced-child path: report syscall exit to the tracer, then re-check flags.
146 call syscall_trace_leave
147 GET_THREAD_INFO(%rcx)
152 * System call entry. Up to 6 arguments in registers are supported.
154 * SYSCALL does not save anything on the stack and does not change the
160 * rax system call number
162 * rcx return address for syscall/sysret, C arg3
165 * r10 arg3 (--> moved to rcx for C)
168 * r11 eflags for syscall/sysret, temporary for C
169 * r12-r15,rbp,rbx saved by C code, not touched.
171 * Interrupts are off on entry.
172 * Only called from user space.
174 * XXX if we had a free scratch register we could save the RSP into the stack frame
175 * and report it properly in ps. Unfortunately we haven't.
# SYSCALL entry: stash the user stack pointer in the per-CPU PDA and switch
# to the kernel stack (SYSCALL itself does not switch stacks).
181 movq %rsp,%gs:pda_oldrsp
182 movq %gs:pda_kernelstack,%rsp
# Save syscall number and user return address (SYSCALL put it in %rcx).
185 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
186 movq %rcx,RIP-ARGOFFSET(%rsp)
187 GET_THREAD_INFO(%rcx)
188 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
# Range-check the syscall number, then dispatch through the table.
190 cmpq $__NR_syscall_max,%rax
193 call *sys_call_table(,%rax,8) # XXX: rip relative
194 movq %rax,RAX-ARGOFFSET(%rsp)
196 * Syscall return path ending with SYSRET (fast path)
197 * Has incomplete stack frame and undefined top of stack.
199 .globl ret_from_sys_call
201 movl $_TIF_WORK_MASK,%edi
# Re-check work flags with interrupts presumably off — TODO confirm; the
# cli before this check is elided in this view.
204 GET_THREAD_INFO(%rcx)
206 movl threadinfo_flags(%rcx),%edx
# Reload user RIP into %rcx for SYSRET and restore the user stack pointer.
209 movq RIP-ARGOFFSET(%rsp),%rcx
210 RESTORE_ARGS 0,-ARG_SKIP,1
211 movq %gs:pda_oldrsp,%rsp
215 /* Handle reschedules */
216 /* edx: work, edi: workmask */
218 bt $TIF_NEED_RESCHED,%edx
226 /* Handle a signal */
229 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
232 /* Really a signal */
233 /* edx: work flags (arg3) */
# Deliver via ptregscall_common so do_notify_resume sees a full frame:
# %rax = handler, arg1 = &pt_regs, arg2 = NULL oldset, arg3 already in %edx.
234 leaq do_notify_resume(%rip),%rax
235 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
236 xorl %esi,%esi # oldset -> arg2
237 call ptregscall_common
# After signal delivery only a reschedule can remain pending; loop with
# the work mask narrowed to TIF_NEED_RESCHED.
238 1: movl $_TIF_NEED_RESCHED,%edi
241 /* Do syscall tracing */
# Pre-set -ENOSYS so a tracer that skips the syscall sees a sane result,
# and complete the frame before calling into C.
244 movq $-ENOSYS,RAX(%rsp)
245 FIXUP_TOP_OF_STACK %rdi
247 call syscall_trace_enter
248 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
250 cmpq $__NR_syscall_max,%rax
252 movq %r10,%rcx /* fixup for C */
253 call *sys_call_table(,%rax,8)
254 movq %rax,RAX-ARGOFFSET(%rsp)
257 call syscall_trace_leave
258 RESTORE_TOP_OF_STACK %rbx
260 jmp ret_from_sys_call
# badsys: out-of-range syscall number — return -ENOSYS.
263 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
264 jmp ret_from_sys_call
267 * Syscall return path ending with IRET.
268 * Has correct top of stack, but partial stack frame.
270 ENTRY(int_ret_from_sys_call)
# Returning to the kernel (CPL 0 in saved CS) needs no user work checks.
272 testl $3,CS-ARGOFFSET(%rsp)
273 je retint_restore_args
274 movl $_TIF_ALLWORK_MASK,%edi
275 /* edi: mask to check */
277 GET_THREAD_INFO(%rcx)
278 movl threadinfo_flags(%rcx),%edx
283 /* Either reschedule or signal or syscall exit tracking needed. */
284 /* First do a reschedule test. */
285 /* edx: work, edi: workmask */
287 bt $TIF_NEED_RESCHED,%edx
295 /* handle signals and tracing -- both require a full stack frame */
299 /* Check for syscall exit trace */
300 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
301 # (full frame assumed saved by elided code above — TODO confirm)
303 leaq 8(%rsp),%rdi # &ptregs -> arg1
304 call syscall_trace_leave
# Clear the trace bits from the work mask so we don't loop on them.
306 btr $TIF_SYSCALL_TRACE,%edi
307 btr $TIF_SYSCALL_AUDIT,%edi
308 btr $TIF_SINGLESTEP,%edi
# Signal / notify-resume delivery with a full frame already on the stack.
312 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
314 movq %rsp,%rdi # &ptregs -> arg1
315 xorl %esi,%esi # oldset -> arg2
316 call do_notify_resume
317 1: movl $_TIF_NEED_RESCHED,%edi
324 * Certain special system calls that need to save a complete full stack frame.
# PTREGSCALL: generate a stub named \label that loads \func into %rax,
# points \arg at the (not yet complete) pt_regs, and tail-jumps to the
# shared frame-building helper below.
327 .macro PTREGSCALL label,func,arg
330 leaq \func(%rip),%rax
331 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
332 jmp ptregscall_common
335 PTREGSCALL stub_clone, sys_clone, %r8
336 PTREGSCALL stub_fork, sys_fork, %rdi
337 PTREGSCALL stub_vfork, sys_vfork, %rdi
338 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
339 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
340 PTREGSCALL stub_iopl, sys_iopl, %rsi
# ptregscall_common: complete the frame, call *%rax, then undo.
# NOTE(review): the SAVE/RESTORE_REST and call instructions are elided here.
342 ENTRY(ptregscall_common)
345 CFI_ADJUST_CFA_OFFSET -8
348 FIXUP_TOP_OF_STACK %r11
350 RESTORE_TOP_OF_STACK %r11
354 CFI_ADJUST_CFA_OFFSET 8
# Second stub (presumably stub_execve — TODO confirm; its ENTRY line is
# elided): 32-bit tasks must return via the IRET path.
361 CFI_ADJUST_CFA_OFFSET -8
364 FIXUP_TOP_OF_STACK %r11
366 GET_THREAD_INFO(%rcx)
367 bt $TIF_IA32,threadinfo_flags(%rcx)
369 RESTORE_TOP_OF_STACK %r11
376 CFI_ADJUST_CFA_OFFSET REST_SKIP
379 jmp int_ret_from_sys_call
383 * sigreturn is special because it needs to restore all registers on return.
384 * This cannot be done with SYSRET, so use the IRET return path instead.
386 ENTRY(stub_rt_sigreturn)
391 FIXUP_TOP_OF_STACK %r11
392 call sys_rt_sigreturn
# Store the return value into the frame the IRET path will restore from.
393 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
395 jmp int_ret_from_sys_call
399 * Interrupt entry/exit.
401 * Interrupt entry points save only callee clobbered registers in fast path.
403 * Entry runs with interrupts off.
406 /* 0(%rsp): interrupt number */
# interrupt: shared entry — annotate the partial frame for the unwinder,
# switch to the per-CPU IRQ stack on first entry, and call \func.
407 .macro interrupt func
409 CFI_DEF_CFA rsp,(SS-RDI)
410 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
411 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
413 #ifdef CONFIG_DEBUG_INFO
417 * Setup a stack frame pointer. This allows gdb to trace
418 * back to the original stack.
421 CFI_DEF_CFA_REGISTER rbp
424 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
429 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
430 movq %gs:pda_irqstackptr,%rax
432 pushq %rdi # save old stack
436 ENTRY(common_interrupt)
# Exit: drop the IRQ-stack nesting count and return to the process stack.
438 /* 0(%rsp): oldrsp-ARGOFFSET */
442 subl $1,%gs:pda_irqcount
443 #ifdef CONFIG_DEBUG_INFO
446 leaq ARGOFFSET(%rdi),%rsp
448 GET_THREAD_INFO(%rcx)
449 testl $3,CS-ARGOFFSET(%rsp)
452 /* Interrupt came from user space */
454 * Has a correct top of stack, but a partial stack frame
455 * %rcx: thread info. Interrupts off.
457 retint_with_reschedule:
458 movl $_TIF_WORK_MASK,%edi
460 movl threadinfo_flags(%rcx),%edx
# Fixup entry: a faulting iret is redirected to bad_iret below.
472 .section __ex_table,"a"
473 .quad iret_label,bad_iret
# bad_iret: the iret itself faulted; fake an error return to user space.
476 /* force a signal here? this matches i386 behaviour */
477 /* running with kernel gs */
479 movq $-9999,%rdi /* better code? */
483 /* edi: workmask, edx: work */
485 bt $TIF_NEED_RESCHED,%edx
491 GET_THREAD_INFO(%rcx)
496 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
# Mark the frame as "not in a syscall" before signal delivery.
500 movq $-1,ORIG_RAX(%rsp)
501 xorq %rsi,%rsi # oldset
502 movq %rsp,%rdi # &pt_regs
503 call do_notify_resume
506 movl $_TIF_NEED_RESCHED,%edi
507 GET_THREAD_INFO(%rcx)
510 #ifdef CONFIG_PREEMPT
511 /* Returning to kernel space. Check if we need preemption */
512 /* rcx: threadinfo. interrupts off. */
# Preempt only when the count is zero, a reschedule is requested, and the
# interrupted context had interrupts enabled (EFLAGS.IF, bit 9).
515 cmpl $0,threadinfo_preempt_count(%rcx)
516 jnz retint_restore_args
517 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
518 jnc retint_restore_args
519 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
520 jc retint_restore_args
521 movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
# After the (elided) schedule call, clear PREEMPT_ACTIVE again.
525 GET_THREAD_INFO(%rcx)
526 movl $0,threadinfo_preempt_count(%rcx)
# apicinterrupt: generate an APIC/IPI entry that pushes \num and calls \func
# through the common interrupt machinery (body elided in this view).
534 .macro apicinterrupt num,func
542 ENTRY(reschedule_interrupt)
543 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
545 ENTRY(invalidate_interrupt)
546 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
548 ENTRY(call_function_interrupt)
549 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
552 #ifdef CONFIG_X86_LOCAL_APIC
553 ENTRY(apic_timer_interrupt)
554 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
556 ENTRY(error_interrupt)
557 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
559 ENTRY(spurious_interrupt)
560 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
564 * Exception entry points.
# zeroentry-style preamble: exceptions without a hardware error code push 0
# so every frame has the same layout; %rax is parked in the rdi slot.
567 pushq $0 /* push error code/oldrax */
568 pushq %rax /* push real oldrax to the rdi slot */
573 .macro errorentry sym
579 /* error code is on the stack already */
580 /* handle NMI like exceptions that can happen everywhere */
# paranoidentry: determine the correct GS base via MSR_GS_BASE before
# touching per-CPU data (handler may interrupt any context).
581 .macro paranoidentry sym
585 movl $MSR_GS_BASE,%ecx
592 movq ORIG_RAX(%rsp),%rsi
593 movq $-1,ORIG_RAX(%rsp)
598 * Exception entry point. This expects an error code/orig_rax on the stack
599 * and the exception handler in %rax.
603 CFI_DEF_CFA rsp,(SS-RDI)
604 CFI_REL_OFFSET rsp,(RSP-RDI)
605 CFI_REL_OFFSET rip,(RIP-RDI)
606 /* rdi slot contains rax, oldrax contains error code */
# Build the full register save area (14 quadwords) with CFI annotations;
# the individual pushes/movqs between these lines are elided in this view.
609 CFI_ADJUST_CFA_OFFSET (14*8)
611 CFI_REL_OFFSET rsi,RSI
612 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
614 CFI_REL_OFFSET rdx,RDX
616 CFI_REL_OFFSET rcx,RCX
617 movq %rsi,10*8(%rsp) /* store rax */
618 CFI_REL_OFFSET rax,RAX
624 CFI_REL_OFFSET r10,R10
626 CFI_REL_OFFSET r11,R11
628 CFI_REL_OFFSET rbx,RBX
630 CFI_REL_OFFSET rbp,RBP
632 CFI_REL_OFFSET r12,R12
634 CFI_REL_OFFSET r13,R13
636 CFI_REL_OFFSET r14,R14
638 CFI_REL_OFFSET r15,R15
# Pass the error code as arg2 and neutralize orig_rax for signal code.
647 movq ORIG_RAX(%rsp),%rsi /* get error code */
648 movq $-1,ORIG_RAX(%rsp)
650 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
655 GET_THREAD_INFO(%rcx)
658 movl threadinfo_flags(%rcx),%edx
659 movl $_TIF_WORK_MASK,%edi
669 /* There are two places in the kernel that can potentially fault with
670 usergs. Handle them here. The exception handlers after
671 iret run with kernel gs again, so don't set the user space flag.
672 * B stepping K8s sometimes report a truncated RIP for IRET
673 exceptions returning to compat mode. Check for these here too. */
# error_kernelspace: recognize the two kernel RIPs that may fault with the
# user GS still active (iret_label and gs_change) and handle them specially.
674 leaq iret_label(%rip),%rbp
677 movl %ebp,%ebp /* zero extend */
680 cmpq $gs_change,RIP(%rsp)
684 /* Reload gs selector with exception handling */
685 /* edi: new selector */
692 2: mfence /* workaround */
697 .section __ex_table,"a"
699 .quad gs_change,bad_gs
# bad_gs: the gs load faulted while still on the kernel GS base.
702 /* running with kernelgs */
704 swapgs /* switch back to user gs */
711 * Create a kernel thread.
713 * C extern interface:
714 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
716 * asm input arguments:
717 * rdi: fn, rsi: arg, rdx: flags
721 FAKE_STACK_FRAME $child_rip
# Set up a do_fork-style call (clone flags merged with the kernel defaults).
724 # rdi: flags, rsi: usp, rdx: will be &pt_regs
726 orq kernel_thread_flags(%rip),%rdi
739 * It isn't worth checking for a reschedule here,
740 * so internally to the x86_64 port you can rely on kernel_thread()
741 * not to reschedule the child before returning, this avoids the need
742 * of hacks for example to fork off the per-CPU idle tasks.
743 * [Hopefully no generic code relies on the reschedule -AK]
# child_rip: first code the new kernel thread runs (body elided here).
753 * Here we are in the child and the registers are set as they were
754 * at kernel_thread() invocation in the parent.
764 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
766 * C extern interface:
767 * extern long execve(char *name, char **argv, char **envp)
769 * asm input arguments:
770 * rdi: name, rsi: argv, rdx: envp
772 * We want to fallback into:
773 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
775 * do_sys_execve asm fallback arguments:
776 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
# On success (condition set by elided code above), leave via the IRET path.
786 je int_ret_from_sys_call
# Exception vector stubs: each ENTRY expands one of the zeroentry /
# errorentry / paranoidentry macros with its C handler.  paranoidentry
# vectors run on a dedicated exception (IST) stack per the comments.
793 errorentry do_page_fault
795 ENTRY(coprocessor_error)
796 zeroentry do_coprocessor_error
798 ENTRY(simd_coprocessor_error)
799 zeroentry do_simd_coprocessor_error
801 ENTRY(device_not_available)
802 zeroentry math_state_restore
# debug (#DB)
804 /* runs on exception stack */
808 CFI_ADJUST_CFA_OFFSET 8
809 paranoidentry do_debug
810 /* switch back to process stack to restore the state ptrace touched */
813 jnz paranoid_userspace
# NMI / paranoid exit: %ebx from paranoidentry says whether swapgs is needed.
817 /* runs on exception stack */
821 CFI_ADJUST_CFA_OFFSET 8
823 /* ebx: no swapgs flag */
825 testl %ebx,%ebx /* swapgs needed? */
835 GET_THREAD_INFO(%rcx)
836 movl threadinfo_flags(%rcx),%edx
837 testl $_TIF_NEED_RESCHED,%edx
839 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
848 xorl %esi,%esi /* oldset */
849 movq %rsp,%rdi /* &pt_regs */
850 call do_notify_resume
858 zeroentry do_overflow
864 zeroentry do_invalid_op
866 ENTRY(coprocessor_segment_overrun)
867 zeroentry do_coprocessor_segment_overrun
870 zeroentry do_reserved
# double fault (#DF)
872 /* runs on exception stack */
875 paranoidentry do_double_fault
878 jnz paranoid_userspace
883 errorentry do_invalid_TSS
885 ENTRY(segment_not_present)
886 errorentry do_segment_not_present
# stack segment fault (#SS)
888 /* runs on exception stack */
891 paranoidentry do_stack_segment
894 jnz paranoid_userspace
898 ENTRY(general_protection)
899 errorentry do_general_protection
901 ENTRY(alignment_check)
902 errorentry do_alignment_check
905 zeroentry do_divide_error
907 ENTRY(spurious_interrupt_bug)
908 zeroentry do_spurious_interrupt_bug
910 #ifdef CONFIG_X86_MCE
# machine check (#MC)
911 /* runs on exception stack */
915 CFI_ADJUST_CFA_OFFSET 8
916 paranoidentry do_machine_check
922 zeroentry do_call_debug