2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
10 * Jun Nakajima <jun.nakajima@intel.com>
11 * Asit Mallick <asit.k.mallick@intel.com>
16 * entry.S contains the system-call and fault low-level handling routines.
18 * NOTE: This code handles signal-recognition, which happens every time
19 * after an interrupt and after each system call.
21 * Normal syscalls and interrupts don't save a full stack frame, this is
22 * only done for syscall tracing, signals or fork/exec et.al.
24 * A note on terminology:
25 * - top of stack: Architecture defined interrupt frame from SS to RIP
26 * at the top of the kernel process stack.
27 * - partial stack frame: partially saved registers up to R11.
28 * - full stack frame: Like partial stack frame, but all registers saved.
31 * - schedule it carefully for the final hardware.
35 #include <linux/config.h>
36 #ifdef CONFIG_DEBUG_INFO
37 #undef CONFIG_DEBUG_INFO
39 #include <linux/linkage.h>
40 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
52 #include <asm/errno.h>
53 #include <xen/interface/arch-x86_64.h>
54 #include <xen/interface/features.h>
56 #include "irq_vectors.h"
58 #include "xen_entry.S"
62 #ifndef CONFIG_PREEMPT
63 #define retint_kernel retint_restore_args
69 * C code is not supposed to know about undefined top of stack. Every time
70 * a C function with a pt_regs argument is called from the SYSCALL based
71 * fast path FIXUP_TOP_OF_STACK is needed.
72 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
76 /* %rsp:at FRAMEEND */
77 .macro FIXUP_TOP_OF_STACK tmp
78 movq $__USER_CS,CS(%rsp)
82 .macro RESTORE_TOP_OF_STACK tmp,offset=0
85 .macro FAKE_STACK_FRAME child_rip
86 /* push in order ss, rsp, eflags, cs, rip */
89 CFI_ADJUST_CFA_OFFSET 8
90 /*CFI_REL_OFFSET ss,0*/
92 CFI_ADJUST_CFA_OFFSET 8
94 pushq $(1<<9) /* eflags - interrupts on */
95 CFI_ADJUST_CFA_OFFSET 8
96 /*CFI_REL_OFFSET rflags,0*/
97 pushq $__KERNEL_CS /* cs */
98 CFI_ADJUST_CFA_OFFSET 8
99 /*CFI_REL_OFFSET cs,0*/
100 pushq \child_rip /* rip */
101 CFI_ADJUST_CFA_OFFSET 8
103 pushq %rax /* orig rax */
104 CFI_ADJUST_CFA_OFFSET 8
107 .macro UNFAKE_STACK_FRAME
109 CFI_ADJUST_CFA_OFFSET -(6*8)
112 .macro CFI_DEFAULT_STACK start=1
117 CFI_DEF_CFA_OFFSET SS+8
119 CFI_REL_OFFSET r15,R15
120 CFI_REL_OFFSET r14,R14
121 CFI_REL_OFFSET r13,R13
122 CFI_REL_OFFSET r12,R12
123 CFI_REL_OFFSET rbp,RBP
124 CFI_REL_OFFSET rbx,RBX
125 CFI_REL_OFFSET r11,R11
126 CFI_REL_OFFSET r10,R10
129 CFI_REL_OFFSET rax,RAX
130 CFI_REL_OFFSET rcx,RCX
131 CFI_REL_OFFSET rdx,RDX
132 CFI_REL_OFFSET rsi,RSI
133 CFI_REL_OFFSET rdi,RDI
134 CFI_REL_OFFSET rip,RIP
135 /*CFI_REL_OFFSET cs,CS*/
136 /*CFI_REL_OFFSET rflags,EFLAGS*/
137 CFI_REL_OFFSET rsp,RSP
138 /*CFI_REL_OFFSET ss,SS*/
142 * Must be consistent with the definition in arch-x86_64.h:
143 * struct iret_context {
144 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
146 * #define VGCF_IN_SYSCALL (1<<8)
148 .macro HYPERVISOR_IRET flag
151 testl $NMI_MASK,2*8(%rsp)
154 testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
157 /* Direct iret to kernel space. Correct CS and SS. */
162 2: /* Slow iret via hypervisor. */
163 andl $~NMI_MASK, 16(%rsp)
165 jmp hypercall_page + (__HYPERVISOR_iret * 32)
168 .macro SWITCH_TO_KERNEL ssoff,adjust=0
170 orb $1,\ssoff-\adjust+4(%rsp)
175 * A newly forked process directly context switches into this.
181 GET_THREAD_INFO(%rcx)
182 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
186 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
187 je int_ret_from_sys_call
188 testl $_TIF_IA32,threadinfo_flags(%rcx)
189 jnz int_ret_from_sys_call
190 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
191 jmp ret_from_sys_call
194 call syscall_trace_leave
195 GET_THREAD_INFO(%rcx)
200 * System call entry. Up to 6 arguments in registers are supported.
202 * SYSCALL does not save anything on the stack and does not change the
208 * rax system call number
210 * rcx return address for syscall/sysret, C arg3
213 * r10 arg3 (--> moved to rcx for C)
216 * r11 eflags for syscall/sysret, temporary for C
217 * r12-r15,rbp,rbx saved by C code, not touched.
219 * Interrupts are off on entry.
220 * Only called from user space.
222 * XXX if we had a free scratch register we could save the RSP into the stack frame
223 * and report it properly in ps. Unfortunately we haven't.
225 * When the user can change the frames, always force IRET. That is because
226 * it deals with uncanonical addresses better. SYSRET has trouble
227 * with them due to bugs in both AMD and Intel CPUs.
234 /*CFI_REGISTER rflags,r11*/
236 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
237 XEN_UNBLOCK_EVENTS(%r11)
238 GET_THREAD_INFO(%rcx)
239 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
242 cmpq $__NR_syscall_max,%rax
245 call *sys_call_table(,%rax,8) # XXX: rip relative
246 movq %rax,RAX-ARGOFFSET(%rsp)
248 * Syscall return path ending with SYSRET (fast path)
249 * Has incomplete stack frame and undefined top of stack.
251 .globl ret_from_sys_call
253 movl $_TIF_ALLWORK_MASK,%edi
256 GET_THREAD_INFO(%rcx)
257 XEN_BLOCK_EVENTS(%rsi)
258 movl threadinfo_flags(%rcx),%edx
262 XEN_UNBLOCK_EVENTS(%rsi)
265 /*CFI_REGISTER rflags,r11*/
266 HYPERVISOR_IRET VGCF_IN_SYSCALL
268 /* Handle reschedules */
269 /* edx: work, edi: workmask */
272 bt $TIF_NEED_RESCHED,%edx
274 XEN_BLOCK_EVENTS(%rsi)
276 CFI_ADJUST_CFA_OFFSET 8
279 CFI_ADJUST_CFA_OFFSET -8
282 /* Handle a signal */
285 XEN_UNBLOCK_EVENTS(%rsi)
286 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
289 /* Really a signal */
290 /* edx: work flags (arg3) */
291 leaq do_notify_resume(%rip),%rax
292 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
293 xorl %esi,%esi # oldset -> arg2
294 call ptregscall_common
295 1: movl $_TIF_NEED_RESCHED,%edi
296 /* Use IRET because user could have changed frame. This
297 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
302 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
303 jmp ret_from_sys_call
305 /* Do syscall tracing */
309 movq $-ENOSYS,RAX(%rsp)
310 FIXUP_TOP_OF_STACK %rdi
312 call syscall_trace_enter
313 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
315 cmpq $__NR_syscall_max,%rax
317 movq %r10,%rcx /* fixup for C */
318 call *sys_call_table(,%rax,8)
319 movq %rax,RAX-ARGOFFSET(%rsp)
322 call syscall_trace_leave
323 RESTORE_TOP_OF_STACK %rbx
325 /* Use IRET because user could have changed frame */
326 jmp int_ret_from_sys_call
330 * Syscall return path ending with IRET.
331 * Has correct top of stack, but partial stack frame.
333 ENTRY(int_ret_from_sys_call)
335 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
336 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
337 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
338 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
339 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
340 CFI_REL_OFFSET rip,RIP-ARGOFFSET
341 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
342 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
343 CFI_REL_OFFSET rax,RAX-ARGOFFSET
344 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
345 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
346 CFI_REL_OFFSET r8,R8-ARGOFFSET
347 CFI_REL_OFFSET r9,R9-ARGOFFSET
348 CFI_REL_OFFSET r10,R10-ARGOFFSET
349 CFI_REL_OFFSET r11,R11-ARGOFFSET
350 XEN_BLOCK_EVENTS(%rsi)
351 testb $3,CS-ARGOFFSET(%rsp)
353 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
354 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
355 jmp retint_restore_args # return from ring3 kernel
357 movl $_TIF_ALLWORK_MASK,%edi
358 /* edi: mask to check */
360 GET_THREAD_INFO(%rcx)
361 movl threadinfo_flags(%rcx),%edx
364 andl $~TS_COMPAT,threadinfo_status(%rcx)
365 jmp retint_restore_args
367 /* Either reschedule or signal or syscall exit tracking needed. */
368 /* First do a reschedule test. */
369 /* edx: work, edi: workmask */
371 bt $TIF_NEED_RESCHED,%edx
374 XEN_UNBLOCK_EVENTS(%rsi)
376 CFI_ADJUST_CFA_OFFSET 8
379 CFI_ADJUST_CFA_OFFSET -8
383 /* handle signals and tracing -- both require a full stack frame */
386 XEN_UNBLOCK_EVENTS(%rsi)
388 /* Check for syscall exit trace */
389 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
392 CFI_ADJUST_CFA_OFFSET 8
393 leaq 8(%rsp),%rdi # &ptregs -> arg1
394 call syscall_trace_leave
396 CFI_ADJUST_CFA_OFFSET -8
397 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
402 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
404 movq %rsp,%rdi # &ptregs -> arg1
405 xorl %esi,%esi # oldset -> arg2
406 call do_notify_resume
407 1: movl $_TIF_NEED_RESCHED,%edi
415 * Certain special system calls that need to save a complete full stack frame.
418 .macro PTREGSCALL label,func,arg
421 leaq \func(%rip),%rax
422 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
423 jmp ptregscall_common
428 PTREGSCALL stub_clone, sys_clone, %r8
429 PTREGSCALL stub_fork, sys_fork, %rdi
430 PTREGSCALL stub_vfork, sys_vfork, %rdi
431 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
432 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
433 PTREGSCALL stub_iopl, sys_iopl, %rsi
435 ENTRY(ptregscall_common)
437 CFI_ADJUST_CFA_OFFSET -8
438 CFI_REGISTER rip, r11
441 CFI_REGISTER rip, r15
442 FIXUP_TOP_OF_STACK %r11
444 RESTORE_TOP_OF_STACK %r11
446 CFI_REGISTER rip, r11
449 CFI_ADJUST_CFA_OFFSET 8
450 CFI_REL_OFFSET rip, 0
457 CFI_ADJUST_CFA_OFFSET -8
458 CFI_REGISTER rip, r11
460 FIXUP_TOP_OF_STACK %r11
462 RESTORE_TOP_OF_STACK %r11
465 jmp int_ret_from_sys_call
469 * sigreturn is special because it needs to restore all registers on return.
470 * This cannot be done with SYSRET, so use the IRET return path instead.
472 ENTRY(stub_rt_sigreturn)
475 CFI_ADJUST_CFA_OFFSET -8
478 FIXUP_TOP_OF_STACK %r11
479 call sys_rt_sigreturn
480 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
482 jmp int_ret_from_sys_call
486 * initial frame state for interrupts and exceptions
490 CFI_DEF_CFA rsp,SS+8-\ref
491 /*CFI_REL_OFFSET ss,SS-\ref*/
492 CFI_REL_OFFSET rsp,RSP-\ref
493 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
494 /*CFI_REL_OFFSET cs,CS-\ref*/
495 CFI_REL_OFFSET rip,RIP-\ref
498 /* initial frame state for interrupts (and exceptions without error code) */
499 #define INTR_FRAME _frame RIP
500 /* initial frame state for exceptions with error code (and interrupts with
501 vector already pushed) */
502 #define XCPT_FRAME _frame ORIG_RAX
510 movl threadinfo_flags(%rcx),%edx
515 movl EFLAGS-REST_SKIP(%rsp), %eax
516 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
517 XEN_GET_VCPU_INFO(%rsi)
518 andb evtchn_upcall_mask(%rsi),%al
519 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
520 jnz restore_all_enable_events # != 0 => enable event delivery
521 XEN_PUT_VCPU_INFO(%rsi)
526 /* edi: workmask, edx: work */
529 bt $TIF_NEED_RESCHED,%edx
531 XEN_UNBLOCK_EVENTS(%rsi)
534 CFI_ADJUST_CFA_OFFSET 8
537 CFI_ADJUST_CFA_OFFSET -8
538 XEN_BLOCK_EVENTS(%rsi)
539 GET_THREAD_INFO(%rcx)
544 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
545 jz retint_restore_args
546 XEN_UNBLOCK_EVENTS(%rsi)
548 movq $-1,ORIG_RAX(%rsp)
549 xorl %esi,%esi # oldset
550 movq %rsp,%rdi # &pt_regs
551 call do_notify_resume
553 XEN_BLOCK_EVENTS(%rsi)
554 movl $_TIF_NEED_RESCHED,%edi
555 GET_THREAD_INFO(%rcx)
558 #ifdef CONFIG_PREEMPT
559 /* Returning to kernel space. Check if we need preemption */
560 /* rcx: threadinfo. interrupts off. */
563 cmpl $0,threadinfo_preempt_count(%rcx)
564 jnz retint_restore_args
565 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
566 jnc retint_restore_args
567 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
568 jnc retint_restore_args
569 call preempt_schedule_irq
570 jmp retint_kernel /* check again */
577 .macro apicinterrupt num,func
580 CFI_ADJUST_CFA_OFFSET 8
587 ENTRY(thermal_interrupt)
588 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
590 ENTRY(threshold_interrupt)
591 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
594 ENTRY(reschedule_interrupt)
595 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
597 .macro INVALIDATE_ENTRY num
598 ENTRY(invalidate_interrupt\num)
599 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
611 ENTRY(call_function_interrupt)
612 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
615 #ifdef CONFIG_X86_LOCAL_APIC
616 ENTRY(apic_timer_interrupt)
617 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
619 ENTRY(error_interrupt)
620 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
622 ENTRY(spurious_interrupt)
623 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
625 #endif /* !CONFIG_XEN */
628 * Exception entry points.
634 addq $0x10,%rsp /* skip rcx and r11 */
635 pushq $0 /* push error code/oldrax */
636 CFI_ADJUST_CFA_OFFSET 8
637 pushq %rax /* push real oldrax to the rdi slot */
638 CFI_ADJUST_CFA_OFFSET 8
644 .macro errorentry sym
648 addq $0x10,%rsp /* rsp points to the error code */
650 CFI_ADJUST_CFA_OFFSET 8
657 /* error code is on the stack already */
658 /* handle NMI like exceptions that can happen everywhere */
659 .macro paranoidentry sym, ist=0
662 addq $0x10,%rsp /* skip rcx and r11 */
667 movl $MSR_GS_BASE,%ecx
676 movq %gs:pda_data_offset, %rbp
679 movq ORIG_RAX(%rsp),%rsi
680 movq $-1,ORIG_RAX(%rsp)
682 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
686 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
689 XEN_BLOCK_EVENTS(%rsi)
694 * Exception entry point. This expects an error code/orig_rax on the stack
695 * and the exception handler in %rax.
699 /* rdi slot contains rax, oldrax contains error code */
702 CFI_ADJUST_CFA_OFFSET (14*8)
704 CFI_REL_OFFSET rsi,RSI
705 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
707 CFI_REL_OFFSET rdx,RDX
709 CFI_REL_OFFSET rcx,RCX
710 movq %rsi,10*8(%rsp) /* store rax */
711 CFI_REL_OFFSET rax,RAX
717 CFI_REL_OFFSET r10,R10
719 CFI_REL_OFFSET r11,R11
721 CFI_REL_OFFSET rbx,RBX
723 CFI_REL_OFFSET rbp,RBP
725 CFI_REL_OFFSET r12,R12
727 CFI_REL_OFFSET r13,R13
729 CFI_REL_OFFSET r14,R14
731 CFI_REL_OFFSET r15,R15
733 cmpl $__KERNEL_CS,CS(%rsp)
739 movq ORIG_RAX(%rsp),%rsi # get error code
740 movq $-1,ORIG_RAX(%rsp)
745 XEN_BLOCK_EVENTS(%rsi)
746 GET_THREAD_INFO(%rcx)
747 testb $3,CS-ARGOFFSET(%rsp)
749 movl threadinfo_flags(%rcx),%edx
750 movl $_TIF_WORK_MASK,%edi
753 jmp retint_restore_args
757 * We need to re-write the logic here because we don't do iretq to
758 * to return to user mode. It's still possible that we get trap/fault
759 * in the kernel (when accessing buffers pointed to by system calls,
765 /* There are two places in the kernel that can potentially fault with
766 usergs. Handle them here. The exception handlers after
767 iret run with kernel gs again, so don't set the user space flag.
768 B stepping K8s sometimes report a truncated RIP for IRET
769 exceptions returning to compat mode. Check for these here too. */
770 leaq iret_label(%rip),%rbp
773 movl %ebp,%ebp /* zero extend */
776 cmpq $gs_change,RIP(%rsp)
781 ENTRY(hypervisor_callback)
782 zeroentry do_hypervisor_callback
785 * Copied from arch/xen/i386/kernel/entry.S
787 # A note on the "critical region" in our callback handler.
788 # We want to avoid stacking callback handlers due to events occurring
789 # during handling of the last event. To do this, we keep events disabled
790 # until we've done all processing. HOWEVER, we must enable events before
791 # popping the stack frame (can't be done atomically) and so it would still
792 # be possible to get enough handler activations to overflow the stack.
793 # Although unlikely, bugs of that kind are hard to track down, so we'd
794 # like to avoid the possibility.
795 # So, on entry to the handler we detect whether we interrupted an
796 # existing activation in its critical region -- if so, we pop the current
797 # activation and restart the handler using the previous one.
798 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
799 # Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
800 # see the correct pointer to the pt_regs
801 movq %rdi, %rsp # we don't return, adjust the stack frame
802 11: movq %gs:pda_irqstackptr,%rax
803 incl %gs:pda_irqcount
806 call evtchn_do_upcall
808 decl %gs:pda_irqcount
811 #ifdef CONFIG_X86_LOCAL_APIC
813 zeroentry do_nmi_callback
814 ENTRY(do_nmi_callback)
817 orl $NMI_MASK,EFLAGS(%rsp)
819 XEN_BLOCK_EVENTS(%rsi)
820 GET_THREAD_INFO(%rcx)
821 jmp retint_restore_args
826 restore_all_enable_events:
827 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
829 scrit: /**** START OF CRITICAL REGION ****/
830 XEN_TEST_PENDING(%rsi)
831 jnz 14f # process more events if necessary...
832 XEN_PUT_VCPU_INFO(%rsi)
836 14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
837 XEN_PUT_VCPU_INFO(%rsi)
839 movq %rsp,%rdi # set the argument again
841 ecrit: /**** END OF CRITICAL REGION ****/
842 # At this point, unlike on x86-32, we don't do the fixup to simplify the
843 # code and the stack frame is more complex on x86-64.
844 # When the kernel is interrupted in the critical section, the kernel
845 # will do IRET in that case, and everything will be restored at that point,
846 # i.e. it just resumes from the next instruction interrupted with the same context.
848 # Hypervisor uses this for application faults while it executes.
849 # We get here for two reasons:
850 # 1. Fault while reloading DS, ES, FS or GS
851 # 2. Fault while executing IRET
852 # Category 1 we do not need to fix up as Xen has already reloaded all segment
853 # registers that could be reloaded and zeroed the others.
854 # Category 2 we fix up by killing the current process. We cannot use the
855 # normal Linux return path in this case because if we use the IRET hypercall
856 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
857 # We distinguish between categories by comparing each saved segment register
858 # with its current contents: any discrepancy means we are in category 1.
859 ENTRY(failsafe_callback)
872 /* All segments match their saved values => Category 2 (Bad IRET). */
876 movq $-9999,%rdi /* better code? */
878 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
886 .section __ex_table,"a"
888 .quad gs_change,bad_gs
891 /* running with kernelgs */
893 /* swapgs */ /* switch back to user gs */
901 * Create a kernel thread.
903 * C extern interface:
904 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
906 * asm input arguments:
907 * rdi: fn, rsi: arg, rdx: flags
911 FAKE_STACK_FRAME $child_rip
914 # rdi: flags, rsi: usp, rdx: will be &pt_regs
916 orq kernel_thread_flags(%rip),%rdi
929 * It isn't worth checking for a reschedule here,
930 * so internally to the x86_64 port you can rely on kernel_thread()
931 * not to reschedule the child before returning, this avoids the need
932 * of hacks for example to fork off the per-CPU idle tasks.
933 * [Hopefully no generic code relies on the reschedule -AK]
943 * Here we are in the child and the registers are set as they were
944 * at kernel_thread() invocation in the parent.
954 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
956 * C extern interface:
957 * extern long execve(char *name, char **argv, char **envp)
959 * asm input arguments:
960 * rdi: name, rsi: argv, rdx: envp
962 * We want to fallback into:
963 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
965 * do_sys_execve asm fallback arguments:
966 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
977 jmp int_ret_from_sys_call
983 KPROBE_ENTRY(page_fault)
984 errorentry do_page_fault
987 ENTRY(coprocessor_error)
988 zeroentry do_coprocessor_error
990 ENTRY(simd_coprocessor_error)
991 zeroentry do_simd_coprocessor_error
993 ENTRY(device_not_available)
994 zeroentry math_state_restore
996 /* runs on exception stack */
1000 CFI_ADJUST_CFA_OFFSET 8 */
1002 /* jmp paranoid_exit */
1007 /* runs on exception stack */
1011 CFI_ADJUST_CFA_OFFSET 8
1012 paranoidentry do_nmi
1014 * "Paranoid" exit path from exception stack.
1015 * Paranoid because this is used by NMIs and cannot take
1016 * any kernel state for granted.
1017 * We don't do kernel preemption checks here, because only
1018 * NMI should be common and it does not enable IRQs and
1019 * cannot get reschedule ticks.
1021 /* ebx: no swapgs flag */
1023 testl %ebx,%ebx /* swapgs needed? */
1024 jnz paranoid_restore
1026 jnz paranoid_userspace
1033 GET_THREAD_INFO(%rcx)
1034 movl threadinfo_flags(%rcx),%ebx
1035 andl $_TIF_WORK_MASK,%ebx
1037 movq %rsp,%rdi /* &pt_regs */
1039 movq %rax,%rsp /* switch stack for scheduling */
1040 testl $_TIF_NEED_RESCHED,%ebx
1041 jnz paranoid_schedule
1042 movl %ebx,%edx /* arg3: thread flags */
1044 xorl %esi,%esi /* arg2: oldset */
1045 movq %rsp,%rdi /* arg1: &pt_regs */
1046 call do_notify_resume
1048 jmp paranoid_userspace
1053 jmp paranoid_userspace
1061 CFI_ADJUST_CFA_OFFSET 8 */
1063 /* jmp paranoid_exit */
1068 zeroentry do_overflow
1074 zeroentry do_invalid_op
1076 ENTRY(coprocessor_segment_overrun)
1077 zeroentry do_coprocessor_segment_overrun
1080 zeroentry do_reserved
1083 /* runs on exception stack */
1086 paranoidentry do_double_fault
1092 errorentry do_invalid_TSS
1094 ENTRY(segment_not_present)
1095 errorentry do_segment_not_present
1097 /* runs on exception stack */
1098 ENTRY(stack_segment)
1100 errorentry do_stack_segment
1103 KPROBE_ENTRY(general_protection)
1104 errorentry do_general_protection
1107 ENTRY(alignment_check)
1108 errorentry do_alignment_check
1111 zeroentry do_divide_error
1113 ENTRY(spurious_interrupt_bug)
1114 zeroentry do_spurious_interrupt_bug
1116 #ifdef CONFIG_X86_MCE
1117 /* runs on exception stack */
1118 ENTRY(machine_check)
1121 CFI_ADJUST_CFA_OFFSET 8
1122 paranoidentry do_machine_check
1129 movq %gs:pda_irqstackptr,%rax
1131 CFI_DEF_CFA_REGISTER rdx
1132 incl %gs:pda_irqcount
1135 /*todo CFI_DEF_CFA_EXPRESSION ...*/
1138 CFI_DEF_CFA_REGISTER rsp
1139 decl %gs:pda_irqcount