2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
8 * Jun Nakajima <jun.nakajima@intel.com>
9 * Asit Mallick <asit.k.mallick@intel.com>
14 * entry.S contains the system-call and fault low-level handling routines.
16 * NOTE: This code handles signal-recognition, which happens every time
17 * after an interrupt and after each system call.
19 * Normal syscalls and interrupts don't save a full stack frame, this is
20 * only done for syscall tracing, signals or fork/exec et.al.
22 * A note on terminology:
23 * - top of stack: Architecture defined interrupt frame from SS to RIP
24 * at the top of the kernel process stack.
25 * - partial stack frame: partially saved registers up to R11.
26 * - full stack frame: Like partial stack frame, but all registers saved.
29 * - CFI macros are used to generate dwarf2 unwind information for better
30 * backtraces. They don't change any code.
31 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
32 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
33 * There are unfortunately lots of special cases where some registers
34 * are not touched. The macro is a big mess that should be cleaned up.
35 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
36 * Gives a full stack frame.
37 * - ENTRY/END Define functions in the symbol table.
38 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
39 * frame that is otherwise undefined after a SYSCALL
40 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
41 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
44 #include <linux/linkage.h>
45 #include <asm/segment.h>
46 #include <asm/cache.h>
47 #include <asm/errno.h>
48 #include <asm/dwarf2.h>
49 #include <asm/calling.h>
50 #include <asm/asm-offsets.h>
52 #include <asm/unistd.h>
53 #include <asm/thread_info.h>
54 #include <asm/hw_irq.h>
56 #include <asm/irqflags.h>
57 #include <asm/errno.h>
58 #include <xen/interface/arch-x86_64.h>
59 #include <xen/interface/features.h>
61 #include "irq_vectors.h"
63 #include "xen_entry.S"
67 #ifndef CONFIG_PREEMPT
68 #define retint_kernel retint_restore_args
# --- IRQ-tracing and syscall top-of-stack fixup macros ----------------------
# NOTE(review): this excerpt is incomplete -- interior lines (including each
# macro's closing .endm) are missing from this view. Code lines below are
# kept byte-identical; do not assemble this fragment as-is.
72 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
73 #ifdef CONFIG_TRACE_IRQFLAGS
# Bit 9 of the saved EFLAGS is IF: test whether the frame being returned
# to had interrupts disabled.
74 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
84 * C code is not supposed to know about undefined top of stack. Every time
85 * a C function with a pt_regs argument is called from the SYSCALL based
86 * fast path FIXUP_TOP_OF_STACK is needed.
87 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
91 /* %rsp:at FRAMEEND */
# FIXUP_TOP_OF_STACK: fill in hardware-frame fields left undefined by
# SYSCALL (visible here: CS forced to the user code segment selector).
92 .macro FIXUP_TOP_OF_STACK tmp
93 movq $__USER_CS,CS(%rsp)
97 .macro RESTORE_TOP_OF_STACK tmp,offset=0
# FAKE_STACK_FRAME: build a synthetic interrupt frame (ss, rsp, eflags, cs,
# rip) plus an orig-rax slot, so a kernel thread started at \child_rip sees
# a normal pt_regs layout. Each pushq is paired with CFI_ADJUST_CFA_OFFSET
# to keep the dwarf2 unwind info in sync.
# NOTE(review): several interior lines are missing from this excerpt.
100 .macro FAKE_STACK_FRAME child_rip
101 /* push in order ss, rsp, eflags, cs, rip */
104 CFI_ADJUST_CFA_OFFSET 8
105 /*CFI_REL_OFFSET ss,0*/
107 CFI_ADJUST_CFA_OFFSET 8
# (1<<9) is the IF bit: the faked frame starts with interrupts enabled.
109 pushq $(1<<9) /* eflags - interrupts on */
110 CFI_ADJUST_CFA_OFFSET 8
111 /*CFI_REL_OFFSET rflags,0*/
112 pushq $__KERNEL_CS /* cs */
113 CFI_ADJUST_CFA_OFFSET 8
114 /*CFI_REL_OFFSET cs,0*/
115 pushq \child_rip /* rip */
116 CFI_ADJUST_CFA_OFFSET 8
118 pushq %rax /* orig rax */
119 CFI_ADJUST_CFA_OFFSET 8
# UNFAKE_STACK_FRAME: drop the six quadwords pushed by FAKE_STACK_FRAME.
122 .macro UNFAKE_STACK_FRAME
124 CFI_ADJUST_CFA_OFFSET -(6*8)
# CFI_DEFAULT_STACK: declare the dwarf2 unwind state for a full pt_regs
# frame -- CFA at SS+8 and one CFI_REL_OFFSET per saved-register slot.
# The commented-out entries (ss, cs, rflags) are intentionally disabled.
# NOTE(review): some interior lines are missing from this excerpt
# (including the closing .endm).
127 .macro CFI_DEFAULT_STACK start=1
133 CFI_DEF_CFA_OFFSET SS+8
135 CFI_REL_OFFSET r15,R15
136 CFI_REL_OFFSET r14,R14
137 CFI_REL_OFFSET r13,R13
138 CFI_REL_OFFSET r12,R12
139 CFI_REL_OFFSET rbp,RBP
140 CFI_REL_OFFSET rbx,RBX
141 CFI_REL_OFFSET r11,R11
142 CFI_REL_OFFSET r10,R10
145 CFI_REL_OFFSET rax,RAX
146 CFI_REL_OFFSET rcx,RCX
147 CFI_REL_OFFSET rdx,RDX
148 CFI_REL_OFFSET rsi,RSI
149 CFI_REL_OFFSET rdi,RDI
150 CFI_REL_OFFSET rip,RIP
151 /*CFI_REL_OFFSET cs,CS*/
152 /*CFI_REL_OFFSET rflags,EFLAGS*/
153 CFI_REL_OFFSET rsp,RSP
154 /*CFI_REL_OFFSET ss,SS*/
158 * Must be consistent with the definition in arch-x86_64.h:
159 * struct iret_context {
160 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
162 * #define VGCF_IN_SYSCALL (1<<8)
# HYPERVISOR_IRET: return-to-guest path under Xen. Tests the saved-flags
# NMI mask and the supervisor_mode_kernel feature bit to pick a fast
# direct-iret path; otherwise falls through to the slow iret hypercall.
# NOTE(review): interior lines (labels 1:, .endm, etc.) are missing from
# this excerpt.
164 .macro HYPERVISOR_IRET flag
167 testl $NMI_MASK,2*8(%rsp)
170 testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
173 /* Direct iret to kernel space. Correct CS and SS. */
178 2: /* Slow iret via hypervisor. */
# Clear the NMI-mask bit in the saved flags before handing off to Xen.
179 andl $~NMI_MASK, 16(%rsp)
# Each hypercall stub in hypercall_page is 32 bytes wide.
181 jmp hypercall_page + (__HYPERVISOR_iret * 32)
# SWITCH_TO_KERNEL: sets bit 0 of the byte at \ssoff-\adjust+4 in the saved
# frame -- presumably flagging the saved SS/selector for a kernel-mode
# return; TODO confirm against the full macro body (truncated here).
184 .macro SWITCH_TO_KERNEL ssoff,adjust=0
186 orb $1,\ssoff-\adjust+4(%rsp)
191 * A newly forked process directly context switches into this.
# NOTE(review): the entry label and several interior lines are missing from
# this excerpt; code lines below are byte-identical to the original.
# Restore the kernel's canonical eflags (saved in kernel_eflags).
196 push kernel_eflags(%rip)
197 CFI_ADJUST_CFA_OFFSET 4
198 popf # reset kernel eflags
199 CFI_ADJUST_CFA_OFFSET -4
201 GET_THREAD_INFO(%rcx)
# Take the slow (tracing) exit path if syscall trace/audit is active.
202 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
# Saved CS with RPL bits clear => returning into a kernel thread.
206 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
207 je int_ret_from_sys_call
# 32-bit (IA32 emulation) tasks must also use the IRET return path.
208 testl $_TIF_IA32,threadinfo_flags(%rcx)
209 jnz int_ret_from_sys_call
210 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
211 jmp ret_from_sys_call
# Tracing path: report syscall exit to the tracer, then re-check flags.
214 call syscall_trace_leave
215 GET_THREAD_INFO(%rcx)
221 * System call entry. Up to 6 arguments in registers are supported.
223 * SYSCALL does not save anything on the stack and does not change the
229 * rax system call number
231 * rcx return address for syscall/sysret, C arg3
234 * r10 arg3 (--> moved to rcx for C)
237 * r11 eflags for syscall/sysret, temporary for C
238 * r12-r15,rbp,rbx saved by C code, not touched.
240 * Interrupts are off on entry.
241 * Only called from user space.
243 * XXX if we had a free scratch register we could save the RSP into the stack frame
244 * and report it properly in ps. Unfortunately we haven't.
246 * When user can change the frames always force IRET. That is because
247 * it deals with uncanonical addresses better. SYSRET has trouble
248 * with them due to bugs in both AMD and Intel CPUs.
# --- system_call fast path (SYSCALL entry, SYSRET/HYPERVISOR_IRET exit) -----
# NOTE(review): the ENTRY(system_call) label and many interior lines
# (labels, jumps, SAVE_ARGS) are missing from this excerpt; the code lines
# below are kept byte-identical.
254 CFI_DEF_CFA rsp,PDA_STACKOFFSET
256 /*CFI_REGISTER rflags,r11*/
# Save the syscall number so it can be restored on restart.
258 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
260 * No need to follow this irqs off/on section - it's straight
# XEN_UNBLOCK_EVENTS is the paravirt equivalent of sti.
263 XEN_UNBLOCK_EVENTS(%r11)
264 GET_THREAD_INFO(%rcx)
# Any trace/audit/seccomp flag forces the slow (tracesys) path.
265 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
# Bounds-check the syscall number before the table dispatch.
267 cmpq $__NR_syscall_max,%rax
270 call *sys_call_table(,%rax,8) # XXX: rip relative
271 movq %rax,RAX-ARGOFFSET(%rsp)
273 * Syscall return path ending with SYSRET (fast path)
274 * Has incomplete stack frame and undefined top of stack.
277 movl $_TIF_ALLWORK_MASK,%edi
280 GET_THREAD_INFO(%rcx)
281 XEN_BLOCK_EVENTS(%rsi)
283 movl threadinfo_flags(%rcx),%edx
288 * sysretq will re-enable interrupts:
291 XEN_UNBLOCK_EVENTS(%rsi)
294 /*CFI_REGISTER rflags,r11*/
# Return to the guest via Xen, flagging that we came in through SYSCALL.
295 HYPERVISOR_IRET VGCF_IN_SYSCALL
298 /* Handle reschedules */
299 /* edx: work, edi: workmask */
301 bt $TIF_NEED_RESCHED,%edx
304 XEN_UNBLOCK_EVENTS(%rsi)
306 CFI_ADJUST_CFA_OFFSET 8
309 CFI_ADJUST_CFA_OFFSET -8
312 /* Handle a signal */
315 XEN_UNBLOCK_EVENTS(%rsi)
316 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
319 /* Really a signal */
320 /* edx: work flags (arg3) */
321 leaq do_notify_resume(%rip),%rax
322 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
323 xorl %esi,%esi # oldset -> arg2
324 call ptregscall_common
325 1: movl $_TIF_NEED_RESCHED,%edi
326 /* Use IRET because user could have changed frame. This
327 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
# Bad syscall number: fail with -ENOSYS.
333 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
334 jmp ret_from_sys_call
336 /* Do syscall tracing */
339 movq $-ENOSYS,RAX(%rsp)
340 FIXUP_TOP_OF_STACK %rdi
342 call syscall_trace_enter
343 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
345 cmpq $__NR_syscall_max,%rax
# SysV C ABI takes arg4 in rcx, but SYSCALL clobbered rcx, so the
# caller passed it in r10; move it back before the C dispatch.
349 movq %r10,%rcx /* fixup for C */
350 call *sys_call_table(,%rax,8)
351 1: movq %rax,RAX-ARGOFFSET(%rsp)
352 /* Use IRET because user could have changed frame */
355 * Syscall return path ending with IRET.
356 * Has correct top of stack, but partial stack frame.
# NOTE(review): several interior lines (labels, conditional jumps) are
# missing from this excerpt; code lines below are byte-identical.
358 .globl int_ret_from_sys_call
359 int_ret_from_sys_call:
# Block event delivery (paravirt interrupt-disable) while checking flags.
360 XEN_BLOCK_EVENTS(%rsi)
# Saved CS RPL bits distinguish a user-mode return from a ring-3 kernel.
362 testb $3,CS-ARGOFFSET(%rsp)
364 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
365 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
366 jmp retint_restore_args # return from ring3 kernel
368 movl $_TIF_ALLWORK_MASK,%edi
369 /* edi: mask to check */
371 GET_THREAD_INFO(%rcx)
372 movl threadinfo_flags(%rcx),%edx
# Leave 32-bit-compat syscall mode before returning.
375 andl $~TS_COMPAT,threadinfo_status(%rcx)
376 jmp retint_restore_args
378 /* Either reschedule or signal or syscall exit tracking needed. */
379 /* First do a reschedule test. */
380 /* edx: work, edi: workmask */
382 bt $TIF_NEED_RESCHED,%edx
385 XEN_UNBLOCK_EVENTS(%rsi)
387 CFI_ADJUST_CFA_OFFSET 8
390 CFI_ADJUST_CFA_OFFSET -8
391 XEN_BLOCK_EVENTS(%rsi)
395 /* handle signals and tracing -- both require a full stack frame */
398 XEN_UNBLOCK_EVENTS(%rsi)
400 /* Check for syscall exit trace */
401 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
404 CFI_ADJUST_CFA_OFFSET 8
405 leaq 8(%rsp),%rdi # &ptregs -> arg1
406 call syscall_trace_leave
408 CFI_ADJUST_CFA_OFFSET -8
# Exit-trace work is done; drop those bits from the re-check mask.
409 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
413 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
415 movq %rsp,%rdi # &ptregs -> arg1
416 xorl %esi,%esi # oldset -> arg2
417 call do_notify_resume
418 1: movl $_TIF_NEED_RESCHED,%edi
421 XEN_BLOCK_EVENTS(%rsi)
428 * Certain special system calls that need to save a complete full stack frame.
# PTREGSCALL: generate a stub that loads the real handler into %rax, points
# \arg at the pt_regs on the stack, and tail-jumps to ptregscall_common.
# NOTE(review): interior lines (ENTRY/\label, END, .endm) are missing from
# this excerpt.
431 .macro PTREGSCALL label,func,arg
434 leaq \func(%rip),%rax
435 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
436 jmp ptregscall_common
# Stubs for syscalls whose handlers need a full pt_regs; the third operand
# is the register that receives the pt_regs pointer (per-handler arg slot).
442 PTREGSCALL stub_clone, sys_clone, %r8
443 PTREGSCALL stub_fork, sys_fork, %rdi
444 PTREGSCALL stub_vfork, sys_vfork, %rdi
445 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
446 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
447 PTREGSCALL stub_iopl, sys_iopl, %rsi
# Shared tail: fix up the top of stack, call the handler in %rax, then
# restore. NOTE(review): body is heavily truncated in this excerpt.
449 ENTRY(ptregscall_common)
451 CFI_ADJUST_CFA_OFFSET -8
452 CFI_REGISTER rip, r11
455 CFI_REGISTER rip, r15
456 FIXUP_TOP_OF_STACK %r11
458 RESTORE_TOP_OF_STACK %r11
460 CFI_REGISTER rip, r11
463 CFI_ADJUST_CFA_OFFSET 8
464 CFI_REL_OFFSET rip, 0
467 END(ptregscall_common)
# Truncated stub (presumably stub_execve -- its ENTRY line is missing from
# this excerpt); exits via the IRET return path.
472 CFI_ADJUST_CFA_OFFSET -8
473 CFI_REGISTER rip, r11
475 FIXUP_TOP_OF_STACK %r11
477 RESTORE_TOP_OF_STACK %r11
480 jmp int_ret_from_sys_call
485 * sigreturn is special because it needs to restore all registers on return.
486 * This cannot be done with SYSRET, so use the IRET return path instead.
488 ENTRY(stub_rt_sigreturn)
491 CFI_ADJUST_CFA_OFFSET -8
494 FIXUP_TOP_OF_STACK %r11
495 call sys_rt_sigreturn
496 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
498 jmp int_ret_from_sys_call
500 END(stub_rt_sigreturn)
503 * initial frame state for interrupts and exceptions
# _frame \ref: dwarf2 CFA/register annotations for an entry frame whose
# top is \ref bytes into the pt_regs layout. NOTE(review): the .macro
# line itself is missing from this excerpt.
508 CFI_DEF_CFA rsp,SS+8-\ref
509 /*CFI_REL_OFFSET ss,SS-\ref*/
510 CFI_REL_OFFSET rsp,RSP-\ref
511 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
512 /*CFI_REL_OFFSET cs,CS-\ref*/
513 CFI_REL_OFFSET rip,RIP-\ref
516 /* initial frame state for interrupts (and exceptions without error code) */
517 #define INTR_FRAME _frame RIP
518 /* initial frame state for exceptions with error code (and interrupts with
519 vector already pushed) */
520 #define XCPT_FRAME _frame ORIG_RAX
# --- interrupt-return (retint) path ----------------------------------------
# NOTE(review): the retint_check/retint_restore_args labels and several
# interior lines are missing from this excerpt; code is byte-identical.
528 movl threadinfo_flags(%rcx),%edx
# Compute (saved IF) & ~(event mask): nonzero means events should be
# re-enabled on the way out.
533 movl EFLAGS-REST_SKIP(%rsp), %eax
534 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
535 XEN_GET_VCPU_INFO(%rsi)
536 andb evtchn_upcall_mask(%rsi),%al
537 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
538 jnz restore_all_enable_events # != 0 => enable event delivery
539 XEN_PUT_VCPU_INFO(%rsi)
544 /* edi: workmask, edx: work */
547 bt $TIF_NEED_RESCHED,%edx
550 XEN_UNBLOCK_EVENTS(%rsi)
552 CFI_ADJUST_CFA_OFFSET 8
555 CFI_ADJUST_CFA_OFFSET -8
556 GET_THREAD_INFO(%rcx)
557 XEN_BLOCK_EVENTS(%rsi)
# Signal-delivery leg of the work loop.
562 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
563 jz retint_restore_args
565 XEN_UNBLOCK_EVENTS(%rsi)
# Mark the frame as "not a syscall" so signal restart logic skips it.
567 movq $-1,ORIG_RAX(%rsp)
568 xorl %esi,%esi # oldset
569 movq %rsp,%rdi # &pt_regs
570 call do_notify_resume
572 XEN_BLOCK_EVENTS(%rsi)
574 movl $_TIF_NEED_RESCHED,%edi
575 GET_THREAD_INFO(%rcx)
578 #ifdef CONFIG_PREEMPT
579 /* Returning to kernel space. Check if we need preemption */
580 /* rcx: threadinfo. interrupts off. */
# Preempt only when count==0, resched is requested, and the interrupted
# context had interrupts enabled (saved IF set).
582 cmpl $0,threadinfo_preempt_count(%rcx)
583 jnz retint_restore_args
584 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
585 jnc retint_restore_args
586 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
587 jnc retint_restore_args
588 call preempt_schedule_irq
589 jmp retint_kernel /* check again */
593 END(common_interrupt)
# apicinterrupt: common stub generator for APIC vectors -- pushes the
# vector number and dispatches to \func. NOTE(review): interior lines
# (pushq, interrupt \func, .endm) are missing from this excerpt.
598 .macro apicinterrupt num,func
601 CFI_ADJUST_CFA_OFFSET 8
# One ENTRY per APIC source, each expanding the macro above.
608 ENTRY(thermal_interrupt)
609 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
610 END(thermal_interrupt)
612 ENTRY(threshold_interrupt)
613 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
614 END(threshold_interrupt)
617 ENTRY(reschedule_interrupt)
618 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
619 END(reschedule_interrupt)
# INVALIDATE_ENTRY \num: per-vector TLB-shootdown stubs.
621 .macro INVALIDATE_ENTRY num
622 ENTRY(invalidate_interrupt\num)
623 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
624 END(invalidate_interrupt\num)
636 ENTRY(call_function_interrupt)
637 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
638 END(call_function_interrupt)
641 ENTRY(apic_timer_interrupt)
642 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
643 END(apic_timer_interrupt)
645 ENTRY(error_interrupt)
646 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
649 ENTRY(spurious_interrupt)
650 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
651 END(spurious_interrupt)
652 #endif /* !CONFIG_XEN */
655 * Exception entry points.
# --- zeroentry / errorentry / paranoidentry fragments -----------------------
# NOTE(review): the .macro zeroentry line and most of each macro body
# (including .endm) are missing from this excerpt; code is byte-identical.
# zeroentry fragment: strip the Xen-saved rcx/r11 and synthesize a zero
# error code so the handler sees a uniform frame.
661 addq $0x10,%rsp /* skip rcx and r11 */
662 pushq $0 /* push error code/oldrax */
663 CFI_ADJUST_CFA_OFFSET 8
664 pushq %rax /* push real oldrax to the rdi slot */
665 CFI_ADJUST_CFA_OFFSET 8
# errorentry: same, but the CPU already pushed a real error code.
671 .macro errorentry sym
675 addq $0x10,%rsp /* rsp points to the error code */
677 CFI_ADJUST_CFA_OFFSET 8
684 /* error code is on the stack already */
685 /* handle NMI like exceptions that can happen everywhere */
686 .macro paranoidentry sym, ist=0
689 addq $0x10,%rsp /* skip rcx and r11 */
694 movl $MSR_GS_BASE,%ecx
703 movq %gs:pda_data_offset, %rbp
706 movq ORIG_RAX(%rsp),%rsi
707 movq $-1,ORIG_RAX(%rsp)
# Shift the IST slot so a nested same-vector exception gets a fresh
# stack, then restore it after the handler returns.
709 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
713 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
715 XEN_BLOCK_EVENTS(%rsi)
723 * Exception entry point. This expects an error code/orig_rax on the stack
724 * and the exception handler in %rax.
# NOTE(review): numerous interior lines (register saves between the CFI
# annotations, branch targets) are missing from this excerpt.
726 KPROBE_ENTRY(error_entry)
728 /* rdi slot contains rax, oldrax contains error code */
# Reserve space for the full register save area in one adjustment.
731 CFI_ADJUST_CFA_OFFSET (14*8)
733 CFI_REL_OFFSET rsi,RSI
734 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
736 CFI_REL_OFFSET rdx,RDX
738 CFI_REL_OFFSET rcx,RCX
739 movq %rsi,10*8(%rsp) /* store rax */
740 CFI_REL_OFFSET rax,RAX
746 CFI_REL_OFFSET r10,R10
748 CFI_REL_OFFSET r11,R11
750 CFI_REL_OFFSET rbx,RBX
752 CFI_REL_OFFSET rbp,RBP
754 CFI_REL_OFFSET r12,R12
756 CFI_REL_OFFSET r13,R13
758 CFI_REL_OFFSET r14,R14
760 CFI_REL_OFFSET r15,R15
# Distinguish faults from kernel vs. user context by the saved CS.
762 cmpl $__KERNEL_CS,CS(%rsp)
768 movq ORIG_RAX(%rsp),%rsi /* get error code */
769 movq $-1,ORIG_RAX(%rsp)
773 XEN_BLOCK_EVENTS(%rsi)
775 GET_THREAD_INFO(%rcx)
776 testb $3,CS-ARGOFFSET(%rsp)
778 movl threadinfo_flags(%rcx),%edx
779 movl $_TIF_WORK_MASK,%edi
782 jmp retint_restore_args
786 * We need to re-write the logic here because we don't do iretq
787 * to return to user mode. It's still possible that we get trap/fault
788 * in the kernel (when accessing buffers pointed to by system calls,
794 /* There are two places in the kernel that can potentially fault with
795 usergs. Handle them here. The exception handlers after
796 iret run with kernel gs again, so don't set the user space flag.
797 B stepping K8s sometimes report a truncated RIP for IRET
798 exceptions returning to compat mode. Check for these here too. */
799 leaq iret_label(%rip),%rbp
# Compare against the low 32 bits only (works around the truncated-RIP
# erratum described above).
802 movl %ebp,%ebp /* zero extend */
805 cmpq $gs_change,RIP(%rsp)
809 KPROBE_END(error_entry)
# --- Xen event-channel upcall and NMI callback ------------------------------
# NOTE(review): several interior lines (stack switch, ret, labels) are
# missing from this excerpt; code lines are byte-identical.
811 ENTRY(hypervisor_callback)
812 zeroentry do_hypervisor_callback
815 * Copied from arch/xen/i386/kernel/entry.S
817 # A note on the "critical region" in our callback handler.
818 # We want to avoid stacking callback handlers due to events occurring
819 # during handling of the last event. To do this, we keep events disabled
820 # until we've done all processing. HOWEVER, we must enable events before
821 # popping the stack frame (can't be done atomically) and so it would still
822 # be possible to get enough handler activations to overflow the stack.
823 # Although unlikely, bugs of that kind are hard to track down, so we'd
824 # like to avoid the possibility.
825 # So, on entry to the handler we detect whether we interrupted an
826 # existing activation in its critical region -- if so, we pop the current
827 # activation and restart the handler using the previous one.
828 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
829 # Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
830 # see the correct pointer to the pt_regs
831 movq %rdi, %rsp # we don't return, adjust the stack frame
# Switch to the per-cpu irq stack, tracking nesting via pda_irqcount.
832 11: movq %gs:pda_irqstackptr,%rax
833 incl %gs:pda_irqcount
836 call evtchn_do_upcall
838 decl %gs:pda_irqcount
842 zeroentry do_nmi_callback
843 ENTRY(xen_do_nmi_callback)
# Mask further NMIs in the saved flags, then exit via the common path.
846 orl $NMI_MASK,EFLAGS(%rsp)
848 XEN_BLOCK_EVENTS(%rsi)
849 GET_THREAD_INFO(%rcx)
850 jmp retint_restore_args
854 restore_all_enable_events:
855 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
857 scrit: /**** START OF CRITICAL REGION ****/
858 XEN_TEST_PENDING(%rsi)
859 jnz 14f # process more events if necessary...
860 XEN_PUT_VCPU_INFO(%rsi)
864 14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
865 XEN_PUT_VCPU_INFO(%rsi)
867 movq %rsp,%rdi # set the argument again
869 ecrit: /**** END OF CRITICAL REGION ****/
870 # At this point, unlike on x86-32, we don't do the fixup to simplify the
871 # code and the stack frame is more complex on x86-64.
872 # When the kernel is interrupted in the critical section, the kernel
873 # will do IRET in that case, and everything will be restored at that point,
874 # i.e. it just resumes from the next instruction interrupted with the same context.
876 # Hypervisor uses this for application faults while it executes.
877 # We get here for two reasons:
878 # 1. Fault while reloading DS, ES, FS or GS
879 # 2. Fault while executing IRET
880 # Category 1 we do not need to fix up as Xen has already reloaded all segment
881 # registers that could be reloaded and zeroed the others.
882 # Category 2 we fix up by killing the current process. We cannot use the
883 # normal Linux return path in this case because if we use the IRET hypercall
884 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
885 # We distinguish between categories by comparing each saved segment register
886 # with its current contents: any discrepancy means we are in category 1.
# failsafe_callback: invoked by Xen on segment-reload or IRET faults; see
# the category-1/category-2 discussion in the comment block above this
# entry. NOTE(review): most of the body (segment compares, branches) is
# missing from this excerpt.
887 ENTRY(failsafe_callback)
900 /* All segments match their saved values => Category 2 (Bad IRET). */
904 movq $-9999,%rdi /* better code? */
906 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
# Exception-table entry: a fault at gs_change is fixed up at bad_gs.
914 .section __ex_table,"a"
916 .quad gs_change,bad_gs
919 /* running with kernelgs */
921 /* swapgs */ /* switch back to user gs */
929 * Create a kernel thread.
931 * C extern interface:
932 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
934 * asm input arguments:
935 * rdi: fn, rsi: arg, rdx: flags
# NOTE(review): the ENTRY(kernel_thread) label, the clone call, and most
# of the body are missing from this excerpt.
939 FAKE_STACK_FRAME $child_rip
942 # rdi: flags, rsi: usp, rdx: will be &pt_regs
# Merge in the mandatory kernel-thread clone flags.
944 orq kernel_thread_flags(%rip),%rdi
957 * It isn't worth checking for a reschedule here,
958 * so internally to the x86_64 port you can rely on kernel_thread()
959 * not to reschedule the child before returning, this avoids the need
960 * of hacks for example to fork off the per-CPU idle tasks.
961 * [Hopefully no generic code relies on the reschedule -AK]
967 ENDPROC(kernel_thread)
# child_rip fragment: the new thread starts here with the parent's
# registers; the fake return address keeps the unwinder happy.
970 pushq $0 # fake return address
973 * Here we are in the child and the registers are set as they were
974 * at kernel_thread() invocation in the parent.
986 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
988 * C extern interface:
989 * extern long execve(char *name, char **argv, char **envp)
991 * asm input arguments:
992 * rdi: name, rsi: argv, rdx: envp
994 * We want to fallback into:
995 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
997 * do_sys_execve asm fallback arguments:
998 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
# NOTE(review): interior of kernel_execve (fake frame setup, the call) is
# missing from this excerpt.
1000 ENTRY(kernel_execve)
1005 movq %rax, RAX(%rsp)
1009 jmp int_ret_from_sys_call
1014 ENDPROC(kernel_execve)
# --- Exception vector entry stubs -------------------------------------------
# Each entry expands zeroentry (no CPU error code), errorentry (CPU pushes
# an error code), or paranoidentry (NMI-like, may nest anywhere).
# NOTE(review): several stubs here are truncated -- ENTRY/END lines and
# bodies are missing in places; code lines are byte-identical.
1016 KPROBE_ENTRY(page_fault)
1017 errorentry do_page_fault
1018 KPROBE_END(page_fault)
1020 ENTRY(coprocessor_error)
1021 zeroentry do_coprocessor_error
1022 END(coprocessor_error)
1024 ENTRY(simd_coprocessor_error)
1025 zeroentry do_simd_coprocessor_error
1026 END(simd_coprocessor_error)
1028 ENTRY(device_not_available)
1029 zeroentry math_state_restore
1030 END(device_not_available)
1032 /* runs on exception stack */
1036 CFI_ADJUST_CFA_OFFSET 8 */
1038 /* jmp paranoid_exit */
1044 /* runs on exception stack */
1048 CFI_ADJUST_CFA_OFFSET 8
1049 paranoidentry do_nmi, 0, 0
1050 #ifdef CONFIG_TRACE_IRQFLAGS
1063 CFI_ADJUST_CFA_OFFSET 8 */
1065 /* jmp paranoid_exit */
1070 zeroentry do_overflow
1078 zeroentry do_invalid_op
1081 ENTRY(coprocessor_segment_overrun)
1082 zeroentry do_coprocessor_segment_overrun
1083 END(coprocessor_segment_overrun)
1086 zeroentry do_reserved
1090 /* runs on exception stack */
1093 paranoidentry do_double_fault
1100 errorentry do_invalid_TSS
1103 ENTRY(segment_not_present)
1104 errorentry do_segment_not_present
1105 END(segment_not_present)
1107 /* runs on exception stack */
1108 ENTRY(stack_segment)
1110 errorentry do_stack_segment
1114 KPROBE_ENTRY(general_protection)
1115 errorentry do_general_protection
1116 KPROBE_END(general_protection)
1118 ENTRY(alignment_check)
1119 errorentry do_alignment_check
1120 END(alignment_check)
1123 zeroentry do_divide_error
1126 ENTRY(spurious_interrupt_bug)
1127 zeroentry do_spurious_interrupt_bug
1128 END(spurious_interrupt_bug)
1130 #ifdef CONFIG_X86_MCE
1131 /* runs on exception stack */
1132 ENTRY(machine_check)
1135 CFI_ADJUST_CFA_OFFSET 8
1136 paranoidentry do_machine_check
1142 /* Call softirq on interrupt stack. Interrupts are off. */
# NOTE(review): the ENTRY(call_softirq) line, the irq-stack switch, the
# call to __do_softirq and the ret are missing from this excerpt.
1146 CFI_ADJUST_CFA_OFFSET 8
1147 CFI_REL_OFFSET rbp,0
1149 CFI_DEF_CFA_REGISTER rbp
# Switch to the per-cpu irq stack only when not already nested
# (cmove is conditional on the flags set by the missing preceding test).
1150 incl %gs:pda_irqcount
1151 cmove %gs:pda_irqstackptr,%rsp
1152 push %rbp # backlink for old unwinder
1155 CFI_DEF_CFA_REGISTER rsp
1156 CFI_ADJUST_CFA_OFFSET -8
1157 decl %gs:pda_irqcount
1160 ENDPROC(call_softirq)