/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 * Jun Nakajima <jun.nakajima@intel.com>
 * Asit Mallick <asit.k.mallick@intel.com>
 *
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: the architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
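/*
 * A sketch of the layout the terms above imply (offsets come from
 * asm-offsets; higher addresses toward the top of the kernel stack):
 *
 *	SS, RSP, EFLAGS, CS, RIP	<- hardware iret frame ("top of stack")
 *	ORIG_RAX			<- error code / syscall number slot
 *	RDI..R11			<- saved by SAVE_ARGS ("partial stack frame")
 *	RBX, RBP, R12..R15		<- additionally saved for a "full stack frame"
 */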
#ifdef CONFIG_DEBUG_INFO
#undef CONFIG_DEBUG_INFO
#endif
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/irqflags.h>
#include <xen/interface/arch-x86_64.h>
#include <xen/interface/features.h>

#include "irq_vectors.h"

#include "xen_entry.S"
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
	.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
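	/* EFLAGS bit 9 is the IF flag; bt copies the saved IF bit into CF. */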
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	$__USER_CS,CS(%rsp)

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	CFI_ADJUST_CFA_OFFSET	8
	pushq	$(1<<9)	/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	%rax	/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8

	.macro UNFAKE_STACK_FRAME
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.macro CFI_DEFAULT_STACK start=1
	CFI_DEF_CFA_OFFSET SS+8
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
/*
 * Must be consistent with the definition in arch-x86_64.h:
 *	struct iret_context {
 *	    u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
 *	};
 * #define VGCF_IN_SYSCALL (1<<8)
 */
	.macro HYPERVISOR_IRET flag
	testl $NMI_MASK,2*8(%rsp)	/* NMI mask bit in the saved rflags */
	testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
	/* Direct iret to kernel space. Correct CS and SS. */
2:	/* Slow iret via hypervisor. */
	andl  $~NMI_MASK, 16(%rsp)
	jmp  hypercall_page + (__HYPERVISOR_iret * 32)
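	/* Each hypercall gets a 32-byte stub in the shared hypercall_page,
	   so indexing by __HYPERVISOR_iret * 32 jumps into the iret stub. */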
	.macro SWITCH_TO_KERNEL ssoff,adjust=0
	orb   $1,\ssoff-\adjust+4(%rsp)
/*
 * A newly forked process directly context switches into this.
 */
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 *
 * rax  system call number
 * rcx  return address for syscall/sysret, C arg3
 * r10  arg3	(--> moved to rcx for C)
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frame, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
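/*
 * Example of the register setup above (sketch): for write(fd, buf, count)
 * userspace loads
 *	rax = __NR_write, rdi = fd, rsi = buf, rdx = count
 * and executes SYSCALL; the entry code only needs to move r10 into rcx
 * (for syscalls with a 4th argument) to match the C calling convention,
 * since rcx itself was clobbered with the return address by SYSCALL.
 */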
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	/*CFI_REGISTER	rflags,r11*/
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	XEN_UNBLOCK_EVENTS(%r11)
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	cmpq $__NR_syscall_max,%rax
	call *sys_call_table(,%rax,8)	# XXX:	rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
	movl $_TIF_ALLWORK_MASK,%edi
	GET_THREAD_INFO(%rcx)
	XEN_BLOCK_EVENTS(%rsi)
	movl threadinfo_flags(%rcx),%edx
	/*
	 * sysretq will re-enable interrupts:
	 */
	XEN_UNBLOCK_EVENTS(%rsi)
	/*CFI_REGISTER	rflags,r11*/
	HYPERVISOR_IRET VGCF_IN_SYSCALL
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
	bt $TIF_NEED_RESCHED,%edx
	XEN_UNBLOCK_EVENTS(%rsi)
	CFI_ADJUST_CFA_OFFSET 8
	CFI_ADJUST_CFA_OFFSET -8

	/* Handle a signal */
	XEN_UNBLOCK_EVENTS(%rsi)
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */

	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
	/* Do syscall tracing */
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	cmpq $__NR_syscall_max,%rax
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	XEN_BLOCK_EVENTS(%rsi)
	testb $3,CS-ARGOFFSET(%rsp)
	/* Need to set the proper %ss (not NULL) for ring 3 iretq */
	movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
	jmp retint_restore_args	# return from ring-3 kernel
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl $~TS_COMPAT,threadinfo_status(%rcx)
	jmp retint_restore_args

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
	bt $TIF_NEED_RESCHED,%edx
	XEN_UNBLOCK_EVENTS(%rsi)
	CFI_ADJUST_CFA_OFFSET 8
	CFI_ADJUST_CFA_OFFSET -8
	XEN_BLOCK_EVENTS(%rsi)

	/* handle signals and tracing -- both require a full stack frame */
	XEN_UNBLOCK_EVENTS(%rsi)
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	XEN_BLOCK_EVENTS(%rsi)

	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
	XEN_BLOCK_EVENTS(%rsi)
END(int_ret_from_sys_call)
/*
 * Certain special system calls that need to save a full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp  ptregscall_common

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi
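/*
 * Sketch of one expansion: "PTREGSCALL stub_fork, sys_fork, %rdi" builds a
 * stub that loads the address of sys_fork into %rax, points %rdi at the
 * saved registers (the pt_regs argument sys_fork expects), and jumps to
 * ptregscall_common, which saves the full frame around the call.
 */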
ENTRY(ptregscall_common)
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	RESTORE_TOP_OF_STACK %r11
	CFI_REGISTER rip, r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
END(ptregscall_common)
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	FIXUP_TOP_OF_STACK %r11
	RESTORE_TOP_OF_STACK %r11
	jmp int_ret_from_sys_call
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_ADJUST_CFA_OFFSET -8
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	jmp int_ret_from_sys_call
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
	CFI_DEF_CFA	rsp,SS+8-\ref
	/*CFI_REL_OFFSET	ss,SS-\ref*/
	CFI_REL_OFFSET	rsp,RSP-\ref
	/*CFI_REL_OFFSET	rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET	cs,CS-\ref*/
	CFI_REL_OFFSET	rip,RIP-\ref

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
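/*
 * For reference: #DF, #TS, #NP, #SS, #GP, #PF and #AC push a hardware
 * error code (XCPT_FRAME); #DE, #UD, #NM and most others do not (INTR_FRAME).
 */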
	movl threadinfo_flags(%rcx),%edx
	movl EFLAGS-REST_SKIP(%rsp), %eax
	shr $9, %eax			# EAX[0] == IRET_EFLAGS.IF
	XEN_GET_VCPU_INFO(%rsi)
	andb evtchn_upcall_mask(%rsi),%al
	andb $1,%al			# EAX[0] == IRET_EFLAGS.IF & event_mask
	jnz restore_all_enable_events	# != 0 => enable event delivery
	XEN_PUT_VCPU_INFO(%rsi)
	/* edi: workmask, edx: work */
	bt    $TIF_NEED_RESCHED,%edx
	XEN_UNBLOCK_EVENTS(%rsi)
	CFI_ADJUST_CFA_OFFSET	8
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	XEN_BLOCK_EVENTS(%rsi)

	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_restore_args
	XEN_UNBLOCK_EVENTS(%rsi)
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	XEN_BLOCK_EVENTS(%rsi)
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt   $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp retint_kernel	/* check again */
#endif
END(common_interrupt)
	.macro apicinterrupt num,func
	CFI_ADJUST_CFA_OFFSET 8

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif /* !CONFIG_XEN */
/*
 * Exception entry points.
 */
	addq $0x10,%rsp	/* skip rcx and r11 */
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	.macro errorentry sym
	addq $0x10,%rsp	/* rsp points to the error code */
	CFI_ADJUST_CFA_OFFSET 8
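	/* zeroentry (above) pushes a 0 error code itself, for exceptions
	   that don't supply one; errorentry leaves the CPU-pushed error
	   code in place. Both stash the old %rax in its pt_regs slot. */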
	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	addq $0x10,%rsp	/* skip rcx and r11 */
	movl $MSR_GS_BASE,%ecx
	movq %gs:pda_data_offset, %rbp
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	XEN_BLOCK_EVENTS(%rsi)
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
	/* rdi slot contains rax, oldrax contains error code */
	CFI_ADJUST_CFA_OFFSET	(14*8)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r15,R15
	cmpl $__KERNEL_CS,CS(%rsp)
	movq ORIG_RAX(%rsp),%rsi	# get error code
	movq $-1,ORIG_RAX(%rsp)
	XEN_BLOCK_EVENTS(%rsi)
	GET_THREAD_INFO(%rcx)
	testb $3,CS-ARGOFFSET(%rsp)
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	jmp retint_restore_args
/*
 * We need to rewrite the logic here because we don't do iretq to
 * return to user mode. It's still possible that we get a trap/fault
 * in the kernel (when accessing buffers pointed to by system calls,
 * for example).
 */
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	movl %ebp,%ebp	/* zero extend */
	cmpq $gs_change,RIP(%rsp)
ENTRY(hypervisor_callback)
	zeroentry do_hypervisor_callback

/*
 * Copied from arch/xen/i386/kernel/entry.S
 */
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
ENTRY(do_hypervisor_callback)	# do_hypervisor_callback(struct pt_regs *)
# Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
# see the correct pointer to the pt_regs
	movq %rdi, %rsp		# we don't return, adjust the stack frame
11:	movq %gs:pda_irqstackptr,%rax
	incl %gs:pda_irqcount
	call evtchn_do_upcall
	decl %gs:pda_irqcount
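	# pda_irqcount makes the switch nest-safe: only the outermost
	# activation moves %rsp onto the per-CPU IRQ stack (pda_irqstackptr);
	# nested activations just bump the count and stay on the same stack.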
#ifdef CONFIG_X86_LOCAL_APIC
	zeroentry do_nmi_callback
ENTRY(do_nmi_callback)
	orl $NMI_MASK,EFLAGS(%rsp)
	XEN_BLOCK_EVENTS(%rsi)
	GET_THREAD_INFO(%rcx)
	jmp retint_restore_args
restore_all_enable_events:
	XEN_UNBLOCK_EVENTS(%rsi)	# %rsi is already set up...

scrit:	/**** START OF CRITICAL REGION ****/
	XEN_TEST_PENDING(%rsi)
	jnz  14f	# process more events if necessary...
	XEN_PUT_VCPU_INFO(%rsi)

14:	XEN_LOCKED_BLOCK_EVENTS(%rsi)
	XEN_PUT_VCPU_INFO(%rsi)
	movq %rsp,%rdi		# set the argument again
ecrit:	/**** END OF CRITICAL REGION ****/
# At this point, unlike on x86-32, we don't do the fixup to simplify the
# code and the stack frame is more complex on x86-64.
# When the kernel is interrupted in the critical section, the kernel
# will do IRET in that case, and everything will be restored at that point,
# i.e. it just resumes from the interrupted instruction with the same context.

# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
ENTRY(failsafe_callback)
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq $-9999,%rdi	/* better code? */
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	.section __ex_table,"a"
	.quad gs_change,bad_gs
	/* running with kernelgs */
	/* swapgs */		/* switch back to user gs */
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
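/*
 * Usage sketch (hypothetical caller; my_worker is not part of this file):
 *
 *	static int my_worker(void *arg) { ... }
 *	kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * The flags are forwarded to the fork path, with kernel_thread_flags
 * OR-ed in below.
 */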
	FAKE_STACK_FRAME $child_rip
	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	orq  kernel_thread_flags(%rip),%rdi
	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not rescheduling the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
ENDPROC(kernel_thread)
	pushq $0		# fake return address
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
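/*
 * Sketch: an in-kernel caller (e.g. the init startup path) can do
 *	execve("/sbin/init", argv_init, envp_init);
 * the wrapper builds the fake frame so sys_execve sees a pt_regs it can
 * overwrite with the new program's initial register state.
 */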
	movq %rax, RAX(%rsp)
	jmp int_ret_from_sys_call
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)
	/* runs on exception stack */
/*	CFI_ADJUST_CFA_OFFSET 8	*/
	/* jmp paranoid_exit */

	/* runs on exception stack */
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
/*	CFI_ADJUST_CFA_OFFSET 8	*/
	/* jmp paranoid_exit */
	zeroentry do_overflow
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	zeroentry do_reserved
	/* runs on exception stack */
	paranoidentry do_double_fault

	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	errorentry do_stack_segment

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check

	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp,0
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
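	# pda_irqcount is biased to start at -1, so incl sets ZF only for the
	# outermost call; cmove then switches %rsp to the per-CPU IRQ stack
	# exactly once, and nested entries keep running on the current stack.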
	push  %rbp		# backlink for old unwinder
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
	decl %gs:pda_irqcount
ENDPROC(call_softirq)
#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
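	# Seed the unwinder's pt_regs (pointed to by %rdi) with a snapshot of
	# the current state: callee-saved registers are captured live below,
	# caller-clobbered slots are cleared, and RIP/RSP are derived from
	# our own return address and stack pointer.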
	movq	%r15, R15(%rdi)
	movq	%r14, R14(%rdi)
	movq	%r13, R13(%rdi)
	movq	%r12, R12(%rdi)
	movq	%rbp, RBP(%rdi)
	movq	%rbx, RBX(%rdi)
	movq	%rax, R11(%rdi)
	movq	%rax, R10(%rdi)
	movq	%rax, RAX(%rdi)
	movq	%rax, RCX(%rdi)
	movq	%rax, RDX(%rdi)
	movq	%rax, RSI(%rdi)
	movq	%rax, RDI(%rdi)
	movq	%rax, ORIG_RAX(%rdi)
	movq	%rcx, RIP(%rdi)
	movq	$__KERNEL_CS, CS(%rdi)
	movq	%rax, EFLAGS(%rdi)
	movq	%rcx, RSP(%rdi)
	movq	$__KERNEL_DS, SS(%rdi)
ENDPROC(arch_unwind_init_running)