 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 * Jun Nakajima <jun.nakajima@intel.com>
 * Asit Mallick <asit.k.mallick@intel.com>
 *
 * entry.S contains the system-call and fault low-level handling routines.
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like a partial stack frame, but with all registers saved.
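 *
 * Illustrative layout sketch (assuming the usual SAVE_ARGS/SAVE_REST
 * conventions of <asm/calling.h>), from high to low addresses:
 *
 *	SS RSP RFLAGS CS RIP		<- architecture frame (top of stack)
 *	ORIG_RAX			<- syscall number / error code slot
 *	RDI RSI RDX RCX RAX R8-R11	<- partial stack frame
 *	RBX RBP R12-R15			<- added only for a full stack frame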
 * - schedule it carefully for the final hardware.
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm_offset.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm-xen/xen-public/arch-x86_64.h>

ECF_IN_SYSCALL = (1<<8)
 * Copied from arch/xen/i386/kernel/entry.S

/* Offsets into shared_info_t. */
#define evtchn_upcall_pending	0
#define evtchn_upcall_mask	1

#define sizeof_vcpu_shift	3
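/* Presumably 1 << sizeof_vcpu_shift bytes per vcpu record: the SMP lock
   macros below index into shared_info with TI_cpu << sizeof_vcpu_shift. */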
#define XEN_GET_VCPU_INFO(reg)
#define preempt_disable(reg)	incl TI_preempt_count(reg)
#define preempt_enable(reg)	decl TI_preempt_count(reg)
#define XEN_LOCK_VCPU_INFO_SMP(reg)	preempt_disable(%rbp)		; \
					movl TI_cpu(%rbp),reg		; \
					shl  $sizeof_vcpu_shift,reg	; \
					addl HYPERVISOR_shared_info,reg
#define XEN_UNLOCK_VCPU_INFO_SMP(reg)	preempt_enable(%rbp)
#define XEN_UNLOCK_VCPU_INFO_SMP_fixup	.byte 0xff,0xff,0xff
#define XEN_LOCKED_BLOCK_EVENTS(reg)	movb $1,evtchn_upcall_mask(reg)
#define XEN_BLOCK_EVENTS(reg)	XEN_LOCK_VCPU_INFO_SMP(reg)		; \
				XEN_LOCKED_BLOCK_EVENTS(reg)		; \
				XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_UNBLOCK_EVENTS(reg)	XEN_LOCK_VCPU_INFO_SMP(reg)		; \
				movb $0,evtchn_upcall_mask(reg)		; \
				XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_SAVE_UPCALL_MASK(reg,tmp,off)	GET_THREAD_INFO(%rbp)	; \
				XEN_LOCK_VCPU_INFO_SMP(reg)		; \
				movb evtchn_upcall_mask(reg), tmp	; \
				movb tmp, off(%rsp)			; \
				XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_GET_VCPU_INFO(reg)		movq HYPERVISOR_shared_info,reg
#define XEN_LOCK_VCPU_INFO_SMP(reg)	movq HYPERVISOR_shared_info,reg
#define XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_UNLOCK_VCPU_INFO_SMP_fixup

#define XEN_LOCKED_BLOCK_EVENTS(reg)	movb $1,evtchn_upcall_mask(reg)
#define XEN_BLOCK_EVENTS(reg)		XEN_LOCKED_BLOCK_EVENTS(reg)
#define XEN_UNBLOCK_EVENTS(reg)		movb $0,evtchn_upcall_mask(reg)
#define XEN_SAVE_UPCALL_MASK(reg,tmp,off) \
	movb evtchn_upcall_mask(reg), tmp; \

#define XEN_TEST_PENDING(reg)	testb $0xFF,evtchn_upcall_pending(reg)
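/* In this paravirtualized kernel the per-vcpu event mask plays the role
   that the EFLAGS.IF bit plays on bare metal: XEN_BLOCK_EVENTS acts like
   cli, XEN_UNBLOCK_EVENTS like sti, and XEN_TEST_PENDING checks for
   upcalls that arrived while events were masked. */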
#ifdef CONFIG_PREEMPT
#define preempt_stop	XEN_BLOCK_EVENTS(%rsi)
#define retint_kernel	retint_restore_args
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
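 *
 * Illustrative usage sketch (sys_foo is a hypothetical handler that takes a
 * struct pt_regs argument; the scratch-register choice follows the pattern
 * used elsewhere in this file):
 *
 *	FIXUP_TOP_OF_STACK %r11
 *	call sys_foo
 *	RESTORE_TOP_OF_STACK %r11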
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_ADJUST_CFA_OFFSET	8
	pushq $(1<<9)			/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	pushq $__KERNEL_CS		/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	pushq \child_rip		/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	pushq %rax			/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8

	.macro UNFAKE_STACK_FRAME
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.macro CFI_DEFAULT_STACK
	CFI_ADJUST_CFA_OFFSET	(SS)
	CFI_OFFSET	r15,R15-SS
	CFI_OFFSET	r14,R14-SS
	CFI_OFFSET	r13,R13-SS
	CFI_OFFSET	r12,R12-SS
	CFI_OFFSET	rbp,RBP-SS
	CFI_OFFSET	rbx,RBX-SS
	CFI_OFFSET	r11,R11-SS
	CFI_OFFSET	r10,R10-SS
	CFI_OFFSET	rax,RAX-SS
	CFI_OFFSET	rcx,RCX-SS
	CFI_OFFSET	rdx,RDX-SS
	CFI_OFFSET	rsi,RSI-SS
	CFI_OFFSET	rdi,RDI-SS
	CFI_OFFSET	rsp,RSP-SS
	CFI_OFFSET	rip,RIP-SS
 * Must be consistent with the definition in arch-x86_64.h:
 *	struct switch_to_user {
 *		u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
 *	};
 * #define ECF_IN_SYSCALL (1<<8)
	.macro SWITCH_TO_USER flag
	movl $0,%gs:pda_kernel_mode	# change to user mode
	subq $8*4,%rsp			# reuse rip, cs, rflags, rsp, ss in the stack
	movq %rcx,2*8(%rsp)		# we saved %rcx upon exceptions
	movq $\flag,3*8(%rsp)
	movq $__USER_CS,5*8(%rsp)
	movq $__USER_DS,8*8(%rsp)
	movq $__HYPERVISOR_switch_to_user,%rax

	.macro SWITCH_TO_KERNEL ssoff,adjust=0
	btsq $0,%gs:pda_kernel_mode
	orb  $1,\ssoff-\adjust+4(%rsp)
 * A newly forked process directly context switches into this.
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	jmp  ret_from_sys_call
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * rax	system call number
 * rcx	return address for syscall/sysret, C arg3
 * r10	arg3	(--> moved to rcx for C)
 * r11	eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx	saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we don't.
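 *
 * Concrete example of the convention above: a user-space write(fd, buf, count)
 * arrives here with rax = __NR_write, rdi = fd, rsi = buf, rdx = count, while
 * rcx and r11 hold the return RIP and RFLAGS saved by the SYSCALL instruction.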
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	XEN_GET_VCPU_INFO(%r11)
	XEN_SAVE_UPCALL_MASK(%r11,%cl,EVENT_MASK-ARGOFFSET)	# saved %rcx
	XEN_UNBLOCK_EVENTS(%r11)
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	cmpq $__NR_syscall_max,%rax
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
	.globl ret_from_sys_call
	movl $_TIF_WORK_MASK,%edi
	GET_THREAD_INFO(%rcx)
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	movl threadinfo_flags(%rcx),%edx
	XEN_UNBLOCK_EVENTS(%rsi)
	SWITCH_TO_USER ECF_IN_SYSCALL
	/* Handle reschedules */
	/* edx: work, edi: workmask */
	bt   $TIF_NEED_RESCHED,%edx
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)

	/* Handle a signal */
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Do syscall tracing */
	movq $-ENOSYS,RAX(%rsp)
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET		/* reload args from stack in case ptrace changed it */
	cmpq $__NR_syscall_max,%rax
	movq %r10,%rcx			/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	call syscall_trace_leave
	jmp  ret_from_sys_call

	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp  ret_from_sys_call
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
ENTRY(int_ret_from_sys_call)
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	testb $3,CS-ARGOFFSET(%rsp)
	/* Need to set the proper %ss (not NULL) for ring 3 iretq */
	movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
	jmp  retint_restore_args	# return from ring 3 kernel
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	jmp  retint_restore_args

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
	bt   $TIF_NEED_RESCHED,%edx
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)

	/* handle signals and tracing -- both require a full stack frame */
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	leaq 8(%rsp),%rdi		# &ptregs -> arg1
	call syscall_trace_leave
	btr  $TIF_SYSCALL_TRACE,%edi
	btr  $TIF_SYSCALL_AUDIT,%edi
	btr  $TIF_SINGLESTEP,%edi

	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	movq %rsp,%rdi			# &ptregs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
 * Certain special system calls need to save a complete full stack frame.
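 * (clone/fork/sigreturn and friends read or rewrite the task's entire
 * register state, so the callee-saved registers must be on the stack too.)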
	.macro PTREGSCALL label,func,arg
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp  ptregscall_common

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	CFI_ADJUST_CFA_OFFSET	-8
	CFI_ADJUST_CFA_OFFSET	8
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	bt   $TIF_IA32,threadinfo_flags(%rcx)
	CFI_ADJUST_CFA_OFFSET	REST_SKIP
	jmp  int_ret_from_sys_call
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
ENTRY(stub_rt_sigreturn)
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)		# fixme, this could be done at the higher layer
	jmp  int_ret_from_sys_call
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
	/* 0(%rsp): interrupt number */
	.macro interrupt func
	CFI_DEF_CFA	rsp,(SS-RDI)
	CFI_REL_OFFSET	rsp,(RSP-ORIG_RAX)
	CFI_REL_OFFSET	rip,(RIP-ORIG_RAX)
#ifdef CONFIG_DEBUG_INFO
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	CFI_DEF_CFA_REGISTER	rbp
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#if 0 /* For Xen we don't need to do this */
1:	addl $1,%gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	pushq %rdi			# save old stack
	movl threadinfo_flags(%rcx),%edx
	testb $3,8(%rsp)		# check CS
	/* edi: workmask, edx: work */
	bt   $TIF_NEED_RESCHED,%edx
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz   retint_restore_args
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	movq $-1,ORIG_RAX(%rsp)
	xorq %rsi,%rsi			# oldset
	movq %rsp,%rdi			# &pt_regs
	call do_notify_resume
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt   $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jc   retint_restore_args
	movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	XEN_GET_VCPU_INFO(%rsi)		/* %rsi can be different */
	XEN_BLOCK_EVENTS(%rsi)
	GET_THREAD_INFO(%rcx)
	movl $0,threadinfo_preempt_count(%rcx)
	jmp  retint_kernel		/* check again */
	.macro apicinterrupt num,func

ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

ENTRY(invalidate_interrupt)
	apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
 * Exception entry points.
	addq $0x10,%rsp			/* skip rcx and r11 */
	pushq $0			/* push error code/oldrax */
	pushq %rax			/* push real oldrax to the rdi slot */

	.macro errorentry sym
	addq $0x18,%rsp			/* rsp points to the error code */

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym
	addq $0x10,%rsp			/* skip rcx and r11 */
	movl $MSR_GS_BASE,%ecx
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
	CFI_DEF_CFA	rsp,(SS-RDI)
	CFI_REL_OFFSET	rsp,(RSP-RDI)
	CFI_REL_OFFSET	rip,(RIP-RDI)
	/* rdi slot contains rax, oldrax contains error code */
	CFI_ADJUST_CFA_OFFSET	(14*8)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi		/* load rax from rdi slot */
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)		/* store rax */
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r15,R15
	cmpl $__KERNEL_CS,CS(%rsp)
	movq ORIG_RAX(%rsp),%rsi	# get error code
	movq $-1,ORIG_RAX(%rsp)
	leaq do_hypervisor_callback,%rcx
	je   0f				# don't save event mask for callbacks
	XEN_GET_VCPU_INFO(%r11)
	XEN_SAVE_UPCALL_MASK(%r11,%cl,EVENT_MASK)

	movb EVENT_MASK(%rsp), %al
	notb %al			# %al == ~saved_mask
	XEN_LOCK_VCPU_INFO_SMP(%rsi)
	andb evtchn_upcall_mask(%rsi),%al
	andb $1,%al			# %al == mask & ~saved_mask
	jnz  restore_all_enable_events	# != 0 => reenable event delivery
	XEN_UNLOCK_VCPU_INFO_SMP(%rsi)

	GET_THREAD_INFO(%rcx)
	testb $3,CS-REST_SKIP(%rsp)
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
 * We need to rewrite the logic here because we don't do iretq to
 * return to user mode. It's still possible that we get a trap/fault
 * in the kernel (when accessing buffers pointed to by system calls,
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B-stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	movl %ebp,%ebp			/* zero extend */
	cmpq $gs_change,RIP(%rsp)

ENTRY(hypervisor_callback)
	zeroentry do_hypervisor_callback
 * Copied from arch/xen/i386/kernel/entry.S

# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
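# (Illustration of the race being avoided: once events are re-enabled, an
# upcall can arrive between XEN_UNBLOCK_EVENTS and the final iretq and push
# a fresh activation onto the same stack; nested often enough, that would
# overflow it. The fixup path below merges such nested frames instead.)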
ENTRY(do_hypervisor_callback)	# do_hypervisor_callback(struct *pt_regs)
# Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
# see the correct pointer to the pt_regs
	addq $8, %rsp			# we don't return, adjust the stack frame
	jb   critical_region_fixup
11:	movb $0, EVENT_MASK(%rsp)
	call evtchn_do_upcall
	jmp  error_check_event
restore_all_enable_events:
	XEN_UNBLOCK_EVENTS(%rsi)	# %rsi is already set up...

scrit:	/**** START OF CRITICAL REGION ****/
	XEN_TEST_PENDING(%rsi)
	jnz  14f			# process more events if necessary...
	XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
	testb $3,8(%rsp)		# check CS

14:	XEN_LOCKED_BLOCK_EVENTS(%rsi)
	XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
	movq %rsp,%rdi			# set the argument again
ecrit:	/**** END OF CRITICAL REGION ****/
# [How we do the fixup]. We want to merge the current stack frame with the
# just-interrupted frame. How we do this depends on where in the critical
# region the interrupted handler was executing, and so how many saved
# registers are in each frame. We do this quickly using the lookup table
# 'critical_fixup_table'. For each byte offset in the critical region, it
# provides the number of bytes which have already been popped from the
# interrupted stack frame.
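# (Worked example: if the upcall interrupted the "mov (%rsp),%r11" step of
# the restore sequence, the table byte for that offset is 0x30, i.e. the
# 0x30 bytes of callee-saved registers had already been popped from the
# interrupted frame, so only the remaining words need to be merged.)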
critical_region_fixup:
	addq $critical_fixup_table,%rax
	GET_THREAD_INFO(%rbp)
	XEN_UNLOCK_VCPU_INFO_SMP(%r11)
	shrq $3,%rcx			# convert bytes to words
	je   17f			# skip loop if nothing to copy
16:	subq $8,%rsi			# pre-decrementing copy loop
17:	movq %rdi,%rsp			# final %rdi is top of merged stack
critical_fixup_table:
	.byte 0x00,0x00,0x00,0x00			# testb $0xff,0x0(%rsi)
	.byte 0x00,0x00,0x00,0x00,0x00,0x00		# jne  <crit_user_mode+0x42>
	.byte 0x00,0x00,0x00,0x00			# mov  (%rsp),%r15
	.byte 0x00,0x00,0x00,0x00,0x00			# mov  0x8(%rsp),%r14
	.byte 0x00,0x00,0x00,0x00,0x00			# mov  0x10(%rsp),%r13
	.byte 0x00,0x00,0x00,0x00,0x00			# mov  0x18(%rsp),%r12
	.byte 0x00,0x00,0x00,0x00,0x00			# mov  0x20(%rsp),%rbp
	.byte 0x00,0x00,0x00,0x00,0x00			# mov  0x28(%rsp),%rbx
	.byte 0x00,0x00,0x00,0x00			# add  $0x30,%rsp
	.byte 0x30,0x30,0x30,0x30			# mov  (%rsp),%r11
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x8(%rsp),%r10
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x10(%rsp),%r9
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x18(%rsp),%r8
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x20(%rsp),%rax
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x28(%rsp),%rcx
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x30(%rsp),%rdx
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x38(%rsp),%rsi
	.byte 0x30,0x30,0x30,0x30,0x30			# mov  0x40(%rsp),%rdi
	.byte 0x30,0x30,0x30,0x30			# add  $0x50,%rsp
	.byte 0x80,0x80,0x80,0x80,0x80			# testb $0x3,0x8(%rsp)
	.byte 0x80,0x80					# jne  ffffffff8010dc25 <crit_user_mode>
	.byte 0x80,0x80,0x80,0x80			# orb  $0x3,0x8(%rsp)
	.byte 0x80,0x80					# iretq
	.byte 0x80,0x80,0x80,0x80,0x80,0x80,0x80	# movq $0x0,%gs:0x60
	.byte 0x80,0x80,0x80,0x80,0x80
	.byte 0x80,0x80,0x80,0x80			# sub  $0x20,%rsp
	.byte 0x60,0x60,0x60,0x60			# mov  %rax,(%rsp)
	.byte 0x60,0x60,0x60,0x60,0x60			# mov  %r11,0x8(%rsp)
	.byte 0x60,0x60,0x60,0x60,0x60			# mov  %rcx,0x10(%rsp)
	.byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60	# movq $0x0,0x18(%rsp)
	.byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60	# movq $0x33,0x28(%rsp)
	.byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60	# movq $0x2b,0x40(%rsp)
	.byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60	# mov  $0x17,%rax
	.byte 0x60,0x60					# syscall
	.byte 0x60,0x60,0x60,0x60,0x60			# movb $0x1,0x1(%rsi)
	.byte 0x60,0x60,0x60				# mov  %rsp,%rdi
	.byte 0x60,0x60,0x60,0x60,0x60			# jmpq <do_hypervisor_callback+0x20>
# Hypervisor uses this for application faults while it executes.
ENTRY(failsafe_callback)
	jmp  ret_from_exception
	.section .fixup,"ax";	\
	.section __ex_table,"a";\

	.section __ex_table,"a"
	.quad gs_change,bad_gs

	/* running with kernelgs */
	/* swapgs */			/* switch back to user gs */
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
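 *
 * Illustrative call from C (my_worker is a hypothetical int (*)(void *)
 * to run in the new thread; the flag choice is only an example):
 *	kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);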
	FAKE_STACK_FRAME $child_rip
	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	orq  kernel_thread_flags(%rip),%rdi
 * It isn't worth checking for a reschedule here,
 * so internally to the x86_64 port you can rely on kernel_thread()
 * not rescheduling the child before returning; this avoids the need
 * for hacks, for example to fork off the per-CPU idle tasks.
 * [Hopefully no generic code relies on the reschedule -AK]
 * Here we are in the child and the registers are set as they were
 * at kernel_thread() invocation in the parent.
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
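 *
 * (IRET is required because a successful execve replaces the entire user
 * register state; the SYSRET fast path restores only a partial frame and
 * would leak the pre-exec values.)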
	movq %rax, RAX(%rsp)
	jmp  int_ret_from_sys_call

 * Copy error_entry because of the different stack frame
	addq $0x10,%rsp			# now %rsp points to %cr2
	leaq do_page_fault(%rip),%rax
	movq %rdx,12*8(%rsp)		# save %rdx
	movq 13*8(%rsp),%rdx		# load rax
	movq %rcx,11*8(%rsp)
	movq %rdx,10*8(%rsp)		# store rax
	movq %rsi,13*8(%rsp)		# now save %rsi
	movq 14*8(%rsp),%rdx		# load %cr2, 3rd argument
	cmpl $__KERNEL_CS,CS(%rsp)
	je   error_kernelspace

 * 1st and 2nd arguments are set by error_call_handler
	jmp  error_call_handler
ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore
	/* runs on exception stack */
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_debug
	/* switch back to process stack to restore the state ptrace touched */

	/* runs on exception stack */
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_nmi
	/* ebx: no swapgs flag */
	testl %ebx,%ebx			/* swapgs needed? */
	jnz  paranoid_restore
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	testl $_TIF_NEED_RESCHED,%edx
	jnz  paranoid_resched
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	xorl %esi,%esi			/* oldset */
	movq %rsp,%rdi			/* &pt_regs */
	call do_notify_resume
	zeroentry do_overflow

	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

	zeroentry do_reserved

	/* runs on exception stack */
	paranoidentry do_double_fault

	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	paranoidentry do_stack_segment

ENTRY(general_protection)
	errorentry do_general_protection

ENTRY(alignment_check)
	errorentry do_alignment_check

	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_machine_check

	zeroentry do_call_debug