X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fentry-xen.S;h=a1513e0c5f3176728393829340a4c308fd0219ef;hb=refs%2Fheads%2Fvserver;hp=45b7f7413299adf08412c0bf4d258cdaa708ca49;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/arch/i386/kernel/entry-xen.S b/arch/i386/kernel/entry-xen.S index 45b7f7413..a1513e0c5 100644 --- a/arch/i386/kernel/entry-xen.S +++ b/arch/i386/kernel/entry-xen.S @@ -30,12 +30,13 @@ * 18(%esp) - %eax * 1C(%esp) - %ds * 20(%esp) - %es - * 24(%esp) - orig_eax - * 28(%esp) - %eip - * 2C(%esp) - %cs - * 30(%esp) - %eflags - * 34(%esp) - %oldesp - * 38(%esp) - %oldss + * 24(%esp) - %gs + * 28(%esp) - orig_eax + * 2C(%esp) - %eip + * 30(%esp) - %cs + * 34(%esp) - %eflags + * 38(%esp) - %oldesp + * 3C(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -48,27 +49,25 @@ #include #include #include +#include #include #include "irq_vectors.h" #include -#define nr_syscalls ((syscall_table_size)/4) +/* + * We use macros for low-level operations which need to be overridden + * for paravirtualization. The following will never clobber any registers: + * INTERRUPT_RETURN (aka. "iret") + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). + * Allowing a register to be clobbered can shrink the paravirt replacement + * enough to patch inline, increasing performance. + */ -EBX = 0x00 -ECX = 0x04 -EDX = 0x08 -ESI = 0x0C -EDI = 0x10 -EBP = 0x14 -EAX = 0x18 -DS = 0x1C -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C -EFLAGS = 0x30 -OLDESP = 0x34 -OLDSS = 0x38 +#define nr_syscalls ((syscall_table_size)/4) CF_MASK = 0x00000001 TF_MASK = 0x00000100 @@ -79,10 +78,7 @@ VM_MASK = 0x00020000 /* Pseudo-eflags. */ NMI_MASK = 0x80000000 -#ifndef CONFIG_XEN -#define DISABLE_INTERRUPTS cli -#define ENABLE_INTERRUPTS sti -#else +#ifdef CONFIG_XEN /* Offsets into shared_info_t. */ #define evtchn_upcall_pending /* 0 */ #define evtchn_upcall_mask 1 @@ -90,7 +86,7 @@ NMI_MASK = 0x80000000 #define sizeof_vcpu_shift 6 #ifdef CONFIG_SMP -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ +#define GET_VCPU_INFO movl %gs:PDA_cpu,%esi ; \ shl $sizeof_vcpu_shift,%esi ; \ addl HYPERVISOR_shared_info,%esi #else @@ -99,23 +95,19 @@ NMI_MASK = 0x80000000 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) -#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ - __DISABLE_INTERRUPTS -#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ - __ENABLE_INTERRUPTS #define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) #endif #ifdef CONFIG_PREEMPT -#define preempt_stop cli; TRACE_IRQS_OFF +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else -#define preempt_stop +#define preempt_stop(clobbers) #define resume_kernel restore_nocheck #endif .macro TRACE_IRQS_IRET #ifdef CONFIG_TRACE_IRQFLAGS - testl $IF_MASK,EFLAGS(%esp) # interrupts off? + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? jz 1f TRACE_IRQS_ON 1: @@ -130,6 +122,9 @@ NMI_MASK = 0x80000000 #define SAVE_ALL \ cld; \ + pushl %gs; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET gs, 0;*/\ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ /*CFI_REL_OFFSET es, 0;*/\ @@ -159,7 +154,9 @@ NMI_MASK = 0x80000000 CFI_REL_OFFSET ebx, 0;\ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ - movl %edx, %es; + movl %edx, %es; \ + movl $(__KERNEL_PDA), %edx; \ + movl %edx, %gs #define RESTORE_INT_REGS \ popl %ebx; \ @@ -192,44 +189,52 @@ NMI_MASK = 0x80000000 2: popl %es; \ CFI_ADJUST_CFA_OFFSET -4;\ /*CFI_RESTORE es;*/\ -.section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ +3: popl %gs; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE gs;*/\ +.pushsection .fixup,"ax"; \ 4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ jmp 2b; \ -.previous; \ +6: movl $0,(%esp); \ + jmp 3b; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ -.previous + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.popsection #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 3*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_EC_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 4*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_PTREGS_FRAME \ CFI_STARTPROC simple;\ - CFI_DEF_CFA esp, OLDESP-EBX;\ - /*CFI_OFFSET cs, CS-OLDESP;*/\ - CFI_OFFSET eip, EIP-OLDESP;\ - /*CFI_OFFSET es, ES-OLDESP;*/\ - /*CFI_OFFSET ds, DS-OLDESP;*/\ - CFI_OFFSET eax, EAX-OLDESP;\ - CFI_OFFSET ebp, EBP-OLDESP;\ - CFI_OFFSET edi, EDI-OLDESP;\ - CFI_OFFSET esi, ESI-OLDESP;\ - CFI_OFFSET edx, EDX-OLDESP;\ - CFI_OFFSET ecx, ECX-OLDESP;\ - CFI_OFFSET ebx, EBX-OLDESP + CFI_SIGNAL_FRAME;\ + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ + CFI_OFFSET ebx, PT_EBX-PT_OLDESP ENTRY(ret_from_fork) CFI_STARTPROC @@ -257,16 +262,18 @@ ENTRY(ret_from_fork) ALIGN RING0_PTREGS_FRAME ret_from_exception: - preempt_stop + preempt_stop(CLBR_ANY) ret_from_intr: GET_THREAD_INFO(%ebp) check_userspace: - movl EFLAGS(%esp), %eax # mix EFLAGS and CS - movb CS(%esp), %al - testl $(VM_MASK | 2), %eax - jz resume_kernel + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace + ENTRY(resume_userspace) - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -277,14 +284,14 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cli + DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq jmp need_resched @@ -297,6 +304,7 @@ need_resched: # sysenter call handler stub ENTRY(sysenter_entry) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp movl SYSENTER_stack_esp0(%esp),%esp @@ -305,7 +313,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS(CLBR_NONE) pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -317,12 +325,16 @@ sysenter_past_esp: pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET cs, 0*/ +#ifndef CONFIG_COMPAT_VDSO /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) +#else + pushl $SYSENTER_RETURN +#endif CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 @@ -344,28 +356,29 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys call *sys_call_table(,%eax,4) - movl %eax,EAX(%esp) - DISABLE_INTERRUPTS + movl %eax,PT_EAX(%esp) + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work /* if something modifies registers it must also disable sysexit */ - movl EIP(%esp), %edx - movl OLDESP(%esp), %ecx + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON +1: mov PT_GS(%esp), %gs #ifdef CONFIG_XEN __ENABLE_INTERRUPTS sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ __TEST_PENDING jnz 14f # process more events if necessary... - movl ESI(%esp), %esi + movl PT_ESI(%esp), %esi sysexit 14: __DISABLE_INTERRUPTS sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ @@ -376,11 +389,16 @@ sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ CFI_ADJUST_CFA_OFFSET -4 jmp ret_from_intr #else - sti - sysexit + ENABLE_INTERRUPTS_SYSEXIT #endif /* !CONFIG_XEN */ CFI_ENDPROC - +.pushsection .fixup,"ax" +2: movl $0,PT_GS(%esp) + jmp 1b +.section __ex_table,"a" + .align 4 + .long 1b,2b +.popsection # system call handler stub ENTRY(system_call) @@ -389,21 +407,21 @@ ENTRY(system_call) CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) - testl $TF_MASK,EFLAGS(%esp) + testl $TF_MASK,PT_EFLAGS(%esp) jz no_singlestep orl $_TIF_SINGLESTEP,TI_flags(%ebp) no_singlestep: # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) - movl %eax,EAX(%esp) # store the return value + movl %eax,PT_EAX(%esp) # store the return value syscall_exit: - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -413,19 +431,20 @@ syscall_exit: restore_all: #ifndef CONFIG_XEN - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS - # Warning: OLDSS(%esp) contains the wrong/random values if we + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we # are returning to the kernel. # See comments in process.c:copy_thread() for details. - movb OLDSS(%esp), %ah - movb CS(%esp), %al - andl $(VM_MASK | (4 << 8) | 3), %eax - cmpl $((4 << 8) | 3), %eax + movb PT_OLDSS(%esp), %ah + movb PT_CS(%esp), %al + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax + CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: #else restore_nocheck: - movl EFLAGS(%esp), %eax + movl PT_EFLAGS(%esp), %eax testl $(VM_MASK|NMI_MASK), %eax jnz hypervisor_iret shr $9, %eax # EAX[0] == IRET_EFLAGS.IF @@ -433,19 +452,19 @@ restore_nocheck: andb evtchn_upcall_mask(%esi),%al andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask jnz restore_all_enable_events # != 0 => enable event delivery + CFI_REMEMBER_STATE #endif TRACE_IRQS_IRET - CFI_REMEMBER_STATE restore_nocheck_notrace: RESTORE_REGS - addl $4, %esp + addl $4, %esp # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 -1: iret +1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: #ifndef CONFIG_XEN TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) #endif pushl $0 # no error code pushl $do_iret_error @@ -459,36 +478,45 @@ iret_exc: CFI_RESTORE_STATE #ifndef CONFIG_XEN ldt_ss: - larl OLDSS(%esp), %eax + larl PT_OLDSS(%esp), %eax jnz restore_nocheck testl $0x00400000, %eax # returning to 32bit stack? jnz restore_nocheck # allright, normal return + +#ifdef CONFIG_PARAVIRT + /* + * The kernel can't run on a non-flat stack if paravirt mode + * is active. Rather than try to fixup the high bits of + * ESP, bypass this code entirely. This may break DOSemu + * and/or Wine support in a paravirt VM, although the option + * is still available to implement the setting of the high + * 16-bits in the INTERRUPT_RETURN paravirt-op. + */ + cmpl $0, paravirt_ops+PARAVIRT_enabled + jne restore_nocheck +#endif + /* If returning to userspace with 16bit stack, * try to fix the higher word of ESP, as the CPU * won't restore it. * This is an "official" bug of all the x86-compatible * CPUs, which we can try to work around to make * dosemu and wine happy. */ - subl $8, %esp # reserve space for switch16 pointer - CFI_ADJUST_CFA_OFFSET 8 - cli + movl PT_OLDESP(%esp), %eax + movl %esp, %edx + call patch_espfix_desc + pushl $__ESPFIX_SS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + DISABLE_INTERRUPTS(CLBR_EAX) TRACE_IRQS_OFF - movl %esp, %eax - /* Set up the 16bit stack frame with switch32 pointer on top, - * and a switch16 pointer on top of the current frame. */ - call setup_x86_bogus_stack - CFI_ADJUST_CFA_OFFSET -8 # frame has moved - TRACE_IRQS_IRET - RESTORE_REGS - lss 20+4(%esp), %esp # switch to 16bit stack -1: iret -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 + jmp restore_nocheck #else hypervisor_iret: - andl $~NMI_MASK, EFLAGS(%esp) + andl $~NMI_MASK, PT_EFLAGS(%esp) RESTORE_REGS addl $4, %esp jmp hypercall_page + (__HYPERVISOR_iret * 32) @@ -503,7 +531,7 @@ work_pending: jz work_notifysig work_resched: call schedule - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -516,7 +544,8 @@ work_resched: work_notifysig: # deal with pending signals and # notify-resume requests - testl $VM_MASK, EFLAGS(%esp) +#ifdef CONFIG_VM86 + testl $VM_MASK, PT_EFLAGS(%esp) movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space @@ -526,29 +555,27 @@ work_notifysig: # deal with pending signals and ALIGN work_notifysig_v86: -#ifdef CONFIG_VM86 pushl %ecx # save ti_flags for do_notify_resume CFI_ADJUST_CFA_OFFSET 4 call save_v86_state # %eax contains pt_regs pointer popl %ecx CFI_ADJUST_CFA_OFFSET -4 movl %eax, %esp +#else + movl %esp, %eax +#endif xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig -#endif # perform syscall exit tracing ALIGN syscall_trace_entry: - movl $-ENOSYS,EAX(%esp) + movl $-ENOSYS,PT_EAX(%esp) movl %esp, %eax xorl %edx,%edx call do_syscall_trace - cmpl $0, %eax - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, - # so must skip actual syscall - movl ORIG_EAX(%esp), %eax + movl PT_ORIG_EAX(%esp), %eax cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit @@ -559,7 +586,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending TRACE_IRQS_ON - ENABLE_INTERRUPTS # could let do_syscall_trace() call + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -573,40 +600,39 @@ syscall_fault: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) - movl $-EFAULT,EAX(%esp) + movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace syscall_badsys: - movl $-ENOSYS,EAX(%esp) + movl $-ENOSYS,PT_EAX(%esp) jmp resume_userspace CFI_ENDPROC #ifndef CONFIG_XEN #define FIXUP_ESPFIX_STACK \ - movl %esp, %eax; \ - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ - /* copy data from 16bit stack to 32bit stack */ \ - call fixup_x86_bogus_stack; \ - /* put ESP to the proper location */ \ - movl %eax, %esp; -#define UNWIND_ESPFIX_STACK \ + /* since we are on a wrong stack, we cant make it a C code :( */ \ + movl %gs:PDA_cpu, %ebx; \ + PER_CPU(cpu_gdt_descr, %ebx); \ + movl GDS_address(%ebx), %ebx; \ + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ + addl %esp, %eax; \ + pushl $__KERNEL_DS; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl %eax; \ CFI_ADJUST_CFA_OFFSET 4; \ + lss (%esp), %esp; \ + CFI_ADJUST_CFA_OFFSET -8; +#define UNWIND_ESPFIX_STACK \ movl %ss, %eax; \ - /* see if on 16bit stack */ \ + /* see if on espfix stack */ \ cmpw $__ESPFIX_SS, %ax; \ - je 28f; \ -27: popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4; \ -.section .fixup,"ax"; \ -28: movl $__KERNEL_DS, %eax; \ + jne 27f; \ + movl $__KERNEL_DS, %eax; \ movl %eax, %ds; \ movl %eax, %es; \ - /* switch to 32bit stack */ \ + /* switch to normal stack */ \ FIXUP_ESPFIX_STACK; \ - jmp 27b; \ -.previous +27:; /* * Build the entry stubs and pointer table with @@ -648,7 +674,6 @@ common_interrupt: #define BUILD_INTERRUPT(name, nr) \ ENTRY(name) \ - SAVE_ALL \ RING0_INT_FRAME; \ pushl $~(nr); \ CFI_ADJUST_CFA_OFFSET 4; \ @@ -665,21 +690,22 @@ ENTRY(name) \ #define UNWIND_ESPFIX_STACK #endif -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error +KPROBE_ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: + /* the function address is in %gs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ pushl %ds CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ds, 0*/ pushl %eax CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eax, 0 - xorl %eax, %eax pushl %ebp CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebp, 0 @@ -692,7 +718,6 @@ error_code: pushl %edx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET edx, 0 - decl %eax # eax = -1 pushl %ecx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ecx, 0 @@ -700,18 +725,20 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %es + pushl %gs CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ + /*CFI_REL_OFFSET gs, 0*/ + movl $(__KERNEL_PDA), %ecx + movl %ecx, %gs UNWIND_ESPFIX_STACK popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ - movl ES(%esp), %edi # get the function address - movl ORIG_EAX(%esp), %edx # get the error code - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - /*CFI_REL_OFFSET es, ES*/ + movl PT_GS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_GS(%esp) + /*CFI_REL_OFFSET gs, ES*/ movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es @@ -719,6 +746,7 @@ error_code: call *%edi jmp ret_from_exception CFI_ENDPROC +KPROBE_END(page_fault) #ifdef CONFIG_XEN # A note on the "critical region" in our callback handler. @@ -742,7 +770,7 @@ ENTRY(hypervisor_callback) pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - movl EIP(%esp),%eax + movl PT_EIP(%esp),%eax cmpl $scrit,%eax jb 11f cmpl $ecrit,%eax @@ -752,7 +780,7 @@ ENTRY(hypervisor_callback) cmpl $sysexit_ecrit,%eax ja 11f # interrupted in sysexit critical - addl $0x34,%esp # Remove cs...ebx from stack frame. + addl $0x38,%esp # Remove cs...ebx from stack frame. # this popped off new frame to reuse the old one, therefore no # CFI_ADJUST_CFA_OFFSET here 11: push %esp @@ -796,7 +824,7 @@ critical_region_fixup: 15: mov %esp,%esi add %eax,%esi # %esi points at end of src region mov %esp,%edi - add $0x34,%edi # %edi points at end of dst region + add $0x38,%edi # %edi points at end of dst region mov %eax,%ecx shr $2,%ecx # convert words to bytes je 17f # skip loop if nothing to copy @@ -823,8 +851,9 @@ critical_fixup_table: .byte 0x18 # pop %eax .byte 0x1c # pop %ds .byte 0x20 # pop %es - .byte 0x24,0x24,0x24 # add $4,%esp - .byte 0x28 # iret + .byte 0x24 # pop %gs + .byte 0x28,0x28,0x28 # add $4,%esp + .byte 0x2c # iret .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) .byte 0x00,0x00 # jmp 11b @@ -909,7 +938,7 @@ ENTRY(device_not_available) CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL #ifndef CONFIG_XEN - movl %cr0, %eax + GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) je device_available_emulate pushl $0 # temporary storage for ORIG_EIP @@ -920,7 +949,7 @@ ENTRY(device_not_available) jmp ret_from_exception device_available_emulate: #endif - preempt_stop + preempt_stop(CLBR_ANY) call math_state_restore jmp ret_from_exception CFI_ENDPROC @@ -944,9 +973,15 @@ device_available_emulate: jne ok; \ label: \ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl $__KERNEL_CS; \ - pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 #endif /* CONFIG_XEN */ KPROBE_ENTRY(debug) @@ -965,7 +1000,7 @@ debug_stack_correct: call do_debug jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(debug) #ifndef CONFIG_XEN /* @@ -976,7 +1011,7 @@ debug_stack_correct: * check whether we got an NMI on the debug path where the debug * fault happened on the sysenter path. */ -ENTRY(nmi) +KPROBE_ENTRY(nmi) RING0_INT_FRAME pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -984,7 +1019,7 @@ ENTRY(nmi) cmpw $__ESPFIX_SS, %ax popl %eax CFI_ADJUST_CFA_OFFSET -4 - je nmi_16bit_stack + je nmi_espfix_stack cmpl $sysenter_entry,(%esp) je nmi_stack_fixup pushl %eax @@ -1001,6 +1036,7 @@ ENTRY(nmi) cmpl $sysenter_entry,12(%esp) je nmi_debug_stack_check nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL @@ -1011,9 +1047,12 @@ nmi_stack_correct: CFI_ENDPROC nmi_stack_fixup: + RING0_INT_FRAME FIX_STACK(12,nmi_stack_correct, 1) jmp nmi_stack_correct + nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct cmpl $debug,(%esp) @@ -1023,14 +1062,15 @@ nmi_debug_stack_check: FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct -nmi_16bit_stack: - RING0_INT_FRAME - /* create the pointer to lss back */ +nmi_espfix_stack: + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ pushl %ss CFI_ADJUST_CFA_OFFSET 4 pushl %esp CFI_ADJUST_CFA_OFFSET 4 - movzwl %sp, %esp addw $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 @@ -1041,19 +1081,20 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS - lss 12+4(%esp), %esp # back to 16bit stack -1: iret + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 +1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" .align 4 .long 1b,iret_exc .previous +KPROBE_END(nmi) #else -ENTRY(nmi) +KPROBE_ENTRY(nmi) RING0_INT_FRAME pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -1061,9 +1102,23 @@ ENTRY(nmi) xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - orl $NMI_MASK, EFLAGS(%esp) + orl $NMI_MASK, PT_EFLAGS(%esp) jmp restore_all CFI_ENDPROC +KPROBE_END(nmi) +#endif + +#ifdef CONFIG_PARAVIRT +ENTRY(native_iret) +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + +ENTRY(native_irq_enable_sysexit) + sti + sysexit #endif KPROBE_ENTRY(int3) @@ -1076,7 +1131,7 @@ KPROBE_ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(int3) ENTRY(overflow) RING0_INT_FRAME @@ -1141,7 +1196,7 @@ KPROBE_ENTRY(general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text +KPROBE_END(general_protection) ENTRY(alignment_check) RING0_EC_FRAME @@ -1150,13 +1205,14 @@ ENTRY(alignment_check) jmp error_code CFI_ENDPROC -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault +ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_divide_error CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) @@ -1176,36 +1232,18 @@ ENTRY(fixup_4gb_segment) jmp error_code CFI_ENDPROC -#ifdef CONFIG_STACK_UNWIND -ENTRY(arch_unwind_init_running) +ENTRY(kernel_thread_helper) + pushl $0 # fake return address for unwinder CFI_STARTPROC - movl 4(%esp), %edx - movl (%esp), %ecx - leal 4(%esp), %eax - movl %ebx, EBX(%edx) - xorl %ebx, %ebx - movl %ebx, ECX(%edx) - movl %ebx, EDX(%edx) - movl %esi, ESI(%edx) - movl %edi, EDI(%edx) - movl %ebp, EBP(%edx) - movl %ebx, EAX(%edx) - movl $__USER_DS, DS(%edx) - movl $__USER_DS, ES(%edx) - movl %ebx, ORIG_EAX(%edx) - movl %ecx, EIP(%edx) - movl 12(%esp), %ecx - movl $__KERNEL_CS, CS(%edx) - movl %ebx, EFLAGS(%edx) - movl %eax, OLDESP(%edx) - movl 8(%esp), %eax - movl %ecx, 8(%esp) - movl EBX(%edx), %ebx - movl $__KERNEL_DS, OLDSS(%edx) - jmpl *%eax + movl %edx,%eax + push %edx + CFI_ADJUST_CFA_OFFSET 4 + call *%ebx + push %eax + CFI_ADJUST_CFA_OFFSET 4 + call do_exit CFI_ENDPROC -ENDPROC(arch_unwind_init_running) -#endif +ENDPROC(kernel_thread_helper) .section .rodata,"a" #include "syscall_table.S"