X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Farm%2Fkernel%2Fentry-armv.S;h=e14278d59882367dc579f1dabeb5f2d191772ee2;hb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;hp=69f82e03dba0383ea7c0a749776673a79c4f25bb;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 69f82e03d..e14278d59 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -14,12 +14,12 @@ * it to save wrong values... Be aware! */ #include -#include -#include #include -#include #include +#include /* should be moved into entry-macro.S */ +#include /* should be moved into entry-macro.S */ +#include #include "entry-header.S" @@ -118,7 +118,7 @@ __dabt_svc: @ @ IRQs off again before pulling preserved data off the stack @ - disable_irq r0 + disable_irq @ @ restore SPSR and restart the instruction @@ -198,7 +198,7 @@ __und_svc: @ @ IRQs off again before pulling preserved data off the stack @ -1: disable_irq r0 +1: disable_irq @ @ restore SPSR and restart the instruction @@ -232,7 +232,7 @@ __pabt_svc: @ @ IRQs off again before pulling preserved data off the stack @ - disable_irq r0 + disable_irq @ @ restore SPSR and restart the instruction @@ -269,6 +269,12 @@ __pabt_svc: add r5, sp, #S_PC ldmia r7, {r2 - r4} @ Get USR pc, cpsr +#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) + @ make sure our user space atomic helper is aborted + cmp r2, #VIRT_OFFSET + bichs r3, r3, #PSR_Z_BIT +#endif + @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -280,13 +286,21 @@ __pabt_svc: @ stmia r5, {r2 - r4} stmdb r5, {sp, lr}^ + + @ + @ Enable the alignment trap while in kernel mode + @ + alignment_trap r7, r0, __temp_\sym + + @ + @ Clear FP to mark the first stack frame + @ + zero_fp .endm .align 5 __dabt_usr: usr_entry abt - alignment_trap r7, r0, __temp_abt - zero_fp @ @ Call the processor-specific abort handler: @@ -308,7 +322,7 @@ __dabt_usr: @ @ IRQs on, 
then call the main handler @ - enable_irq r2 + enable_irq mov r2, sp adr lr, ret_from_exception b do_DataAbort @@ -316,8 +330,7 @@ __dabt_usr: .align 5 __irq_usr: usr_entry irq - alignment_trap r7, r0, __temp_irq - zero_fp + #ifdef CONFIG_PREEMPT get_thread_info r8 ldr r9, [r8, #TI_PREEMPT] @ get preempt count @@ -348,8 +361,7 @@ __irq_usr: .align 5 __und_usr: usr_entry und - alignment_trap r7, r0, __temp_und - zero_fp + tst r3, #PSR_T_BIT @ Thumb mode? bne fpundefinstr @ ignore FP sub r4, r2, #4 @@ -412,7 +424,7 @@ call_fpe: movcss r7, r5, lsr #(TIF_USING_IWMMXT + 1) bcs iwmmxt_task_enable #endif - enable_irq r7 + enable_irq add pc, pc, r8, lsr #6 mov r0, r0 @@ -465,9 +477,8 @@ fpundefinstr: .align 5 __pabt_usr: usr_entry abt - alignment_trap r7, r0, __temp_abt - zero_fp - enable_irq r0 @ Enable interrupts + + enable_irq @ Enable interrupts mov r0, r2 @ address (pc) mov r1, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -494,8 +505,12 @@ ENTRY(__switch_to) mra r4, r5, acc0 stmia ip, {r4, r5} #endif +#if defined(CONFIG_HAS_TLS_REG) + mcr p15, 0, r3, c13, c0, 3 @ set TLS register +#elif !defined(CONFIG_TLS_REG_EMUL) mov r4, #0xffff0fff - str r3, [r4, #-3] @ Set TLS ptr + str r3, [r4, #-15] @ TLS val at 0xffff0ff0 +#endif mcr p15, 0, r6, c3, c0, 0 @ Set domain register #ifdef CONFIG_VFP @ Always disable VFP so we can lazily save/restore the old @@ -514,11 +529,215 @@ ENTRY(__switch_to) ldmib r2, {r4 - sl, fp, sp, pc} @ Load all regs saved previously __INIT + +/* + * User helpers. + * + * These are segments of kernel-provided user code reachable from user space + * at a fixed address in kernel memory. This is used to provide user space + * with some operations which require kernel help because of unimplemented + * native features and/or instructions in many ARM CPUs. The idea is for + * this code to be executed directly in user mode for best efficiency but + * which is too intimate with the kernel counterpart to be left to user + * libraries.
In fact this code might even differ from one CPU to another + * depending on the available instruction set and restrictions like on + * SMP systems. In other words, the kernel reserves the right to change + * this code as needed without warning. Only the entry points and their + * results are guaranteed to be stable. + * + * Each segment is 32-byte aligned and will be moved to the top of the high + * vector page. New segments (if ever needed) must be added in front of + * existing ones. This mechanism should be used only for things that are + * really small and justified, and not be abused freely. + * + * User space is expected to implement those things inline when optimizing + * for a processor that has the necessary native support, but only if such + * resulting binaries are already to be incompatible with earlier ARM + * processors due to the use of unsupported instructions other than what + * is provided here. In other words don't make binaries unable to run on + * earlier processors just for the sake of not using these kernel helpers + * if your compiled code is not going to use the new instructions for other + * purpose. + */ + + .align 5 + .globl __kuser_helper_start +__kuser_helper_start: + +/* + * Reference prototype: + * + * int __kernel_cmpxchg(int oldval, int newval, int *ptr) + * + * Input: + * + * r0 = oldval + * r1 = newval + * r2 = ptr + * lr = return address + * + * Output: + * + * r0 = returned value (zero or non-zero) + * C flag = set if r0 == 0, clear if r0 != 0 + * + * Clobbered: + * + * r3, ip, flags + * + * Definition and user space usage example: + * + * typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr); + * #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0) + * + * Atomically store newval in *ptr if *ptr is equal to oldval for user space. + * Return zero if *ptr was changed or non-zero if no exchange happened. 
+ * The C flag is also set if *ptr was changed to allow for assembly + * optimization in the calling code. + * + * For example, a user space atomic_add implementation could look like this: + * + * #define atomic_add(ptr, val) \ + * ({ register unsigned int *__ptr asm("r2") = (ptr); \ + * register unsigned int __result asm("r1"); \ + * asm volatile ( \ + * "1: @ atomic_add\n\t" \ + * "ldr r0, [r2]\n\t" \ + * "mov r3, #0xffff0fff\n\t" \ + * "add lr, pc, #4\n\t" \ + * "add r1, r0, %2\n\t" \ + * "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \ + * "bcc 1b" \ + * : "=&r" (__result) \ + * : "r" (__ptr), "rIL" (val) \ + * : "r0","r3","ip","lr","cc","memory" ); \ + * __result; }) + */ + +__kuser_cmpxchg: @ 0xffff0fc0 + +#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) + + /* + * Poor you. No fast solution possible... + * The kernel itself must perform the operation. + * A special ghost syscall is used for that (see traps.c). + */ + swi #0x9ffff0 + mov pc, lr + +#elif __LINUX_ARM_ARCH__ < 6 + + /* + * Theory of operation: + * + * We set the Z flag before loading oldval. If ever an exception + * occurs we can not be sure the loaded value will still be the same + * when the exception returns, therefore the user exception handler + * will clear the Z flag whenever the interrupted user code was + * actually from the kernel address space (see the usr_entry macro). + * + * The post-increment on the str is used to prevent a race with an + * exception happening just after the str instruction which would + * clear the Z flag although the exchange was done. + */ + teq ip, ip @ set Z flag + ldr ip, [r2] @ load current val + add r3, r2, #1 @ prepare store ptr + teqeq ip, r0 @ compare with oldval if still allowed + streq r1, [r3, #-1]! 
@ store newval if still allowed + subs r0, r2, r3 @ if r2 == r3 the str occurred + mov pc, lr + +#else + + ldrex r3, [r2] + subs r3, r3, r0 + strexeq r3, r1, [r2] + rsbs r0, r3, #0 + mov pc, lr + +#endif + + .align 5 + +/* + * Reference prototype: + * + * int __kernel_get_tls(void) + * + * Input: + * + * lr = return address + * + * Output: + * + * r0 = TLS value + * + * Clobbered: + * + * the Z flag might be lost + * + * Definition and user space usage example: + * + * typedef int (__kernel_get_tls_t)(void); + * #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0) + * + * Get the TLS value as previously set via the __ARM_NR_set_tls syscall. + * + * This could be used as follows: + * + * #define __kernel_get_tls() \ + * ({ register unsigned int __val asm("r0"); \ + * asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \ + * : "=r" (__val) : : "lr","cc" ); \ + * __val; }) + */ + +__kuser_get_tls: @ 0xffff0fe0 + +#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL) + + ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0 + mov pc, lr + +#else + + mrc p15, 0, r0, c13, c0, 3 @ read TLS register + mov pc, lr + +#endif + + .rep 5 + .word 0 @ pad up to __kuser_helper_version + .endr + +/* + * Reference declaration: + * + * extern unsigned int __kernel_helper_version; + * + * Definition and user space usage example: + * + * #define __kernel_helper_version (*(unsigned int *)0xffff0ffc) + * + * User space may read this to determine the current number of helpers + * available. + */ + +__kuser_helper_version: @ 0xffff0ffc + .word ((__kuser_helper_end - __kuser_helper_start) >> 5) + + .globl __kuser_helper_end +__kuser_helper_end: + + /* * Vector stubs. * - * This code is copied to 0x200 or 0xffff0200 so we can use branches in the - * vectors, rather than ldr's. + * This code is copied to 0xffff0200 so we can use branches in the + * vectors, rather than ldr's. Note that this code must not + * exceed 0x300 bytes.
* * Common stub entry macro: * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC @@ -539,7 +758,7 @@ vector_\name: @ mrs r13, cpsr bic r13, r13, #MODE_MASK - orr r13, r13, #MODE_SVC + orr r13, r13, #SVC_MODE msr spsr_cxsf, r13 @ switch to SVC_32 mode and lr, lr, #15 @@ -547,6 +766,7 @@ vector_\name: movs pc, lr @ Changes mode and branches .endm + .globl __stubs_start __stubs_start: /* * Interrupt dispatcher @@ -681,37 +901,24 @@ vector_addrexcptn: .LCsabt: .word __temp_abt + .globl __stubs_end __stubs_end: - .equ __real_stubs_start, .LCvectors + 0x200 + .equ stubs_offset, __vectors_start + 0x200 - __stubs_start -.LCvectors: + .globl __vectors_start +__vectors_start: swi SYS_ERROR0 - b __real_stubs_start + (vector_und - __stubs_start) - ldr pc, __real_stubs_start + (.LCvswi - __stubs_start) - b __real_stubs_start + (vector_pabt - __stubs_start) - b __real_stubs_start + (vector_dabt - __stubs_start) - b __real_stubs_start + (vector_addrexcptn - __stubs_start) - b __real_stubs_start + (vector_irq - __stubs_start) - b __real_stubs_start + (vector_fiq - __stubs_start) - -ENTRY(__trap_init) - stmfd sp!, {r4 - r6, lr} - - mov r0, #0xff000000 - orr r0, r0, #0x00ff0000 @ high vectors position - adr r1, .LCvectors @ set up the vectors - ldmia r1, {r1, r2, r3, r4, r5, r6, ip, lr} - stmia r0, {r1, r2, r3, r4, r5, r6, ip, lr} - - add r2, r0, #0x200 - adr r0, __stubs_start @ copy stubs to 0x200 - adr r1, __stubs_end -1: ldr r3, [r0], #4 - str r3, [r2], #4 - cmp r0, r1 - blt 1b - LOADREGS(fd, sp!, {r4 - r6, pc}) + b vector_und + stubs_offset + ldr pc, .LCvswi + stubs_offset + b vector_pabt + stubs_offset + b vector_dabt + stubs_offset + b vector_addrexcptn + stubs_offset + b vector_irq + stubs_offset + b vector_fiq + stubs_offset + + .globl __vectors_end +__vectors_end: .data