vserver 2.0 rc7

[linux-2.6.git] / arch / arm / kernel / entry-armv.S
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S

index 69f82e0..e14278d 100644 (file)
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -14,12 +14,12 @@
   *  it to save wrong values...  Be aware!
   */
  #include <linux/config.h>
-#include <linux/init.h>
  
-#include <asm/thread_info.h>
  #include <asm/glue.h>
-#include <asm/ptrace.h>
  #include <asm/vfpmacros.h>
+#include <asm/hardware.h>              /* should be moved into entry-macro.S */
+#include <asm/arch/irqs.h>             /* should be moved into entry-macro.S */
+#include <asm/arch/entry-macro.S>
  
  #include "entry-header.S"
  
@@ -118,7 +118,7 @@ __dabt_svc:
         @
         @ IRQs off again before pulling preserved data off the stack
         @
-       disable_irq r0
+       disable_irq
  
         @
         @ restore SPSR and restart the instruction
@@ -198,7 +198,7 @@ __und_svc:
         @
         @ IRQs off again before pulling preserved data off the stack
         @
-1:     disable_irq r0
+1:     disable_irq
  
         @
         @ restore SPSR and restart the instruction
@@ -232,7 +232,7 @@ __pabt_svc:
         @
         @ IRQs off again before pulling preserved data off the stack
         @
-       disable_irq r0
+       disable_irq
  
         @
         @ restore SPSR and restart the instruction
@@ -269,6 +269,12 @@ __pabt_svc:
         add     r5, sp, #S_PC
         ldmia   r7, {r2 - r4}                   @ Get USR pc, cpsr
  
+#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+       @ make sure our user space atomic helper is aborted
+       cmp     r2, #VIRT_OFFSET
+       bichs   r3, r3, #PSR_Z_BIT
+#endif
+
         @
         @ We are now ready to fill in the remaining blanks on the stack:
         @
@@ -280,13 +286,21 @@ __pabt_svc:
         @
         stmia   r5, {r2 - r4}
         stmdb   r5, {sp, lr}^
+
+       @
+       @ Enable the alignment trap while in kernel mode
+       @
+       alignment_trap r7, r0, __temp_\sym
+
+       @
+       @ Clear FP to mark the first stack frame
+       @
+       zero_fp
         .endm
  
         .align  5
  __dabt_usr:
         usr_entry abt
-       alignment_trap r7, r0, __temp_abt
-       zero_fp
  
         @
         @ Call the processor-specific abort handler:
@@ -308,7 +322,7 @@ __dabt_usr:
         @
         @ IRQs on, then call the main handler
         @
-       enable_irq r2
+       enable_irq
         mov     r2, sp
         adr     lr, ret_from_exception
         b       do_DataAbort
@@ -316,8 +330,7 @@ __dabt_usr:
         .align  5
  __irq_usr:
         usr_entry irq
-       alignment_trap r7, r0, __temp_irq
-       zero_fp
+
  #ifdef CONFIG_PREEMPT
         get_thread_info r8
         ldr     r9, [r8, #TI_PREEMPT]           @ get preempt count
@@ -348,8 +361,7 @@ __irq_usr:
         .align  5
  __und_usr:
         usr_entry und
-       alignment_trap r7, r0, __temp_und
-       zero_fp
+
         tst     r3, #PSR_T_BIT                  @ Thumb mode?
         bne     fpundefinstr                    @ ignore FP
         sub     r4, r2, #4
@@ -412,7 +424,7 @@ call_fpe:
         movcss  r7, r5, lsr #(TIF_USING_IWMMXT + 1)
         bcs     iwmmxt_task_enable
  #endif
-       enable_irq r7
+       enable_irq
         add     pc, pc, r8, lsr #6
         mov     r0, r0
  
@@ -465,9 +477,8 @@ fpundefinstr:
         .align  5
  __pabt_usr:
         usr_entry abt
-       alignment_trap r7, r0, __temp_abt
-       zero_fp
-       enable_irq r0                           @ Enable interrupts
+
+       enable_irq                              @ Enable interrupts
         mov     r0, r2                          @ address (pc)
         mov     r1, sp                          @ regs
         bl      do_PrefetchAbort                @ call abort handler
@@ -494,8 +505,12 @@ ENTRY(__switch_to)
         mra     r4, r5, acc0
         stmia   ip, {r4, r5}
  #endif
+#if defined(CONFIG_HAS_TLS_REG)
+       mcr     p15, 0, r3, c13, c0, 3          @ set TLS register
+#elif !defined(CONFIG_TLS_REG_EMUL)
         mov     r4, #0xffff0fff
-       str     r3, [r4, #-3]                   @ Set TLS ptr
+       str     r3, [r4, #-15]                  @ TLS val at 0xffff0ff0
+#endif
         mcr     p15, 0, r6, c3, c0, 0           @ Set domain register
  #ifdef CONFIG_VFP
         @ Always disable VFP so we can lazily save/restore the old
@@ -514,11 +529,215 @@ ENTRY(__switch_to)
         ldmib   r2, {r4 - sl, fp, sp, pc}       @ Load all regs saved previously
  
         __INIT
+
+/*
+ * User helpers.
+ *
+ * These are segments of kernel-provided user code reachable from user space
+ * at a fixed address in kernel memory.  This is used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native features and/or instructions in many ARM CPUs. The idea is for
+ * this code to be executed directly in user mode for best efficiency but
+ * which is too intimate with the kernel counterpart to be left to user
+ * libraries.  In fact this code might even differ from one CPU to another
+ * depending on the available instruction set and restrictions like on
+ * SMP systems.  In other words, the kernel reserves the right to change
+ * this code as needed without warning. Only the entry points and their
+ * results are guaranteed to be stable.
+ *
+ * Each segment is 32-byte aligned and will be moved to the top of the high
+ * vector page.  New segments (if ever needed) must be added in front of
+ * existing ones.  This mechanism should be used only for things that are
+ * really small and justified, and not be abused freely.
+ *
+ * User space is expected to implement those things inline when optimizing
+ * for a processor that has the necessary native support, but only if such
+ * resulting binaries are already to be incompatible with earlier ARM
+ * processors due to the use of unsupported instructions other than what
+ * is provided here.  In other words don't make binaries unable to run on
+ * earlier processors just for the sake of not using these kernel helpers
+ * if your compiled code is not going to use the new instructions for other
+ * purpose.
+ */
+
+       .align  5
+       .globl  __kuser_helper_start
+__kuser_helper_start:
+
+/*
+ * Reference prototype:
+ *
+ *     int __kernel_cmpxchg(int oldval, int newval, int *ptr)
+ *
+ * Input:
+ *
+ *     r0 = oldval
+ *     r1 = newval
+ *     r2 = ptr
+ *     lr = return address
+ *
+ * Output:
+ *
+ *     r0 = returned value (zero or non-zero)
+ *     C flag = set if r0 == 0, clear if r0 != 0
+ *
+ * Clobbered:
+ *
+ *     r3, ip, flags
+ *
+ * Definition and user space usage example:
+ *
+ *     typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
+ *     #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
+ *
+ * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
+ * Return zero if *ptr was changed or non-zero if no exchange happened.
+ * The C flag is also set if *ptr was changed to allow for assembly
+ * optimization in the calling code.
+ *
+ * For example, a user space atomic_add implementation could look like this:
+ *
+ * #define atomic_add(ptr, val) \
+ *     ({ register unsigned int *__ptr asm("r2") = (ptr); \
+ *        register unsigned int __result asm("r1"); \
+ *        asm volatile ( \
+ *            "1: @ atomic_add\n\t" \
+ *            "ldr     r0, [r2]\n\t" \
+ *            "mov     r3, #0xffff0fff\n\t" \
+ *            "add     lr, pc, #4\n\t" \
+ *            "add     r1, r0, %2\n\t" \
+ *            "add     pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
+ *            "bcc     1b" \
+ *            : "=&r" (__result) \
+ *            : "r" (__ptr), "rIL" (val) \
+ *            : "r0","r3","ip","lr","cc","memory" ); \
+ *        __result; })
+ */
+
+__kuser_cmpxchg:                               @ 0xffff0fc0
+
+#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+
+       /*
+        * Poor you.  No fast solution possible...
+        * The kernel itself must perform the operation.
+        * A special ghost syscall is used for that (see traps.c).
+        */
+       swi     #0x9ffff0               @ ghost syscall: kernel does the cmpxchg
+       mov     pc, lr                  @ return to caller
+
+#elif __LINUX_ARM_ARCH__ < 6
+
+       /*
+        * Theory of operation:
+        *
+        * We set the Z flag before loading oldval. If ever an exception
+        * occurs we can not be sure the loaded value will still be the same
+        * when the exception returns, therefore the user exception handler
+        * will clear the Z flag whenever the interrupted user code was
+        * actually from the kernel address space (see the usr_entry macro).
+        *
+        * The post-increment on the str is used to prevent a race with an
+        * exception happening just after the str instruction which would
+        * clear the Z flag although the exchange was done.
+        */
+       teq     ip, ip                  @ set Z flag
+       ldr     ip, [r2]                @ load current val
+       add     r3, r2, #1              @ prepare store ptr
+       teqeq   ip, r0                  @ compare with oldval if still allowed
+       streq   r1, [r3, #-1]!          @ store newval if still allowed
+       subs    r0, r2, r3              @ if r2 == r3 the str occurred: r0 = 0, C set
+       mov     pc, lr                  @ return to caller
+
+#else
+
+       ldrex   r3, [r2]                @ r3 = *ptr (exclusive load)
+       subs    r3, r3, r0              @ r3 = *ptr - oldval, Z set if equal
+       strexeq r3, r1, [r2]            @ if equal try the store, r3 = 0 on success
+       rsbs    r0, r3, #0              @ r0 = -r3, C set only if r3 == 0
+       mov     pc, lr                  @ return to caller
+
+#endif
+
+       .align  5
+
+/*
+ * Reference prototype:
+ *
+ *     int __kernel_get_tls(void)
+ *
+ * Input:
+ *
+ *     lr = return address
+ *
+ * Output:
+ *
+ *     r0 = TLS value
+ *
+ * Clobbered:
+ *
+ *     the Z flag might be lost
+ *
+ * Definition and user space usage example:
+ *
+ *     typedef int (__kernel_get_tls_t)(void);
+ *     #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
+ *
+ * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_get_tls() \
+ *     ({ register unsigned int __val asm("r0"); \
+ *         asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
+ *             : "=r" (__val) : : "lr","cc" ); \
+ *        __val; })
+ */
+
+__kuser_get_tls:                               @ 0xffff0fe0
+
+#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
+
+       ldr     r0, [pc, #(16 - 8)]             @ TLS stored at 0xffff0ff0 (entry + 16, pc reads as . + 8)
+       mov     pc, lr                          @ return with r0 = TLS value
+
+#else
+
+       mrc     p15, 0, r0, c13, c0, 3          @ read TLS register
+       mov     pc, lr                          @ return with r0 = TLS value
+
+#endif
+
+       .rep    5                       @ 2 insns + 5 words = 28 bytes from 0xffff0fe0
+       .word   0                       @ pad up to __kuser_helper_version
+       .endr
+
+/*
+ * Reference declaration:
+ *
+ *     extern unsigned int __kernel_helper_version;
+ *
+ * Definition and user space usage example:
+ *
+ *     #define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+ *
+ * User space may read this to determine the current number of helpers
+ * available.
+ */
+
+__kuser_helper_version:                                @ 0xffff0ffc
+       .word   ((__kuser_helper_end - __kuser_helper_start) >> 5)      @ helper area size in 32-byte segments
+
+       .globl  __kuser_helper_end
+__kuser_helper_end:                            @ end of the helper area measured above
+
+
  /*
   * Vector stubs.
   *
- * This code is copied to 0x200 or 0xffff0200 so we can use branches in the
- * vectors, rather than ldr's.
+ * This code is copied to 0xffff0200 so we can use branches in the
+ * vectors, rather than ldr's.  Note that this code must not
+ * exceed 0x300 bytes.
   *
   * Common stub entry macro:
   *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
@@ -539,7 +758,7 @@ vector_\name:
         @
         mrs     r13, cpsr
         bic     r13, r13, #MODE_MASK
-       orr     r13, r13, #MODE_SVC
+       orr     r13, r13, #SVC_MODE
         msr     spsr_cxsf, r13                  @ switch to SVC_32 mode
  
         and     lr, lr, #15
@@ -547,6 +766,7 @@ vector_\name:
         movs    pc, lr                          @ Changes mode and branches
         .endm
  
+       .globl  __stubs_start
  __stubs_start:
  /*
   * Interrupt dispatcher
@@ -681,37 +901,24 @@ vector_addrexcptn:
  .LCsabt:
         .word   __temp_abt
  
+       .globl  __stubs_end
  __stubs_end:
  
-       .equ    __real_stubs_start, .LCvectors + 0x200
+       .equ    stubs_offset, __vectors_start + 0x200 - __stubs_start
  
-.LCvectors:
+       .globl  __vectors_start
+__vectors_start:
         swi     SYS_ERROR0
-       b       __real_stubs_start + (vector_und - __stubs_start)
-       ldr     pc, __real_stubs_start + (.LCvswi - __stubs_start)
-       b       __real_stubs_start + (vector_pabt - __stubs_start)
-       b       __real_stubs_start + (vector_dabt - __stubs_start)
-       b       __real_stubs_start + (vector_addrexcptn - __stubs_start)
-       b       __real_stubs_start + (vector_irq - __stubs_start)
-       b       __real_stubs_start + (vector_fiq - __stubs_start)
-
-ENTRY(__trap_init)
-       stmfd   sp!, {r4 - r6, lr}
-
-       mov     r0, #0xff000000
-       orr     r0, r0, #0x00ff0000             @ high vectors position
-       adr     r1, .LCvectors                  @ set up the vectors
-       ldmia   r1, {r1, r2, r3, r4, r5, r6, ip, lr}
-       stmia   r0, {r1, r2, r3, r4, r5, r6, ip, lr}
-
-       add     r2, r0, #0x200
-       adr     r0, __stubs_start               @ copy stubs to 0x200
-       adr     r1, __stubs_end
-1:     ldr     r3, [r0], #4
-       str     r3, [r2], #4
-       cmp     r0, r1
-       blt     1b
-       LOADREGS(fd, sp!, {r4 - r6, pc})
+       b       vector_und + stubs_offset
+       ldr     pc, .LCvswi + stubs_offset
+       b       vector_pabt + stubs_offset
+       b       vector_dabt + stubs_offset
+       b       vector_addrexcptn + stubs_offset
+       b       vector_irq + stubs_offset
+       b       vector_fiq + stubs_offset
+
+       .globl  __vectors_end
+__vectors_end:
  
         .data