Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch / arm / lib / memcpy.S
index f5a593c..7e71d67 100644 (file)
 /*
  *  linux/arch/arm/lib/memcpy.S
  *
- *  Copyright (C) 1995-1999 Russell King
+ *  Author:    Nicolas Pitre
+ *  Created:   Sep 28, 2005
+ *  Copyright: MontaVista Software, Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  ASM optimised string functions
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
  */
+
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-               .text
-
-#define ENTER  \
-               mov     ip,sp   ;\
-               stmfd   sp!,{r0,r4-r9,fp,ip,lr,pc}      ;\
-               sub     fp,ip,#4
-
-#define EXIT   \
-               LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
-
-#define EXITEQ \
-               LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
-
-/*
- * Prototype: void memcpy(void *to,const void *from,unsigned long n);
- */
-ENTRY(memcpy)
-ENTRY(memmove)
-               ENTER
-               cmp     r1, r0
-               bcc     23f
-               subs    r2, r2, #4
-               blt     6f
-       PLD(    pld     [r1, #0]                )
-               ands    ip, r0, #3
-               bne     7f
-               ands    ip, r1, #3
-               bne     8f
+       .macro ldr1w ptr reg abort      /* load one 32-bit word; abort is accepted but not referenced in this body */
+       ldr \reg, [\ptr], #4            /* post-indexed: ptr advances by 4 after the load */
+       .endm
 
-1:             subs    r2, r2, #8
-               blt     5f
-               subs    r2, r2, #20
-               blt     4f
-       PLD(    pld     [r1, #28]               )
-       PLD(    subs    r2, r2, #64             )
-       PLD(    blt     3f                      )
-2:     PLD(    pld     [r1, #60]               )
-       PLD(    pld     [r1, #92]               )
-               ldmia   r1!, {r3 - r9, ip}
-               subs    r2, r2, #32
-               stmgeia r0!, {r3 - r9, ip}
-               ldmgeia r1!, {r3 - r9, ip}
-               subges  r2, r2, #32
-               stmia   r0!, {r3 - r9, ip}
-               bge     2b
-3:     PLD(    ldmia   r1!, {r3 - r9, ip}      )
-       PLD(    adds    r2, r2, #32             )
-       PLD(    stmgeia r0!, {r3 - r9, ip}      )
-       PLD(    ldmgeia r1!, {r3 - r9, ip}      )
-       PLD(    subges  r2, r2, #32             )
-       PLD(    stmia   r0!, {r3 - r9, ip}      )
-4:             cmn     r2, #16
-               ldmgeia r1!, {r3 - r6}
-               subge   r2, r2, #16
-               stmgeia r0!, {r3 - r6}
-               adds    r2, r2, #20
-               ldmgeia r1!, {r3 - r5}
-               subge   r2, r2, #12
-               stmgeia r0!, {r3 - r5}
-5:             adds    r2, r2, #8
-               blt     6f
-               subs    r2, r2, #4
-               ldrlt   r3, [r1], #4
-               ldmgeia r1!, {r4, r5}
-               subge   r2, r2, #4
-               strlt   r3, [r0], #4
-               stmgeia r0!, {r4, r5}
+       .macro ldr4w ptr reg1 reg2 reg3 reg4 abort      /* load four words in one instruction; abort is not referenced in this body */
+       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}       /* increment-after with writeback: ptr advances by 16 bytes */
+       .endm
 
-6:             adds    r2, r2, #4
-               EXITEQ
-               cmp     r2, #2
-               ldrb    r3, [r1], #1
-               ldrgeb  r4, [r1], #1
-               ldrgtb  r5, [r1], #1
-               strb    r3, [r0], #1
-               strgeb  r4, [r0], #1
-               strgtb  r5, [r0], #1
-               EXIT
+       .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort  /* load eight words in one instruction; abort is not referenced in this body */
+       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}   /* increment-after with writeback: ptr advances by 32 bytes */
+       .endm
 
-7:             rsb     ip, ip, #4
-               cmp     ip, #2
-               ldrb    r3, [r1], #1
-               ldrgeb  r4, [r1], #1
-               ldrgtb  r5, [r1], #1
-               strb    r3, [r0], #1
-               strgeb  r4, [r0], #1
-               strgtb  r5, [r0], #1
-               subs    r2, r2, ip
-               blt     6b
-               ands    ip, r1, #3
-               beq     1b
+       .macro ldr1b ptr reg cond=al abort      /* load one byte, optionally conditional (cond defaults to al, i.e. always); abort is not referenced in this body */
+       ldr\cond\()b \reg, [\ptr], #1           /* empty-parens escape separates the cond argument from the b suffix; ptr advances by 1 */
+       .endm
 
-8:             bic     r1, r1, #3
-               ldr     r7, [r1], #4
-               cmp     ip, #2
-               bgt     18f
-               beq     13f
-               cmp     r2, #12
-               blt     11f
-       PLD(    pld     [r1, #12]               )
-               sub     r2, r2, #12
-       PLD(    subs    r2, r2, #32             )
-       PLD(    blt     10f                     )
-       PLD(    pld     [r1, #28]               )
-9:     PLD(    pld     [r1, #44]               )
-10:            mov     r3, r7, pull #8
-               ldmia   r1!, {r4 - r7}
-               subs    r2, r2, #16
-               orr     r3, r3, r4, push #24
-               mov     r4, r4, pull #8
-               orr     r4, r4, r5, push #24
-               mov     r5, r5, pull #8
-               orr     r5, r5, r6, push #24
-               mov     r6, r6, pull #8
-               orr     r6, r6, r7, push #24
-               stmia   r0!, {r3 - r6}
-               bge     9b
-       PLD(    cmn     r2, #32                 )
-       PLD(    bge     10b                     )
-       PLD(    add     r2, r2, #32             )
-               adds    r2, r2, #12
-               blt     12f
-11:            mov     r3, r7, pull #8
-               ldr     r7, [r1], #4
-               subs    r2, r2, #4
-               orr     r3, r3, r7, push #24
-               str     r3, [r0], #4
-               bge     11b
-12:            sub     r1, r1, #3
-               b       6b
+       .macro str1w ptr reg abort      /* store one 32-bit word; abort is accepted but not referenced in this body */
+       str \reg, [\ptr], #4            /* post-indexed: ptr advances by 4 after the store */
+       .endm
 
-13:            cmp     r2, #12
-               blt     16f
-       PLD(    pld     [r1, #12]               )
-               sub     r2, r2, #12
-       PLD(    subs    r2, r2, #32             )
-       PLD(    blt     15f                     )
-       PLD(    pld     [r1, #28]               )
-14:    PLD(    pld     [r1, #44]               )
-15:            mov     r3, r7, pull #16
-               ldmia   r1!, {r4 - r7}
-               subs    r2, r2, #16
-               orr     r3, r3, r4, push #16
-               mov     r4, r4, pull #16
-               orr     r4, r4, r5, push #16
-               mov     r5, r5, pull #16
-               orr     r5, r5, r6, push #16
-               mov     r6, r6, pull #16
-               orr     r6, r6, r7, push #16
-               stmia   r0!, {r3 - r6}
-               bge     14b
-       PLD(    cmn     r2, #32                 )
-       PLD(    bge     15b                     )
-       PLD(    add     r2, r2, #32             )
-               adds    r2, r2, #12
-               blt     17f
-16:            mov     r3, r7, pull #16
-               ldr     r7, [r1], #4
-               subs    r2, r2, #4
-               orr     r3, r3, r7, push #16
-               str     r3, [r0], #4
-               bge     16b
-17:            sub     r1, r1, #2
-               b       6b
+       .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort  /* store eight words in one instruction; abort is not referenced in this body */
+       stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}   /* increment-after with writeback: ptr advances by 32 bytes */
+       .endm
 
-18:            cmp     r2, #12
-               blt     21f
-       PLD(    pld     [r1, #12]               )
-               sub     r2, r2, #12
-       PLD(    subs    r2, r2, #32             )
-       PLD(    blt     20f                     )
-       PLD(    pld     [r1, #28]               )
-19:    PLD(    pld     [r1, #44]               )
-20:            mov     r3, r7, pull #24
-               ldmia   r1!, {r4 - r7}
-               subs    r2, r2, #16
-               orr     r3, r3, r4, push #8
-               mov     r4, r4, pull #24
-               orr     r4, r4, r5, push #8
-               mov     r5, r5, pull #24
-               orr     r5, r5, r6, push #8
-               mov     r6, r6, pull #24
-               orr     r6, r6, r7, push #8
-               stmia   r0!, {r3 - r6}
-               bge     19b
-       PLD(    cmn     r2, #32                 )
-       PLD(    bge     20b                     )
-       PLD(    add     r2, r2, #32             )
-               adds    r2, r2, #12
-               blt     22f
-21:            mov     r3, r7, pull #24
-               ldr     r7, [r1], #4
-               subs    r2, r2, #4
-               orr     r3, r3, r7, push #8
-               str     r3, [r0], #4
-               bge     21b
-22:            sub     r1, r1, #1
-               b       6b
+       .macro str1b ptr reg cond=al abort      /* store one byte, optionally conditional (cond defaults to al, i.e. always); abort is not referenced in this body */
+       str\cond\()b \reg, [\ptr], #1           /* empty-parens escape separates the cond argument from the b suffix; ptr advances by 1 */
+       .endm
 
+       .macro enter reg1 reg2          /* prologue helper: saves r0 plus two caller-chosen registers on the stack */
+       stmdb sp!, {r0, \reg1, \reg2}   /* decrement-before with writeback on sp, i.e. a push of three registers */
+       .endm
 
-23:            add     r1, r1, r2
-               add     r0, r0, r2
-               subs    r2, r2, #4
-               blt     29f
-       PLD(    pld     [r1, #-4]               )
-               ands    ip, r0, #3
-               bne     30f
-               ands    ip, r1, #3
-               bne     31f
+       .macro exit reg1 reg2           /* epilogue helper: restores r0 plus two registers, mirroring the enter macro */
+       ldmfd sp!, {r0, \reg1, \reg2}   /* pop with writeback; reloading r0 presumably yields the original dest as the return value - confirm in copy_template.S */
+       .endm
 
-24:            subs    r2, r2, #8
-               blt     28f
-               subs    r2, r2, #20
-               blt     27f
-       PLD(    pld     [r1, #-32]              )
-       PLD(    subs    r2, r2, #64             )
-       PLD(    blt     26f                     )
-25:    PLD(    pld     [r1, #-64]              )
-       PLD(    pld     [r1, #-96]              )
-               ldmdb   r1!, {r3 - r9, ip}
-               subs    r2, r2, #32
-               stmgedb r0!, {r3 - r9, ip}
-               ldmgedb r1!, {r3 - r9, ip}
-               subges  r2, r2, #32
-               stmdb   r0!, {r3 - r9, ip}
-               bge     25b
-26:    PLD(    ldmdb   r1!, {r3 - r9, ip}      )
-       PLD(    adds    r2, r2, #32             )
-       PLD(    stmgedb r0!, {r3 - r9, ip}      )
-       PLD(    ldmgedb r1!, {r3 - r9, ip}      )
-       PLD(    subges  r2, r2, #32             )
-       PLD(    stmdb   r0!, {r3 - r9, ip}      )
-27:            cmn     r2, #16
-               ldmgedb r1!, {r3 - r6}
-               subge   r2, r2, #16
-               stmgedb r0!, {r3 - r6}
-               adds    r2, r2, #20
-               ldmgedb r1!, {r3 - r5}
-               subge   r2, r2, #12
-               stmgedb r0!, {r3 - r5}
-28:            adds    r2, r2, #8
-               blt     29f
-               subs    r2, r2, #4
-               ldrlt   r3, [r1, #-4]!
-               ldmgedb r1!, {r4, r5}
-               subge   r2, r2, #4
-               strlt   r3, [r0, #-4]!
-               stmgedb r0!, {r4, r5}
+       .text
 
-29:            adds    r2, r2, #4
-               EXITEQ
-               cmp     r2, #2
-               ldrb    r3, [r1, #-1]!
-               ldrgeb  r4, [r1, #-1]!
-               ldrgtb  r5, [r1, #-1]!
-               strb    r3, [r0, #-1]!
-               strgeb  r4, [r0, #-1]!
-               strgtb  r5, [r0, #-1]!
-               EXIT
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
-30:            cmp     ip, #2
-               ldrb    r3, [r1, #-1]!
-               ldrgeb  r4, [r1, #-1]!
-               ldrgtb  r5, [r1, #-1]!
-               strb    r3, [r0, #-1]!
-               strgeb  r4, [r0, #-1]!
-               strgtb  r5, [r0, #-1]!
-               subs    r2, r2, ip
-               blt     29b
-               ands    ip, r1, #3
-               beq     24b
-
-31:            bic     r1, r1, #3
-               ldr     r3, [r1], #0
-               cmp     ip, #2
-               blt     41f
-               beq     36f
-               cmp     r2, #12
-               blt     34f
-       PLD(    pld     [r1, #-16]              )
-               sub     r2, r2, #12
-       PLD(    subs    r2, r2, #32             )
-       PLD(    blt     33f                     )
-       PLD(    pld     [r1, #-32]              )
-32:    PLD(    pld     [r1, #-48]              )
-33:            mov     r7, r3, push #8
-               ldmdb   r1!, {r3, r4, r5, r6}
-               subs    r2, r2, #16
-               orr     r7, r7, r6, pull #24
-               mov     r6, r6, push #8
-               orr     r6, r6, r5, pull #24
-               mov     r5, r5, push #8
-               orr     r5, r5, r4, pull #24
-               mov     r4, r4, push #8
-               orr     r4, r4, r3, pull #24
-               stmdb   r0!, {r4, r5, r6, r7}
-               bge     32b
-       PLD(    cmn     r2, #32                 )
-       PLD(    bge     33b                     )
-       PLD(    add     r2, r2, #32             )
-               adds    r2, r2, #12
-               blt     35f
-34:            mov     ip, r3, push #8
-               ldr     r3, [r1, #-4]!
-               subs    r2, r2, #4
-               orr     ip, ip, r3, pull #24
-               str     ip, [r0, #-4]!
-               bge     34b
-35:            add     r1, r1, #3
-               b       29b
-
-36:            cmp     r2, #12
-               blt     39f
-       PLD(    pld     [r1, #-16]              )
-               sub     r2, r2, #12
-       PLD(    subs    r2, r2, #32             )
-       PLD(    blt     38f                     )
-       PLD(    pld     [r1, #-32]              )
-37:    PLD(    pld     [r1, #-48]              )
-38:            mov     r7, r3, push #16
-               ldmdb   r1!, {r3, r4, r5, r6}
-               subs    r2, r2, #16
-               orr     r7, r7, r6, pull #16
-               mov     r6, r6, push #16
-               orr     r6, r6, r5, pull #16
-               mov     r5, r5, push #16
-               orr     r5, r5, r4, pull #16
-               mov     r4, r4, push #16
-               orr     r4, r4, r3, pull #16
-               stmdb   r0!, {r4, r5, r6, r7}
-               bge     37b
-       PLD(    cmn     r2, #32                 )
-       PLD(    bge     38b                     )
-       PLD(    add     r2, r2, #32             )
-               adds    r2, r2, #12
-               blt     40f
-39:            mov     ip, r3, push #16
-               ldr     r3, [r1, #-4]!
-               subs    r2, r2, #4
-               orr     ip, ip, r3, pull #16
-               str     ip, [r0, #-4]!
-               bge     39b
-40:            add     r1, r1, #2
-               b       29b
+ENTRY(memcpy)  /* entry point; the body is pulled in by the copy_template.S include that follows - presumably built from the ldr/str/enter/exit macros defined earlier in this file, confirm there */
 
-41:            cmp     r2, #12
-               blt     44f
-       PLD(    pld     [r1, #-16]              )
-               sub     r2, r2, #12
-       PLD(    subs    r2, r2, #32             )
-       PLD(    blt     43f                     )
-       PLD(    pld     [r1, #-32]              )
-42:    PLD(    pld     [r1, #-48]              )
-43:            mov     r7, r3, push #24
-               ldmdb   r1!, {r3, r4, r5, r6}
-               subs    r2, r2, #16
-               orr     r7, r7, r6, pull #8
-               mov     r6, r6, push #24
-               orr     r6, r6, r5, pull #8
-               mov     r5, r5, push #24
-               orr     r5, r5, r4, pull #8
-               mov     r4, r4, push #24
-               orr     r4, r4, r3, pull #8
-               stmdb   r0!, {r4, r5, r6, r7}
-               bge     42b
-       PLD(    cmn     r2, #32                 )
-       PLD(    bge     43b                     )
-       PLD(    add     r2, r2, #32             )
-               adds    r2, r2, #12
-               blt     45f
-44:            mov     ip, r3, push #24
-               ldr     r3, [r1, #-4]!
-               subs    r2, r2, #4
-               orr     ip, ip, r3, pull #8
-               str     ip, [r0, #-4]!
-               bge     44b
-45:            add     r1, r1, #1
-               b       29b
+#include "copy_template.S"