X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Farm%2Flib%2Fmemcpy.S;fp=arch%2Farm%2Flib%2Fmemcpy.S;h=7e71d6708a8d668142fba5f12b932eb5cb1ac626;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=f5a593ceb8cc1e2637271d8ec52b8aacaa765ad0;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index f5a593ceb..7e71d6708 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -1,393 +1,59 @@ /* * linux/arch/arm/lib/memcpy.S * - * Copyright (C) 1995-1999 Russell King + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. */ + #include #include - .text - -#define ENTER \ - mov ip,sp ;\ - stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\ - sub fp,ip,#4 - -#define EXIT \ - LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc}) - -#define EXITEQ \ - LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc}) - -/* - * Prototype: void memcpy(void *to,const void *from,unsigned long n); - */ -ENTRY(memcpy) -ENTRY(memmove) - ENTER - cmp r1, r0 - bcc 23f - subs r2, r2, #4 - blt 6f - PLD( pld [r1, #0] ) - ands ip, r0, #3 - bne 7f - ands ip, r1, #3 - bne 8f + .macro ldr1w ptr reg abort + ldr \reg, [\ptr], #4 + .endm -1: subs r2, r2, #8 - blt 5f - subs r2, r2, #20 - blt 4f - PLD( pld [r1, #28] ) - PLD( subs r2, r2, #64 ) - PLD( blt 3f ) -2: PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - ldmia r1!, {r3 - r9, ip} - subs r2, r2, #32 - stmgeia r0!, {r3 - r9, ip} - ldmgeia r1!, {r3 - r9, ip} - subges r2, r2, #32 - stmia r0!, {r3 - r9, ip} - bge 2b -3: PLD( ldmia r1!, {r3 - r9, ip} ) - PLD( adds r2, r2, #32 ) - PLD( stmgeia r0!, {r3 - r9, ip} ) - PLD( ldmgeia r1!, {r3 - r9, ip} ) - PLD( subges r2, r2, #32 ) - PLD( stmia r0!, {r3 - r9, ip} ) -4: cmn r2, #16 - ldmgeia r1!, {r3 - r6} - subge r2, r2, #16 - stmgeia r0!, {r3 - r6} - adds r2, r2, #20 - ldmgeia r1!, {r3 - r5} - subge r2, r2, #12 - stmgeia r0!, {r3 - r5} -5: adds r2, r2, #8 - blt 6f - subs r2, r2, #4 - ldrlt r3, [r1], #4 - ldmgeia r1!, {r4, r5} - subge r2, r2, #4 - strlt r3, [r0], #4 - stmgeia r0!, {r4, r5} + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm -6: adds r2, r2, #4 - EXITEQ - cmp r2, #2 - ldrb r3, [r1], #1 - ldrgeb r4, [r1], #1 - ldrgtb r5, [r1], #1 - strb r3, [r0], #1 - strgeb r4, [r0], #1 - strgtb r5, [r0], #1 - EXIT + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm -7: rsb ip, ip, #4 - cmp ip, #2 - ldrb r3, [r1], #1 - ldrgeb r4, [r1], #1 - ldrgtb r5, [r1], #1 - strb r3, [r0], #1 - strgeb r4, [r0], #1 - strgtb r5, [r0], #1 - subs r2, r2, ip - blt 6b - ands ip, r1, #3 - beq 1b + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm -8: bic r1, r1, #3 - ldr r7, [r1], #4 - cmp ip, #2 - bgt 18f - beq 13f - cmp r2, #12 - blt 11f - PLD( pld [r1, #12] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 10f ) - PLD( pld [r1, #28] ) -9: PLD( pld [r1, #44] ) -10: mov r3, r7, pull #8 - ldmia r1!, {r4 - r7} - subs r2, r2, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - stmia r0!, {r3 - r6} - bge 9b - PLD( cmn r2, #32 ) - PLD( bge 10b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 12f -11: mov r3, r7, pull #8 - ldr r7, [r1], #4 - subs r2, r2, #4 - orr r3, r3, r7, push #24 - str r3, [r0], #4 - bge 11b -12: sub r1, r1, #3 - b 6b + .macro str1w ptr reg abort + str \reg, [\ptr], #4 + .endm -13: cmp r2, #12 - blt 16f - PLD( pld [r1, #12] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 15f ) - PLD( pld [r1, #28] ) -14: PLD( pld [r1, #44] ) -15: mov r3, r7, pull #16 - ldmia r1!, {r4 - r7} - subs r2, r2, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - stmia r0!, {r3 - r6} - bge 14b - PLD( cmn r2, #32 ) - PLD( bge 15b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 17f -16: mov r3, r7, pull #16 - ldr r7, [r1], #4 - subs r2, r2, #4 - orr r3, r3, r7, push #16 - str r3, [r0], #4 - bge 16b -17: sub r1, r1, #2 - b 6b + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm -18: cmp r2, #12 - blt 21f - PLD( pld [r1, #12] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 20f ) - PLD( pld [r1, #28] ) -19: PLD( pld [r1, #44] ) -20: mov r3, r7, pull #24 - ldmia r1!, {r4 - r7} - subs r2, r2, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - stmia r0!, {r3 - r6} - bge 19b - PLD( cmn r2, #32 ) - PLD( bge 20b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 22f -21: mov r3, r7, pull #24 - ldr r7, [r1], #4 - subs r2, r2, #4 - orr r3, r3, r7, push #8 - str r3, [r0], #4 - bge 21b -22: sub r1, r1, #1 - b 6b + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm -23: add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 29f - PLD( pld [r1, #-4] ) - ands ip, r0, #3 - bne 30f - ands ip, r1, #3 - bne 31f + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm -24: subs r2, r2, #8 - blt 28f - subs r2, r2, #20 - blt 27f - PLD( pld [r1, #-32] ) - PLD( subs r2, r2, #64 ) - PLD( blt 26f ) -25: PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - ldmdb r1!, {r3 - r9, ip} - subs r2, r2, #32 - stmgedb r0!, {r3 - r9, ip} - ldmgedb r1!, {r3 - r9, ip} - subges r2, r2, #32 - stmdb r0!, {r3 - r9, ip} - bge 25b -26: PLD( ldmdb r1!, {r3 - r9, ip} ) - PLD( adds r2, r2, #32 ) - PLD( stmgedb r0!, {r3 - r9, ip} ) - PLD( ldmgedb r1!, {r3 - r9, ip} ) - PLD( subges r2, r2, #32 ) - PLD( stmdb r0!, {r3 - r9, ip} ) -27: cmn r2, #16 - ldmgedb r1!, {r3 - r6} - subge r2, r2, #16 - stmgedb r0!, {r3 - r6} - adds r2, r2, #20 - ldmgedb r1!, {r3 - r5} - subge r2, r2, #12 - stmgedb r0!, {r3 - r5} -28: adds r2, r2, #8 - blt 29f - subs r2, r2, #4 - ldrlt r3, [r1, #-4]! - ldmgedb r1!, {r4, r5} - subge r2, r2, #4 - strlt r3, [r0, #-4]! - stmgedb r0!, {r4, r5} + .text -29: adds r2, r2, #4 - EXITEQ - cmp r2, #2 - ldrb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrgtb r5, [r1, #-1]! - strb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - strgtb r5, [r0, #-1]! - EXIT +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ -30: cmp ip, #2 - ldrb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrgtb r5, [r1, #-1]! - strb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - strgtb r5, [r0, #-1]! - subs r2, r2, ip - blt 29b - ands ip, r1, #3 - beq 24b - -31: bic r1, r1, #3 - ldr r3, [r1], #0 - cmp ip, #2 - blt 41f - beq 36f - cmp r2, #12 - blt 34f - PLD( pld [r1, #-16] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 33f ) - PLD( pld [r1, #-32] ) -32: PLD( pld [r1, #-48] ) -33: mov r7, r3, push #8 - ldmdb r1!, {r3, r4, r5, r6} - subs r2, r2, #16 - orr r7, r7, r6, pull #24 - mov r6, r6, push #8 - orr r6, r6, r5, pull #24 - mov r5, r5, push #8 - orr r5, r5, r4, pull #24 - mov r4, r4, push #8 - orr r4, r4, r3, pull #24 - stmdb r0!, {r4, r5, r6, r7} - bge 32b - PLD( cmn r2, #32 ) - PLD( bge 33b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 35f -34: mov ip, r3, push #8 - ldr r3, [r1, #-4]! - subs r2, r2, #4 - orr ip, ip, r3, pull #24 - str ip, [r0, #-4]! - bge 34b -35: add r1, r1, #3 - b 29b - -36: cmp r2, #12 - blt 39f - PLD( pld [r1, #-16] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 38f ) - PLD( pld [r1, #-32] ) -37: PLD( pld [r1, #-48] ) -38: mov r7, r3, push #16 - ldmdb r1!, {r3, r4, r5, r6} - subs r2, r2, #16 - orr r7, r7, r6, pull #16 - mov r6, r6, push #16 - orr r6, r6, r5, pull #16 - mov r5, r5, push #16 - orr r5, r5, r4, pull #16 - mov r4, r4, push #16 - orr r4, r4, r3, pull #16 - stmdb r0!, {r4, r5, r6, r7} - bge 37b - PLD( cmn r2, #32 ) - PLD( bge 38b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 40f -39: mov ip, r3, push #16 - ldr r3, [r1, #-4]! - subs r2, r2, #4 - orr ip, ip, r3, pull #16 - str ip, [r0, #-4]! - bge 39b -40: add r1, r1, #2 - b 29b +ENTRY(memcpy) -41: cmp r2, #12 - blt 44f - PLD( pld [r1, #-16] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 43f ) - PLD( pld [r1, #-32] ) -42: PLD( pld [r1, #-48] ) -43: mov r7, r3, push #24 - ldmdb r1!, {r3, r4, r5, r6} - subs r2, r2, #16 - orr r7, r7, r6, pull #8 - mov r6, r6, push #24 - orr r6, r6, r5, pull #8 - mov r5, r5, push #24 - orr r5, r5, r4, pull #8 - mov r4, r4, push #24 - orr r4, r4, r3, pull #8 - stmdb r0!, {r4, r5, r6, r7} - bge 42b - PLD( cmn r2, #32 ) - PLD( bge 43b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 45f -44: mov ip, r3, push #24 - ldr r3, [r1, #-4]! - subs r2, r2, #4 - orr ip, ip, r3, pull #8 - str ip, [r0, #-4]! - bge 44b -45: add r1, r1, #1 - b 29b +#include "copy_template.S"