VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / arch / ppc64 / lib / string.S
index 84d14d1..813587e 100644 (file)
@@ -66,28 +66,68 @@ _GLOBAL(strlen)
        blr
 
 _GLOBAL(memset)
+       neg     r0,r3
        rlwimi  r4,r4,8,16,23
+       andi.   r0,r0,7                 /* # bytes to be 8-byte aligned */
        rlwimi  r4,r4,16,0,15
-       addi    r6,r3,-4
-       cmplwi  0,r5,4
-       blt     7f
-       stwu    r4,4(r6)
-       beqlr
-       andi.   r0,r6,3
-       add     r5,r0,r5
-       subf    r6,r0,r6
-       srwi    r0,r5,2
+       cmplw   cr1,r5,r0               /* do we get that far? */
+       rldimi  r4,r4,32,0
+       mtcrf   1,r0
+       mr      r6,r3
+       blt     cr1,8f
+       beq+    3f                      /* if already 8-byte aligned */
+       subf    r5,r0,r5
+       bf      31,1f
+       stb     r4,0(r6)
+       addi    r6,r6,1
+1:     bf      30,2f
+       sth     r4,0(r6)
+       addi    r6,r6,2
+2:     bf      29,3f
+       stw     r4,0(r6)
+       addi    r6,r6,4
+3:     srdi.   r0,r5,6
+       clrldi  r5,r5,58
        mtctr   r0
-       bdz     6f
-1:     stwu    r4,4(r6)
-       bdnz    1b
-6:     andi.   r5,r5,3
-7:     cmpwi   0,r5,0
-       beqlr
-       mtctr   r5
-       addi    r6,r6,3
-8:     stbu    r4,1(r6)
-       bdnz    8b
+       beq     5f
+4:     std     r4,0(r6)
+       std     r4,8(r6)
+       std     r4,16(r6)
+       std     r4,24(r6)
+       std     r4,32(r6)
+       std     r4,40(r6)
+       std     r4,48(r6)
+       std     r4,56(r6)
+       addi    r6,r6,64
+       bdnz    4b
+5:     srwi.   r0,r5,3
+       clrlwi  r5,r5,29
+       mtcrf   1,r0
+       beq     8f
+       bf      29,6f
+       std     r4,0(r6)
+       std     r4,8(r6)
+       std     r4,16(r6)
+       std     r4,24(r6)
+       addi    r6,r6,32
+6:     bf      30,7f
+       std     r4,0(r6)
+       std     r4,8(r6)
+       addi    r6,r6,16
+7:     bf      31,8f
+       std     r4,0(r6)
+       addi    r6,r6,8
+8:     cmpwi   r5,0
+       mtcrf   1,r5
+       beqlr+
+       bf      29,9f
+       stw     r4,0(r6)
+       addi    r6,r6,4
+9:     bf      30,10f
+       sth     r4,0(r6)
+       addi    r6,r6,2
+10:    bflr    31
+       stb     r4,0(r6)
        blr
 
 _GLOBAL(memmove)