X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Flib%2Fmemcpy.S;fp=arch%2Fx86_64%2Flib%2Fmemcpy.S;h=5554948b55549e2c7837264ce9bcae6b552e5a94;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=c6c46494fef50bec9f061ce6a679d017e90ac921;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S index c6c46494f..5554948b5 100644 --- a/arch/x86_64/lib/memcpy.S +++ b/arch/x86_64/lib/memcpy.S @@ -24,11 +24,11 @@ memcpy: movl %edx,%ecx shrl $6,%ecx jz .Lhandle_tail - + .p2align 4 .Lloop_64: decl %ecx - + movq (%rsi),%r11 movq 8(%rsi),%r8 @@ -40,7 +40,7 @@ memcpy: movq %r9,2*8(%rdi) movq %r10,3*8(%rdi) - + movq 4*8(%rsi),%r11 movq 5*8(%rsi),%r8 @@ -63,10 +63,10 @@ memcpy: shrl $3,%ecx jz .Lhandle_7 .p2align 4 -.Lloop_8: +.Lloop_8: decl %ecx movq (%rsi),%r8 - movq %r8,(%rdi) + movq %r8,(%rdi) leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jnz .Lloop_8 @@ -78,34 +78,34 @@ memcpy: .p2align 4 .Lloop_1: movb (%rsi),%r8b - movb %r8b,(%rdi) + movb %r8b,(%rdi) incq %rdi incq %rsi decl %ecx jnz .Lloop_1 - -.Lende: + +.Lende: popq %rbx ret .Lfinal: - - /* C stepping K8 run faster using the string copy instructions. + + /* Some CPUs run faster using the string copy instructions. It is also a lot simpler. Use this when possible */ - + .section .altinstructions,"a" .align 8 .quad memcpy .quad memcpy_c - .byte X86_FEATURE_K8_C + .byte X86_FEATURE_REP_GOOD .byte .Lfinal-memcpy - .byte memcpy_c_end-memcpy_c + .byte memcpy_c_end-memcpy_c .previous .section .altinstr_replacement,"ax" /* rdi destination * rsi source * rdx count - */ + */ memcpy_c: movq %rdi,%rax movl %edx,%ecx