X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Flib%2Fmemset.S;h=ad397f2c7de8fa7c73185793ba973bdfaed39b0b;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=4b4c40638640ee8128510155b36dc5550044098a;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S index 4b4c40638..ad397f2c7 100644 --- a/arch/x86_64/lib/memset.S +++ b/arch/x86_64/lib/memset.S @@ -22,32 +22,32 @@ __memset: mul %rcx /* with rax, clobbers rdx */ /* align dst */ - movl %edi,%r9d - andl $7,%r9d + movl %edi,%r9d + andl $7,%r9d jnz .Lbad_alignment .Lafter_bad_alignment: - + movl %r11d,%ecx shrl $6,%ecx jz .Lhandle_tail .p2align 4 -.Lloop_64: +.Lloop_64: decl %ecx - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) - movq %rax,32(%rdi) - movq %rax,40(%rdi) - movq %rax,48(%rdi) - movq %rax,56(%rdi) + movq %rax,(%rdi) + movq %rax,8(%rdi) + movq %rax,16(%rdi) + movq %rax,24(%rdi) + movq %rax,32(%rdi) + movq %rax,40(%rdi) + movq %rax,48(%rdi) + movq %rax,56(%rdi) leaq 64(%rdi),%rdi jnz .Lloop_64 /* Handle tail in loops. The loops should be faster than hard - to predict jump tables. */ - .p2align 4 + to predict jump tables. */ + .p2align 4 .Lhandle_tail: movl %r11d,%ecx andl $63&(~7),%ecx @@ -70,8 +70,8 @@ __memset: movb %al,(%rdi) leaq 1(%rdi),%rdi jnz .Lloop_1 - -.Lende: + +.Lende: movq %r10,%rax ret @@ -79,22 +79,22 @@ __memset: cmpq $7,%r11 jbe .Lhandle_7 movq %rax,(%rdi) /* unaligned store */ - movq $8,%r8 - subq %r9,%r8 + movq $8,%r8 + subq %r9,%r8 addq %r8,%rdi subq %r8,%r11 jmp .Lafter_bad_alignment - /* C stepping K8 run faster using the string instructions. + /* Some CPUs run faster using the string instructions. It is also a lot simpler. Use this when possible */ -#include - +#include + .section .altinstructions,"a" .align 8 .quad memset .quad memset_c - .byte X86_FEATURE_K8_C + .byte X86_FEATURE_REP_GOOD .byte memset_c_end-memset_c .byte memset_c_end-memset_c .previous @@ -103,8 +103,8 @@ __memset: /* rdi destination * rsi value * rdx count - */ -memset_c: + */ +memset_c: movq %rdi,%r9 movl %edx,%r8d andl $7,%r8d