Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch/x86_64/lib/memset.S
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 4b4c406..ad397f2 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -22,32 +22,32 @@ __memset:
 	mul	%rcx		/* with rax, clobbers rdx */
 	/* align dst */
-	movl	%edi,%r9d
-	andl	$7,%r9d
+	movl	%edi,%r9d
+	andl	$7,%r9d
 	jnz	.Lbad_alignment
 .Lafter_bad_alignment:
-
+
 	movl	%r11d,%ecx
 	shrl	$6,%ecx
 	jz	.Lhandle_tail
 	.p2align 4
-.Lloop_64:
+.Lloop_64:
 	decl	%ecx
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	movq	%rax,16(%rdi)
-	movq	%rax,24(%rdi)
-	movq	%rax,32(%rdi)
-	movq	%rax,40(%rdi)
-	movq	%rax,48(%rdi)
-	movq	%rax,56(%rdi)
+	movq	%rax,(%rdi)
+	movq	%rax,8(%rdi)
+	movq	%rax,16(%rdi)
+	movq	%rax,24(%rdi)
+	movq	%rax,32(%rdi)
+	movq	%rax,40(%rdi)
+	movq	%rax,48(%rdi)
+	movq	%rax,56(%rdi)
 	leaq	64(%rdi),%rdi
 	jnz	.Lloop_64
 	/* Handle tail in loops. The loops should be faster than hard
-	   to predict jump tables. */
-	.p2align 4
+	   to predict jump tables. */
+	.p2align 4
 .Lhandle_tail:
 	movl	%r11d,%ecx
 	andl	$63&(~7),%ecx
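Context for the hunk above: the `mul %rcx` at the top is the tail of the usual byte-replication idiom, whose setup precedes this hunk in the file. A minimal sketch of that idiom, assuming the fill byte arrives in %sil per the System V AMD64 ABI (this is an illustration, not lines from this diff):

	/* Sketch: expand a fill byte into all eight byte lanes of %rax,
	 * so each movq in the unrolled loop stores 8 copies at once. */
	movzbl	%sil,%ecx                   /* zero-extend the fill byte */
	movabsq	$0x0101010101010101,%rax    /* 0x01 in every byte lane */
	mul	%rcx                        /* rax = byte * 0x0101...; rdx clobbered */

The eight back-to-back movq stores then write 64 bytes per iteration, which is why the loop counter above is the byte count shifted right by 6.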
@@ -70,8 +70,8 @@ __memset:
 	movb	%al,(%rdi)
 	leaq	1(%rdi),%rdi
 	jnz	.Lloop_1
-
-.Lende:
+
+.Lende:
 	movq	%r10,%rax
 	ret
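The `.Lende` path sets the return value: like C memset, the routine hands back the original destination pointer, which the prologue (not shown in these hunks) keeps in %r10. A hypothetical call site, assuming the usual System V AMD64 argument registers (buf and caller_sketch are ours, not from this diff):

	.bss
buf:	.skip	4096
	.text
caller_sketch:
	leaq	buf(%rip),%rdi		/* dst */
	movl	$0,%esi			/* fill byte */
	movq	$4096,%rdx		/* count */
	call	memset
	/* %rax now holds the original destination pointer */
	ret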
@@ -79,22 +79,22 @@ __memset:
 	cmpq	$7,%r11
 	jbe	.Lhandle_7
 	movq	%rax,(%rdi)	/* unaligned store */
-	movq	$8,%r8
-	subq	%r9,%r8
+	movq	$8,%r8
+	subq	%r9,%r8
 	addq	%r8,%rdi
 	subq	%r8,%r11
 	jmp	.Lafter_bad_alignment
-	/* C stepping K8 run faster using the string instructions.
+	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
-#include <asm/cpufeature.h>
-
+#include <asm/cpufeature.h>
+
 	.section .altinstructions,"a"
 	.align 8
 	.quad	memset
 	.quad	memset_c
-	.byte	X86_FEATURE_K8_C
+	.byte	X86_FEATURE_REP_GOOD
 	.byte	memset_c_end-memset_c
 	.byte	memset_c_end-memset_c
 	.previous
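The substantive change in this hunk is the feature bit: X86_FEATURE_K8_C (C-stepping K8 only) becomes X86_FEATURE_REP_GOOD, so any CPU advertising fast rep-string operations gets the replacement, matching the reworded comment. The `.altinstructions` record itself feeds the kernel's boot-time patching: apply_alternatives() walks these records and, when the running CPU has the named feature, copies the replacement code over the original. A hedged annotation of the same record, with field meanings as in the 2.6-era struct alt_instr (the comments are descriptive, not from this diff):

	.section .altinstructions,"a"
	.align 8
	.quad	memset                  /* address of the code to patch */
	.quad	memset_c                /* address of the replacement */
	.byte	X86_FEATURE_REP_GOOD    /* CPU feature bit gating the swap */
	.byte	memset_c_end-memset_c   /* length of the region to patch */
	.byte	memset_c_end-memset_c   /* length of the replacement */
	.previous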
@@ -103,8 +103,8 @@ __memset:
 /* rdi	destination
  * rsi	value
  * rdx	count
- */
-memset_c:
+ */
+memset_c:
 	movq	%rdi,%r9
 	movl	%edx,%r8d
 	andl	$7,%r8d
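The hunk cuts off before memset_c's body, but its visible prologue already shows the shape: save the destination for the return value and split the count into qwords plus a 0..7 byte remainder. For reference, a minimal standalone sketch of the string-instruction approach the comment describes, written as an independent function (the name and exact instruction choices are ours, not the kernel's code):

	/* Sketch: memset via rep stos. dst in %rdi, fill byte in %esi,
	 * count in %rdx; returns dst in %rax. Assumes DF is clear, as
	 * the ABI guarantees. */
	.text
	.globl	memset_rep_sketch
memset_rep_sketch:
	movq	%rdi,%r9                    /* save dst for the return value */
	movzbl	%sil,%eax
	movabsq	$0x0101010101010101,%rcx
	imulq	%rcx,%rax                   /* replicate the byte into %rax */
	movq	%rdx,%rcx
	shrq	$3,%rcx                     /* qword count */
	rep stosq                           /* store %rax, %rcx times */
	movl	%edx,%ecx
	andl	$7,%ecx                     /* 0..7 leftover bytes */
	rep stosb
	movq	%r9,%rax
	ret

Besides being shorter, rep stos leaves the size/alignment dispatch to the hardware, which is the "a lot simpler" point the patched comment makes.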