fedora core 6 1.2949 + vserver 2.2.0

[linux-2.6.git] / arch / x86_64 / lib / copy_page.S
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S

index dd3aa47..727a5d4 100644 (file)
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,21 +1,36 @@
  /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
-       
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+       ALIGN
+copy_page_c:                   /* string-instruction page copy; plain label (no ENTRY), presumably file-local */
+       CFI_STARTPROC
+       movl $4096/8,%ecx       /* count = 512 quadwords (4096 bytes / 8 bytes per movsq) */
+       rep movsq               /* copy %rcx quadwords from (%rsi) to (%rdi) */
+       ret
+       CFI_ENDPROC
+ENDPROC(copy_page_c)
+
  /* Don't use streaming store because it's better when the target
     ends up in cache. */
             
  /* Could vary the prefetch distance based on SMP/UP */
  
-       .globl copy_page
-       .p2align 4
-copy_page:
+ENTRY(copy_page)
+       CFI_STARTPROC
         subq    $3*8,%rsp
+       CFI_ADJUST_CFA_OFFSET 3*8
         movq    %rbx,(%rsp)
+       CFI_REL_OFFSET rbx, 0
         movq    %r12,1*8(%rsp)
+       CFI_REL_OFFSET r12, 1*8
         movq    %r13,2*8(%rsp)
-                       
+       CFI_REL_OFFSET r13, 2*8
+
         movl    $(4096/64)-5,%ecx
         .p2align 4
-.Loop64:       
+.Loop64:
         dec     %rcx
  
         movq        (%rsi), %rax
@@ -45,7 +60,7 @@ copy_page:
  
         movl    $5,%ecx
         .p2align 4
-.Loop2:        
+.Loop2:
         decl   %ecx
  
         movq        (%rsi), %rax
@@ -65,37 +80,40 @@ copy_page:
         movq     %r10, 40 (%rdi)
         movq     %r11, 48 (%rdi)
         movq     %r12, 56 (%rdi)
-       
-       leaq    64(%rdi),%rdi                   
-       leaq    64(%rsi),%rsi                   
-       
-       jnz     .Loop2          
-       
+
+       leaq    64(%rdi),%rdi
+       leaq    64(%rsi),%rsi
+
+       jnz     .Loop2
+
         movq    (%rsp),%rbx
+       CFI_RESTORE rbx
         movq    1*8(%rsp),%r12
+       CFI_RESTORE r12
         movq    2*8(%rsp),%r13
+       CFI_RESTORE r13
         addq    $3*8,%rsp
+       CFI_ADJUST_CFA_OFFSET -3*8
         ret
-       
-       /* C stepping K8 run faster using the string copy instructions.
+.Lcopy_page_end:
+       CFI_ENDPROC
+ENDPROC(copy_page)
+
+       /* Some CPUs run faster using the string copy instructions.
            It is also a lot simpler. Use this when possible */
  
-#include <asm/cpufeature.h>            
-               
-       .section .altinstructions,"a"
-       .align 8
-       .quad  copy_page
-       .quad  copy_page_c
-       .byte  X86_FEATURE_K8_C
-       .byte  copy_page_c_end-copy_page_c
-       .byte  copy_page_c_end-copy_page_c
-       .previous
+#include <asm/cpufeature.h>
  
         .section .altinstr_replacement,"ax"
-copy_page_c:
-       movl $4096/8,%ecx
-       rep 
-       movsq 
-       ret
-copy_page_c_end:
+1:     .byte 0xeb                                      /* jmp <disp8> */
+       .byte (copy_page_c - copy_page) - (2f - 1b)     /* disp8: copy_page_c relative to the end of this 2-byte jmp as if the jmp sat at copy_page; must fit in a signed byte */
+2:
+       .previous
+       .section .altinstructions,"a"
+       .align 8
+       .quad copy_page                 /* presumably the address of the code to be patched - confirm against struct alt_instr */
+       .quad 1b                        /* address of the replacement bytes emitted at 1: above */
+       .byte X86_FEATURE_REP_GOOD      /* presumably the CPU feature that selects the replacement - confirm */
+       .byte .Lcopy_page_end - copy_page       /* size of copy_page in bytes; stored in one byte, so it must stay below 256 */
+       .byte 2b - 1b                   /* size of the replacement: the 2-byte jmp */
         .previous