diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index c6c46494f..0ea0ddc87 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,9 @@
 /* Copyright 2002 Andi Kleen */
-
-	#include <asm/cpufeature.h>
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -13,22 +16,36 @@
  * rax original destination
  */
 
-	.globl __memcpy
-	.globl memcpy
-	.p2align 4
-__memcpy:
-memcpy:
+	ALIGN
+memcpy_c:
+	CFI_STARTPROC
+	movq %rdi,%rax
+	movl %edx,%ecx
+	shrl $3,%ecx
+	andl $7,%edx
+	rep movsq
+	movl %edx,%ecx
+	rep movsb
+	ret
+	CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	CFI_STARTPROC
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	movq %rdi,%rax
 
 	movl %edx,%ecx
 	shrl $6,%ecx
 	jz .Lhandle_tail
-	
+
 	.p2align 4
 .Lloop_64:
 	decl %ecx
-	
+
 	movq (%rsi),%r11
 	movq 8(%rsi),%r8
 
@@ -40,7 +57,7 @@ memcpy:
 
 	movq %r9,2*8(%rdi)
 	movq %r10,3*8(%rdi)
-	
+
 	movq 4*8(%rsi),%r11
 	movq 5*8(%rsi),%r8
 
@@ -63,10 +80,10 @@
 	shrl $3,%ecx
 	jz .Lhandle_7
 	.p2align 4
-.Lloop_8: 
+.Lloop_8:
 	decl %ecx
 	movq (%rsi),%r8
-	movq %r8,(%rdi) 
+	movq %r8,(%rdi)
 	leaq 8(%rdi),%rdi
 	leaq 8(%rsi),%rsi
 	jnz .Lloop_8
@@ -78,44 +95,35 @@
 	.p2align 4
 .Lloop_1:
 	movb (%rsi),%r8b
-	movb %r8b,(%rdi) 
+	movb %r8b,(%rdi)
 	incq %rdi
 	incq %rsi
 	decl %ecx
 	jnz .Lloop_1
-	
-.Lende: 
+
+.Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
 .Lfinal:
-	
-	/* C stepping K8 run faster using the string copy instructions.
+	CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
+
+	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
-	
-	.section .altinstructions,"a"
-	.align 8
-	.quad memcpy
-	.quad memcpy_c
-	.byte X86_FEATURE_K8_C
-	.byte .Lfinal-memcpy
-	.byte memcpy_c_end-memcpy_c
-	.previous
-
 
 	.section .altinstr_replacement,"ax"
-	/* rdi destination
-	 * rsi source
-	 * rdx count
-	 */
-memcpy_c:
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
-	rep
-	movsq
-	movl %edx,%ecx
-	rep
-	movsb
-	ret
-memcpy_c_end:
+1:	.byte 0xeb				/* jmp <disp8> */
+	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
+2:
+	.previous
+	.section .altinstructions,"a"
+	.align 8
+	.quad memcpy
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lfinal - memcpy
+	.byte 2b - 1b
+	.previous
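
For readers who want to see what the patched-in variant does, here is a minimal userspace C sketch of the rep-string strategy used by memcpy_c: copy n/8 qwords with rep movsq, then the 0-7 remaining bytes with rep movsb. The helper name rep_string_copy is hypothetical, and the inline asm is GCC-specific and x86-64 only; this is an illustration of the technique, not kernel code.

```c
#include <stdio.h>
#include <string.h>

/*
 * Userspace sketch of memcpy_c above: move the bulk of the buffer
 * eight bytes at a time with "rep movsq", then the 0-7 leftover
 * bytes with "rep movsb".  rep_string_copy is a hypothetical name.
 */
static void *rep_string_copy(void *dst, const void *src, unsigned long n)
{
	void *ret = dst;		/* movq %rdi,%rax */
	unsigned long qwords = n >> 3;	/* shrl $3,%ecx   */
	unsigned long tail = n & 7;	/* andl $7,%edx   */

	/* rep movsq implicitly uses rdi, rsi and rcx, all updated in place */
	asm volatile("rep movsq"
		     : "+D" (dst), "+S" (src), "+c" (qwords)
		     : : "memory");
	asm volatile("rep movsb"
		     : "+D" (dst), "+S" (src), "+c" (tail)
		     : : "memory");
	return ret;
}

int main(void)
{
	char src[20] = "rep-string memcpy";
	char dst[20];

	rep_string_copy(dst, src, sizeof(src));
	printf("%s\n", dst);
	return memcmp(src, dst, sizeof(src)) ? 1 : 0;
}
```

On CPUs that set X86_FEATURE_REP_GOOD the microcoded rep movs sequence is competitive with the unrolled 64-byte loop while taking far fewer instruction bytes, which is the trade-off the comment in the diff alludes to.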
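The replacement stub in the diff is just a two-byte short jump: 0xeb is the opcode for jmp rel8, and the displacement is (memcpy_c - memcpy) - (2f - 1b), i.e. the distance from the end of the two-byte instruction to memcpy_c once the stub has been copied over the start of memcpy. Each .altinstructions entry emitted above corresponds roughly to a record like the following; the field names follow struct alt_instr from this kernel era's alternative.h, though the exact trailing padding is an assumption here, shown only for illustration.

```c
#include <stdint.h>

/*
 * Rough C view of one .altinstructions record as emitted in the
 * diff above.  Field names follow struct alt_instr of this era;
 * the pad bytes are an assumption implied by the ".align 8".
 */
struct alt_instr {
	uint8_t *instr;		/* .quad memcpy: code site to be patched    */
	uint8_t *replacement;	/* .quad 1b: the 2-byte jmp to memcpy_c     */
	uint8_t  cpuid;		/* .byte X86_FEATURE_REP_GOOD: gating bit   */
	uint8_t  instrlen;	/* .byte .Lfinal - memcpy: patchable length */
	uint8_t  replacementlen;/* .byte 2b - 1b: bytes actually copied in  */
	uint8_t  pad[5];	/* keeps each entry 8-byte aligned          */
};
```

At boot, apply_alternatives() walks these records; when the CPU advertises the cpuid feature bit, it copies replacementlen bytes from replacement over instr and fills the remaining instrlen - replacementlen bytes with NOPs, so REP_GOOD machines jump straight into memcpy_c while everything else keeps the unrolled loop.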