git://git.onelab.eu/linux-2.6.git
fedora core 6 1.2949 + vserver 2.2.0
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index c6c4649..0ea0ddc 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,9 @@
 /* Copyright 2002 Andi Kleen */
-
- #include <asm/cpufeature.h>
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -13,22 +16,36 @@
  * rax original destination
  */
- .globl __memcpy
- .globl memcpy
- .p2align 4
-__memcpy:
-memcpy:
+ ALIGN
+memcpy_c:
+ CFI_STARTPROC
+ movq %rdi,%rax
+ movl %edx,%ecx
+ shrl $3,%ecx
+ andl $7,%edx
+ rep movsq
+ movl %edx,%ecx
+ rep movsb
+ ret
+ CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+ CFI_STARTPROC
 pushq %rbx
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbx, 0
 movq %rdi,%rax
 movl %edx,%ecx
 shrl $6,%ecx
 jz .Lhandle_tail
-
+
 .p2align 4
 .Lloop_64:
 decl %ecx
-
+
 movq (%rsi),%r11
 movq 8(%rsi),%r8
@@ -40,7 +57,7 @@ memcpy:
 movq %r9,2*8(%rdi)
 movq %r10,3*8(%rdi)
-
+
 movq 4*8(%rsi),%r11
 movq 5*8(%rsi),%r8
@@ -63,10 +80,10 @@ memcpy:
 shrl $3,%ecx
 jz .Lhandle_7
 .p2align 4
-.Lloop_8:
+.Lloop_8:
 decl %ecx
 movq (%rsi),%r8
- movq %r8,(%rdi)
+ movq %r8,(%rdi)
 leaq 8(%rdi),%rdi
 leaq 8(%rsi),%rsi
 jnz .Lloop_8
@@ -78,44 +95,35 @@ memcpy:
 .p2align 4
 .Lloop_1:
 movb (%rsi),%r8b
- movb %r8b,(%rdi)
+ movb %r8b,(%rdi)
 incq %rdi
 incq %rsi
 decl %ecx
 jnz .Lloop_1
-
-.Lende:
+
+.Lende:
 popq %rbx
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rbx
 ret
 .Lfinal:
-
- /* C stepping K8 run faster using the string copy instructions.
+ CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
+
+ /* Some CPUs run faster using the string copy instructions.
    It is also a lot simpler. Use this when possible */
-
- .section .altinstructions,"a"
- .align 8
- .quad memcpy
- .quad memcpy_c
- .byte X86_FEATURE_K8_C
- .byte .Lfinal-memcpy
- .byte memcpy_c_end-memcpy_c
- .previous
 .section .altinstr_replacement,"ax"
- /* rdi destination
- * rsi source
- * rdx count
- */
-memcpy_c:
- movq %rdi,%rax
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
- rep
- movsq
- movl %edx,%ecx
- rep
- movsb
- ret
-memcpy_c_end:
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
+2:
+ .previous
+ .section .altinstructions,"a"
+ .align 8
+ .quad memcpy
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lfinal - memcpy
+ .byte 2b - 1b
 .previous
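
Notes on the change, for readers skimming the diff. The new memcpy_c body at the
top of the file is the rep-string variant that the alternatives machinery swaps
in on CPUs advertising X86_FEATURE_REP_GOOD: copy the bulk as 8-byte quadwords,
then the remaining 0-7 tail bytes. A minimal C sketch of that strategy, using
only what the diff shows; rep_style_memcpy is a hypothetical name, not part of
the patch:

	#include <stddef.h>
	#include <stdint.h>

	static void *rep_style_memcpy(void *dst, const void *src, size_t len)
	{
		uint64_t *d8 = dst;
		const uint64_t *s8 = src;
		size_t qwords = len >> 3;	/* shrl $3,%ecx */
		size_t tail   = len & 7;	/* andl $7,%edx */

		while (qwords--)		/* rep movsq */
			*d8++ = *s8++;

		unsigned char *d1 = (unsigned char *)d8;
		const unsigned char *s1 = (const unsigned char *)s8;
		while (tail--)			/* rep movsb */
			*d1++ = *s1++;

		return dst;			/* rax holds the original destination */
	}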
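The replacement in .altinstr_replacement is deliberately tiny. 0xeb is the x86
short-jump opcode, whose 8-bit displacement is counted from the end of the
two-byte instruction. Since the patcher copies these bytes over the start of
memcpy, the jump executes at memcpy and must land on memcpy_c, so the
displacement is memcpy_c - (memcpy + (2b - 1b)) = (memcpy_c - memcpy) - (2f - 1b),
exactly the expression assembled into the second byte. This replaces the old
scheme, which copied the whole memcpy_c body over memcpy and was keyed to the
narrower X86_FEATURE_K8_C bit.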
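Each .align/.quad/.byte group in .altinstructions lays down one record for the
boot-time patcher (apply_alternatives()), which applies the replacement when the
feature bit is set and NOP-pads the remainder of the original. A sketch of the
record these directives populate, following the 2.6-era
include/asm-x86_64/alternative.h; the field names are quoted from memory, so
treat them as an assumption:

	typedef unsigned char u8;	/* kernel byte type, for a self-contained sketch */

	/* One .altinstructions record, as laid out in the diff above. */
	struct alt_instr {
		u8 *instr;		/* .quad memcpy: code to patch              */
		u8 *replacement;	/* .quad 1b: the two-byte short jmp         */
		u8  cpuid;		/* .byte X86_FEATURE_REP_GOOD: feature bit  */
		u8  instrlen;		/* .byte .Lfinal - memcpy: patchable size   */
		u8  replacementlen;	/* .byte 2b - 1b: replacement size (2)      */
		u8  pad;		/* implicit; covered by the .align 8        */
	};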