Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch/x86_64/lib/memset.S
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 4b4c406..ad397f2 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -22,32 +22,32 @@ __memset:
 	mul	%rcx		/* with rax, clobbers rdx */
 	/* align dst */
-	movl	%edi,%r9d
-	andl	$7,%r9d
+	movl	%edi,%r9d
+	andl	$7,%r9d
 	jnz	.Lbad_alignment
 .Lafter_bad_alignment:
-
+
 	movl	%r11d,%ecx
 	shrl	$6,%ecx
 	jz	.Lhandle_tail
 	.p2align 4
-.Lloop_64:
+.Lloop_64:
 	decl	%ecx
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	movq	%rax,16(%rdi)
-	movq	%rax,24(%rdi)
-	movq	%rax,32(%rdi)
-	movq	%rax,40(%rdi)
-	movq	%rax,48(%rdi)
-	movq	%rax,56(%rdi)
+	movq	%rax,(%rdi)
+	movq	%rax,8(%rdi)
+	movq	%rax,16(%rdi)
+	movq	%rax,24(%rdi)
+	movq	%rax,32(%rdi)
+	movq	%rax,40(%rdi)
+	movq	%rax,48(%rdi)
+	movq	%rax,56(%rdi)
 	leaq	64(%rdi),%rdi
 	jnz	.Lloop_64
 	/* Handle tail in loops. The loops should be faster than hard
-	   to predict jump tables. */
-	.p2align 4
+	   to predict jump tables. */
+	.p2align 4
 .Lhandle_tail:
 	movl	%r11d,%ecx
 	andl	$63&(~7),%ecx
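Context for the hunk above: the `mul %rcx` at the top is the tail of the usual byte-replication idiom, whose setup precedes this hunk in the file. A minimal sketch of that idiom, assuming the fill byte arrives in %sil per the System V AMD64 ABI (this is an illustration, not lines from this diff):

	/* Sketch: expand a fill byte into all eight byte lanes of %rax,
	 * so each movq in the unrolled loop stores 8 copies at once. */
	movzbl	%sil,%ecx                   /* zero-extend the fill byte */
	movabsq	$0x0101010101010101,%rax    /* 0x01 in every byte lane */
	mul	%rcx                        /* rax = byte * 0x0101...; rdx clobbered */

The eight back-to-back movq stores then write 64 bytes per iteration, which is why the loop counter above is the byte count shifted right by 6.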
@@ -70,8 +70,8 @@ __memset:
 	movb	%al,(%rdi)
 	leaq	1(%rdi),%rdi
 	jnz	.Lloop_1
-
-.Lende:
+
+.Lende:
 	movq	%r10,%rax
 	ret
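The `.Lende` path sets the return value: like C memset, the routine hands back the original destination pointer, which the prologue (not shown in these hunks) keeps in %r10. A hypothetical call site, assuming the usual System V AMD64 argument registers (buf and caller_sketch are ours, not from this diff):

	.bss
buf:	.skip	4096
	.text
caller_sketch:
	leaq	buf(%rip),%rdi		/* dst */
	movl	$0,%esi			/* fill byte */
	movq	$4096,%rdx		/* count */
	call	memset
	/* %rax now holds the original destination pointer */
	ret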
@@ -79,22 +79,22 @@ __memset:
 	cmpq	$7,%r11
 	jbe	.Lhandle_7
 	movq	%rax,(%rdi)	/* unaligned store */
-	movq	$8,%r8
-	subq	%r9,%r8
+	movq	$8,%r8
+	subq	%r9,%r8
 	addq	%r8,%rdi
 	subq	%r8,%r11
 	jmp	.Lafter_bad_alignment
-	/* C stepping K8 run faster using the string instructions.
+	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
-#include <asm/cpufeature.h>
-
+#include <asm/cpufeature.h>
+
 	.section .altinstructions,"a"
 	.align 8
 	.quad	memset
 	.quad	memset_c
-	.byte	X86_FEATURE_K8_C
+	.byte	X86_FEATURE_REP_GOOD
 	.byte	memset_c_end-memset_c
 	.byte	memset_c_end-memset_c
 	.previous
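The substantive change in this hunk is the feature bit: X86_FEATURE_K8_C (C-stepping K8 only) becomes X86_FEATURE_REP_GOOD, so any CPU advertising fast rep-string operations gets the replacement, matching the reworded comment. The `.altinstructions` record itself feeds the kernel's boot-time patching: apply_alternatives() walks these records and, when the running CPU has the named feature, copies the replacement code over the original. A hedged annotation of the same record, with field meanings as in the 2.6-era struct alt_instr (the comments are descriptive, not from this diff):

	.section .altinstructions,"a"
	.align 8
	.quad	memset                  /* address of the code to patch */
	.quad	memset_c                /* address of the replacement */
	.byte	X86_FEATURE_REP_GOOD    /* CPU feature bit gating the swap */
	.byte	memset_c_end-memset_c   /* length of the region to patch */
	.byte	memset_c_end-memset_c   /* length of the replacement */
	.previous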
@@ -103,8 +103,8 @@ __memset:
 /* rdi	destination
  * rsi	value
  * rdx	count
- */
-memset_c:
+ */
+memset_c:
 	movq	%rdi,%r9
 	movl	%edx,%r8d
 	andl	$7,%r8d
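The hunk cuts off before memset_c's body, but its visible prologue already shows the shape: save the destination for the return value and split the count into qwords plus a 0..7 byte remainder. For reference, a minimal standalone sketch of the string-instruction approach the comment describes, written as an independent function (the name and exact instruction choices are ours, not the kernel's code):

	/* Sketch: memset via rep stos. dst in %rdi, fill byte in %esi,
	 * count in %rdx; returns dst in %rax. Assumes DF is clear, as
	 * the ABI guarantees. */
	.text
	.globl	memset_rep_sketch
memset_rep_sketch:
	movq	%rdi,%r9                    /* save dst for the return value */
	movzbl	%sil,%eax
	movabsq	$0x0101010101010101,%rcx
	imulq	%rcx,%rax                   /* replicate the byte into %rax */
	movq	%rdx,%rcx
	shrq	$3,%rcx                     /* qword count */
	rep stosq                           /* store %rax, %rcx times */
	movl	%edx,%ecx
	andl	$7,%ecx                     /* 0..7 leftover bytes */
	rep stosb
	movq	%r9,%rax
	ret

Besides being shorter, rep stos leaves the size/alignment dispatch to the hardware, which is the "a lot simpler" point the patched comment makes.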