.stabs "arch/ppc/lib/",N_SO,0,0,0f
.stabs "string.S",N_SO,0,0,0f
-CACHELINE_BYTES = L1_CACHE_LINE_SIZE
-LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
-CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
_GLOBAL(strcpy)
addi r5,r3,-1
bdnz 8b
blr
-_GLOBAL(bcopy)
- mr r6,r3
- mr r3,r4
- mr r4,r6
- b memcpy
-
/*
* This version uses dcbz on the complete cache lines in the
* destination area to reduce memory traffic. This requires that
dcbz r11,r6
#endif
COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 32
+#if L1_CACHE_BYTES >= 32
COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 64
+#if L1_CACHE_BYTES >= 64
COPY_16_BYTES
COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 128
+#if L1_CACHE_BYTES >= 128
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
73: stwu r9,4(r6)
bdnz 72b
+ .section __ex_table,"a"
+ .align 2
+ .long 70b,100f
+ .long 71b,101f
+ .long 72b,102f
+ .long 73b,103f
+ .text
+
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
clrlwi r5,r5,32-LG_CACHELINE_BYTES
li r11,4
beq 63f
-#if !defined(CONFIG_8xx)
+#ifdef CONFIG_8xx
+ /* Don't use prefetch on 8xx */
+ mtctr r0
+ li r0,0
+53: COPY_16_BYTES_WITHEX(0)
+ bdnz 53b
+
+#else /* not CONFIG_8xx */
/* Here we decide how far ahead to prefetch the source */
+ li r3,4
+ cmpwi r0,1
+ li r7,0
+ ble 114f
+ li r7,1
#if MAX_COPY_PREFETCH > 1
/* Heuristically, for large transfers we prefetch
MAX_COPY_PREFETCH cachelines ahead. For small transfers
we prefetch 1 cacheline ahead. */
cmpwi r0,MAX_COPY_PREFETCH
- li r7,1
- li r3,4
- ble 111f
+ ble 112f
li r7,MAX_COPY_PREFETCH
-111: mtctr r7
-112: dcbt r3,r4
+112: mtctr r7
+111: dcbt r3,r4
addi r3,r3,CACHELINE_BYTES
- bdnz 112b
-#else /* MAX_COPY_PREFETCH == 1 */
- li r3,CACHELINE_BYTES + 4
- dcbt r11,r4
-#endif /* MAX_COPY_PREFETCH */
-#endif /* CONFIG_8xx */
-
- mtctr r0
-53:
-#if !defined(CONFIG_8xx)
+ bdnz 111b
+#else
dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114: subf r8,r7,r0
+ mr r0,r7
+ mtctr r8
+
+53: dcbt r3,r4
54: dcbz r11,r6
-#endif
-/* had to move these to keep extable in order */
.section __ex_table,"a"
.align 2
- .long 70b,100f
- .long 71b,101f
- .long 72b,102f
- .long 73b,103f
-#if !defined(CONFIG_8xx)
.long 54b,105f
-#endif
.text
/* the main body of the cacheline loop */
COPY_16_BYTES_WITHEX(0)
-#if L1_CACHE_LINE_SIZE >= 32
+#if L1_CACHE_BYTES >= 32
COPY_16_BYTES_WITHEX(1)
-#if L1_CACHE_LINE_SIZE >= 64
+#if L1_CACHE_BYTES >= 64
COPY_16_BYTES_WITHEX(2)
COPY_16_BYTES_WITHEX(3)
-#if L1_CACHE_LINE_SIZE >= 128
+#if L1_CACHE_BYTES >= 128
COPY_16_BYTES_WITHEX(4)
COPY_16_BYTES_WITHEX(5)
COPY_16_BYTES_WITHEX(6)
#endif
#endif
bdnz 53b
+ cmpwi r0,0
+ li r3,4
+ li r7,0
+ bne 114b
+#endif /* CONFIG_8xx */
63: srwi. r0,r5,2
mtctr r0
* 104f (if in read part) or 105f (if in write part), after updating r5
*/
COPY_16_BYTES_EXCODE(0)
-#if L1_CACHE_LINE_SIZE >= 32
+#if L1_CACHE_BYTES >= 32
COPY_16_BYTES_EXCODE(1)
-#if L1_CACHE_LINE_SIZE >= 64
+#if L1_CACHE_BYTES >= 64
COPY_16_BYTES_EXCODE(2)
COPY_16_BYTES_EXCODE(3)
-#if L1_CACHE_LINE_SIZE >= 128
+#if L1_CACHE_BYTES >= 128
COPY_16_BYTES_EXCODE(4)
COPY_16_BYTES_EXCODE(5)
COPY_16_BYTES_EXCODE(6)
/* or write fault in cacheline loop */
105: li r9,1
92: li r3,LG_CACHELINE_BYTES
- b 99f
+ mfctr r8
+ add r0,r0,r8
+ b 106f
/* read fault in final word loop */
108: li r9,0
b 93f
* r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
*/
99: mfctr r0
- slw r3,r0,r3
+106: slw r3,r0,r3
add. r3,r3,r5
beq 120f /* shouldn't happen */
cmpwi 0,r9,0