1 /* $Id: VISbzero.S,v 1.11 2001/03/15 08:51:24 anton Exp $
2 * VISbzero.S: High speed clear operations utilizing the UltraSparc
3 * Visual Instruction Set.
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
12 #include <asm/visasm.h>
/*
 * Fault-handling store macros (fragmentary view: interior lines of the
 * original file are missing here, so macro bodies are incomplete).
 * EXN/EXC wrap a store instruction and emit a .fixup stub branching to a
 * VISbzerofixup_* handler, plus an __ex_table entry mapping the store's
 * address to that stub — so a fault on a user-space store is recovered
 * instead of oopsing.  The `a, b` (and `c...`) arguments appear to be
 * fixup-time instructions that reconstruct the not-yet-cleared byte
 * count; NOTE(review): the truncated bodies prevent confirming the exact
 * register protocol from this view — check the full file.
 */
14 #define EXN(x,y,a,b,z) \
18 99: ba VISbzerofixup_ret##z; \
20 .section __ex_table; \
25 #define EXC(x,y,a,b,c...) \
30 ba VISbzerofixup_ret0; \
32 .section __ex_table; \
39 .section __ex_table; \
41 .word 98b, VISbzerofixup_reto1; \
44 #define EX(x,y,a,b) EXN(x,y,a,b,0)
45 #define EX1(x,y,a,b) EXN(x,y,a,b,1)
46 #define EX2(x,y,a,b) EXN(x,y,a,b,2)
/*
 * EXT registers a whole (start, end) instruction range with one shared
 * fixup handler in __ex_table (the 0 marker word presumably flags the
 * range-style entry format — TODO confirm against the table consumer).
 */
47 #define EXT(start,end,handler) \
48 .section __ex_table; \
50 .word start, 0, end, handler; \
54 #define EX(x,y,a,b) x,y /* no-fixup build variant: emit the bare store */
55 #define EX1(x,y,a,b) x,y
56 #define EX2(x,y,a,b) x,y
57 #define EXC(x,y,a,b,c...) x,y /* fixup args a,b,c... are discarded here */
/*
 * ZERO_BLOCKS: emit eight 8-byte STX stores of `source` (used with %g0,
 * i.e. zero) covering the 64 bytes ending at [base - offset], written at
 * descending displacements.  Deliberately straight-line: the use site
 * wraps these in EXT(12b,13f,...) so a faulting store's address can be
 * mapped back to how much was already cleared — do not reorder.
 */
62 #define ZERO_BLOCKS(base, offset, source) \
63 STX source, [base - offset - 0x38] ASINORMAL; \
64 STX source, [base - offset - 0x30] ASINORMAL; \
65 STX source, [base - offset - 0x28] ASINORMAL; \
66 STX source, [base - offset - 0x20] ASINORMAL; \
67 STX source, [base - offset - 0x18] ASINORMAL; \
68 STX source, [base - offset - 0x10] ASINORMAL; \
69 STX source, [base - offset - 0x08] ASINORMAL; \
70 STX source, [base - offset - 0x00] ASINORMAL;
/*
 * RETL: materialize the return value in %o0 from %g3 — presumably the
 * original destination pointer saved on entry; the entry code that sets
 * %g3 is outside this view, so TODO confirm against the full file.
 */
75 #define RETL mov %g3, %o0
78 /* Well, bzero is a lot easier to get right than bcopy... */
80 .section __ex_table,#alloc
81 .section .fixup,#alloc,#execinstr
/*
 * __bzero(dst=%o0, len=%o1): clear memory, VIS-accelerated for large
 * lengths.  __bzero_noasi is presumably the variant entered without
 * setting %asi first.  NOTE(review): this view is heavily truncated
 * (original line numbers jump), so labels, delay slots, and branch
 * conditions between the visible instructions are missing — comments
 * below describe only what the visible lines establish.
 */
88 .globl __bzero, __bzero_noasi
91 ba,pt %xcc, __bzero+12
/* Select the primary address space for the normal %asi-relative stores. */
95 wr %g0, ASI_P, %asi ! LSU Group
/*
 * Small/unaligned head: byte and word stores.  The EX() second argument
 * is the fixup expression recomputing bytes still uncleared at fault time.
 */
113 EXO1(STB %g0, [%o0 + 0x00] ASINORMAL)
116 EX(STB %g0, [%o0 + 0x01] ASINORMAL, sub %o1, 1)
117 EX(STB %g0, [%o0 + 0x02] ASINORMAL, sub %o1, 2)
124 EXO1(STW %g0, [%o0] ASINORMAL)
134 EX(STX %g0, [%o0] ASINORMAL, sub %o1, 0)
/* Align to a 64-byte boundary: %o5 holds the misalignment to consume. */
136 1: andcc %o5, 16, %g0
139 EX1(STX %g0, [%o0] ASINORMAL, add %g0, 0)
140 EX1(STX %g0, [%o0 + 8] ASINORMAL, sub %g0, 8)
142 1: andcc %o5, 32, %g0
/* %o3 = len rounded down to a multiple of 64 (the block-store count). */
144 andncc %o1, 0x3f, %o3
145 EX(STX %g0, [%o0] ASINORMAL, add %o1, 32)
146 EX(STX %g0, [%o0 + 8] ASINORMAL, add %o1, 24)
147 EX(STX %g0, [%o0 + 16] ASINORMAL, add %o1, 16)
148 EX(STX %g0, [%o0 + 24] ASINORMAL, add %o1, 8)
/*
 * Large-length path: switch to VIS 64-byte block stores (ASI_BLK_P).
 * The membar orders prior normal stores against the block-store stream.
 */
150 6: andncc %o1, 0x3f, %o3
153 or %o4, ASI_BLK_OR, %g7
157 wr %g0, ASI_BLK_P, %asi
159 membar #StoreLoad | #StoreStore | #LoadStore
/* Unrolled block-store loops; %f0 presumably zeroed by missing setup. */
174 EXC(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o2, add %o2, %o1, %o2)
177 EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 64, %o2)
178 EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 128, %o2)
181 10: EX(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o1)
182 EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o1, sub %o1, 64, %o1)
183 EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o1, sub %o1, 128, %o1)
184 EXC(STBLK %f0, [%o0 + 0xc0] ASIBLK, add %o3, %o1, sub %o1, 192, %o1)
/* 256 bytes cleared per iteration of this unrolled loop. */
185 11: subcc %o3, 256, %o3
/* Done with VIS: drop FPU-enable state and fence the block stores. */
194 wr %g0, FPRS_FEF, %fprs
197 membar #StoreLoad | #StoreStore
/* Tail: %o2 = remaining length rounded to 8-byte stores (0..0xf8). */
198 9: andcc %o1, 0xf8, %o2
/*
 * Computed jump into the unrolled ZERO_BLOCKS tail below: enter at an
 * offset from 13f proportional to the bytes remaining, skipping the
 * stores that are not needed.  Two jmpl forms appear — presumably
 * PIC/non-PIC variants selected by missing preprocessor conditionals.
 */
202 14: sethi %hi(13f), %o4
205 jmpl %o4 + %lo(13f), %g0
211 jmpl %o4 + (13f - 14b), %g0
/* 0xc8 + 0x38 = 0x100: the four macros together clear up to 256 bytes. */
214 12: ZERO_BLOCKS(%o0, 0xc8, %g0)
215 ZERO_BLOCKS(%o0, 0x88, %g0)
216 ZERO_BLOCKS(%o0, 0x48, %g0)
217 ZERO_BLOCKS(%o0, 0x08, %g0)
/* One shared fixup handler covers the whole unrolled range above. */
218 EXT(12b,13f,VISbzerofixup_zb)
/* Final sub-doubleword tail: word, halfword, then single bytes. */
223 EX(STW %g0, [%o0] ASINORMAL, and %o1, 7)
227 EX(STH %g0, [%o0] ASINORMAL, and %o1, 3)
230 EX(STB %g0, [%o0] ASINORMAL, add %g0, 1)
243 EX(STB %g0, [%o0 - 1] ASINORMAL, add %o1, 1)
/*
 * Fixup-section fragments: each handler funnels into
 * VISbzerofixup_ret0, which presumably computes the uncleared-byte
 * return value — handler bodies are missing from this view.
 */
263 ba,pt %xcc, VISbzerofixup_ret0
268 ba,pt %xcc, VISbzerofixup_ret0
274 ba,pt %xcc, VISbzerofixup_ret0