1 /* $Id: VISmemset.S,v 1.10 1999/12/23 17:02:16 jj Exp $
2 * VISmemset.S: High speed memset operations utilizing the UltraSparc
3 * Visual Instruction Set.
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 * Copyright (C) 1996, 1997, 1999 Jakub Jelinek (jakub@redhat.com)
 */
/*
 * SET_BLOCKS(base, offset, source), stx variant.
 *
 * Expands to four stx (64-bit) stores of the register `source` to
 *   [base - offset - 0x18], [base - offset - 0x10],
 *   [base - offset - 0x08], [base - offset - 0x00]
 * that is, the 32 consecutive bytes starting at base - offset - 0x18.
 *
 *   base   - register holding the reference address
 *   offset - constant distance below base
 *   source - register holding the 64-bit pattern to store
 *
 * The addresses must be 8-byte aligned for stx; the macro does not
 * check this itself.
 *
 * NOTE(review): a second SET_BLOCKS definition built from stw follows
 * this one. The preprocessor conditional that picks one of the two is
 * not visible in this excerpt - confirm before relying on either.
 */
12 #define SET_BLOCKS(base, offset, source) \
13 stx source, [base - offset - 0x18]; \
14 stx source, [base - offset - 0x10]; \
15 stx source, [base - offset - 0x08]; \
16 stx source, [base - offset - 0x00];
/*
 * SET_BLOCKS(base, offset, source), stw variant.
 *
 * Expands to eight stw (32-bit) stores of the register `source`, at
 * 4-byte steps from [base - offset - 0x18] up to [base - offset + 0x04].
 * Together they cover the 32 consecutive bytes starting at
 * base - offset - 0x18, which is the same byte range the stx-based
 * SET_BLOCKS definition writes with four 64-bit stores.
 *
 *   base   - register holding the reference address
 *   offset - constant distance below base
 *   source - register holding the 32-bit pattern to store
 *
 * NOTE(review): presumably this form is for builds where 64-bit integer
 * stores cannot be used; the selecting conditional is not visible in
 * this excerpt - confirm.
 */
18 #define SET_BLOCKS(base, offset, source) \
19 stw source, [base - offset - 0x18]; \
20 stw source, [base - offset - 0x14]; \
21 stw source, [base - offset - 0x10]; \
22 stw source, [base - offset - 0x0c]; \
23 stw source, [base - offset - 0x08]; \
24 stw source, [base - offset - 0x04]; \
25 stw source, [base - offset - 0x00]; \
26 stw source, [base - offset + 0x04];
30 /* So that the brz,a,pt in memset doesn't have to get through PLT, here we go... */
35 #include <asm/visasm.h>
38 /* Well, memset is a lot easier to get right than bcopy... */
/*
 * NOTE(review): the lines below are a sparse sample of the memset body.
 * The entry label, the delay-slot instructions and most of the routine
 * are not visible, so the comments describe each instruction on its own
 * and do not claim anything about the surrounding control flow.
 */
/*
 * If %o1 is zero, branch to bzero_private (annulled, predicted taken).
 * Presumably %o1 is the fill value on entry - confirm against the entry
 * code.
 */
48 brz,a,pt %o1, bzero_private
/* Test bit value 16 of %o5; only the condition codes are kept (%g0). */
103 1: andcc %o5, 16, %g0
/* Test bit value 32 of %o5; only the condition codes are kept (%g0). */
116 1: andcc %o5, 32, %g0
/*
 * %o3 = %o2 with the low six bits cleared, i.e. %o2 rounded down to a
 * multiple of 64; the condition codes reflect whether that is zero.
 * Presumably %o2 is the remaining byte count - confirm.
 */
118 andncc %o2, 0x3f, %o3
/*
 * Select ASI_BLK_P as the address space used by the stda ... %asi
 * stores below.
 */
142 wr %g0, ASI_BLK_P, %asi
/* Order earlier loads and stores ahead of the stores that follow. */
143 membar #StoreStore | #LoadStore
/*
 * Stores through %asi starting at register %f0, at 0x40 (64) byte steps
 * from %o0. With ASI_BLK_P each one is presumably a 64-byte block store -
 * confirm against the UltraSPARC manual.
 */
157 stda %f0, [%o0 + 0x00] %asi
160 stda %f0, [%o0 + 0x40] %asi
161 stda %f0, [%o0 + 0x80] %asi
/*
 * Label 10: four stores at offsets 0x00, 0x40, 0x80 and 0xc0, spanning
 * 0x100 (256) bytes from %o0 if each store covers 64 bytes.
 */
164 10: stda %f0, [%o0 + 0x00] %asi
165 stda %f0, [%o0 + 0x40] %asi
166 stda %f0, [%o0 + 0x80] %asi
167 stda %f0, [%o0 + 0xc0] %asi
/*
 * Label 11: take 256 off %o3 and set the condition codes; 256 matches
 * the span of the four stores at label 10.
 */
168 11: subcc %o3, 256, %o3
/*
 * Write FPRS_FEF to the %fprs register. FPRS_FEF is defined outside
 * this excerpt.
 */
177 wr %g0, FPRS_FEF, %fprs
/* Order the preceding stores ahead of later loads and stores. */
180 membar #StoreLoad | #StoreStore
/*
 * Label 9: %g5 = %o2 masked with 0x78 (bits 3 to 6), a multiple of 8
 * between 0 and 120; the condition codes reflect whether it is zero.
 */
181 9: andcc %o2, 0x78, %g5
/*
 * Computed jump relative to forward label 13; the return address is
 * discarded into %g0. How %g3 is set up is not visible here.
 */
188 jmpl %g3 + %lo(13f), %g0
/*
 * Same kind of computed jump, with the target given as the distance
 * from backward label 14 to forward label 13. Presumably the
 * position-independent alternative of the jump above - confirm against
 * the conditional around it.
 */
198 jmpl %g3 + (13f - 14b), %g0
/*
 * Label 12: four SET_BLOCKS expansions at offsets 0x68, 0x48, 0x28 and
 * 0x08. Each writes 32 bytes, so together they store %o1 over the 128
 * contiguous bytes from %o0 - 0x80 up to %o0 - 1.
 */
201 12: SET_BLOCKS(%o0, 0x68, %o1)
202 SET_BLOCKS(%o0, 0x48, %o1)
203 SET_BLOCKS(%o0, 0x28, %o1)
204 SET_BLOCKS(%o0, 0x08, %o1)
/*
 * %o3 = %o2 rounded down to a multiple of 64, condition codes set; the
 * same computation as the earlier andncc on %o2.
 */
233 andncc %o2, 0x3f, %o3